Fix missing asmjit library files

小疯 2022-09-15 13:18:33 +08:00
parent aa7ab5ee96
commit 73c77e5ac7
55 changed files with 41767 additions and 2 deletions

.gitignore vendored (4 changes)

@@ -22,9 +22,9 @@ mono_crash.*
 [Rr]elease/
 [Rr]eleases/
 x64/
-x86/
+#x86/
 [Ww][Ii][Nn]32/
-[Aa][Rr][Mm]/
+#[Aa][Rr][Mm]/
 [Aa][Rr][Mm]64/
 bld/
 [Bb]in/

src/asmjit/arm/a64archtraits_p.h Normal file

@@ -0,0 +1,81 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED
#define ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED
#include "../core/archtraits.h"
#include "../core/misc_p.h"
#include "../core/type.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \cond INTERNAL
//! \addtogroup asmjit_a64
//! \{
static const constexpr ArchTraits a64ArchTraits = {
// SP/FP/LR/PC.
Gp::kIdSp, Gp::kIdFp, Gp::kIdLr, 0xFF,
// Reserved.
{ 0, 0, 0 },
// HW stack alignment (AArch64 requires the stack to be aligned to 16 bytes).
16,
// Min/max stack offset - byte addressing is the worst, VecQ addressing the best.
4095, 65520,
// Instruction hints [Gp, Vec, ExtraVirt2, ExtraVirt3].
{{
InstHints::kPushPop,
InstHints::kPushPop,
InstHints::kNoHints,
InstHints::kNoHints
}},
// RegInfo.
#define V(index) OperandSignature{arm::RegTraits<RegType(index)>::kSignature}
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// RegTypeToTypeId.
#define V(index) TypeId(arm::RegTraits<RegType(index)>::kTypeId)
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// TypeIdToRegType.
#define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8) ? RegType::kARM_GpW : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8) ? RegType::kARM_GpW : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16) ? RegType::kARM_GpW : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16) ? RegType::kARM_GpW : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32) ? RegType::kARM_GpW : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32) ? RegType::kARM_GpW : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt64) ? RegType::kARM_GpX : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt64) ? RegType::kARM_GpX : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr) ? RegType::kARM_GpX : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kARM_GpX : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kARM_VecS : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kARM_VecD : RegType::kNone)
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities.
{
ArchTypeNameId::kByte,
ArchTypeNameId::kHWord,
ArchTypeNameId::kWord,
ArchTypeNameId::kXWord
}
};
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64ARCHTRAITS_P_H_INCLUDED
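
The table above is consumed through asmjit's generic ArchTraits interface. A minimal lookup sketch, assuming the public ArchTraits::byArch() and regTypeToTypeId() accessors (both also used by a64emithelper.cpp in this commit):

#include <asmjit/a64.h>

using namespace asmjit;

// Query the AArch64 traits: map a 64-bit GP register type back to its TypeId.
static TypeId gpxDefaultTypeId() noexcept {
  const ArchTraits& traits = ArchTraits::byArch(Arch::kAArch64);
  return traits.regTypeToTypeId(RegType::kARM_GpX);  // kInt64 per the RegTypeToTypeId table.
}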

File diff suppressed because it is too large.

src/asmjit/arm/a64assembler.h Normal file

@@ -0,0 +1,72 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64ASSEMBLER_H_INCLUDED
#define ASMJIT_ARM_A64ASSEMBLER_H_INCLUDED
#include "../core/assembler.h"
#include "../arm/a64emitter.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \addtogroup asmjit_a64
//! \{
//! AArch64 assembler implementation.
class ASMJIT_VIRTAPI Assembler
: public BaseAssembler,
public EmitterExplicitT<Assembler> {
public:
typedef BaseAssembler Base;
//! \name Construction / Destruction
//! \{
ASMJIT_API Assembler(CodeHolder* code = nullptr) noexcept;
ASMJIT_API virtual ~Assembler() noexcept;
//! \}
//! \name Accessors
//! \{
//! Gets whether the current ARM mode is THUMB (alternative to 32-bit ARM encoding).
inline bool isInThumbMode() const noexcept { return _environment.isArchThumb(); }
//! Gets the current code alignment of the current mode (ARM vs THUMB).
inline uint32_t codeAlignment() const noexcept { return isInThumbMode() ? 2 : 4; }
//! \}
//! \name Emit
//! \{
ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
//! \}
//! \name Align
//! \{
ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override;
//! \}
//! \name Events
//! \{
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
//! \}
};
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64ASSEMBLER_H_INCLUDED
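
Usage mirrors the X86 backend: the assembler emits into a CodeHolder, which a JitRuntime can then relocate and publish. A minimal JIT sketch, assuming the <asmjit/a64.h> umbrella header and an AArch64 host:

#include <asmjit/a64.h>

using namespace asmjit;

int main() {
  JitRuntime rt;                       // Runtime designed for JIT execution.
  CodeHolder code;
  code.init(rt.environment());         // Match the host environment.

  a64::Assembler a(&code);             // Attaches to `code` in the constructor.
  a.mov(a64::w0, 42);                  // Return value.
  a.ret(a64::x30);                     // AArch64 returns through the link register.

  typedef int (*Fn)(void);
  Fn fn;
  if (rt.add(&fn, &code) != kErrorOk)  // Relocate and make executable.
    return 1;
  return fn() == 42 ? 0 : 1;
}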

src/asmjit/arm/a64builder.cpp Normal file

@@ -0,0 +1,51 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_AARCH64) && !defined(ASMJIT_NO_BUILDER)
#include "../arm/a64assembler.h"
#include "../arm/a64builder.h"
#include "../arm/a64emithelper_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
// a64::Builder - Construction & Destruction
// =========================================
Builder::Builder(CodeHolder* code) noexcept : BaseBuilder() {
_archMask = uint64_t(1) << uint32_t(Arch::kAArch64);
assignEmitterFuncs(this);
if (code)
code->attach(this);
}
Builder::~Builder() noexcept {}
// a64::Builder - Events
// =====================
Error Builder::onAttach(CodeHolder* code) noexcept {
return Base::onAttach(code);
}
Error Builder::onDetach(CodeHolder* code) noexcept {
return Base::onDetach(code);
}
// a64::Builder - Finalize
// =======================
Error Builder::finalize() {
ASMJIT_PROPAGATE(runPasses());
Assembler a(_code);
a.addEncodingOptions(encodingOptions());
a.addDiagnosticOptions(diagnosticOptions());
return serializeTo(&a);
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_AARCH64 && !ASMJIT_NO_BUILDER
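
finalize() is the only step that produces machine code: it runs the registered passes and then replays the recorded nodes into a temporary Assembler attached to the same CodeHolder. A short sketch, reusing the JitRuntime `rt` from the assembler example above:

CodeHolder code;
code.init(rt.environment());

a64::Builder cb(&code);      // Records nodes instead of encoding immediately.
cb.mov(a64::w0, 1);
cb.ret(a64::x30);
cb.finalize();               // Passes run and serialization happens here.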

src/asmjit/arm/a64builder.h Normal file

@@ -0,0 +1,57 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64BUILDER_H_INCLUDED
#define ASMJIT_ARM_A64BUILDER_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_BUILDER
#include "../core/builder.h"
#include "../arm/a64emitter.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \addtogroup asmjit_a64
//! \{
//! AArch64 builder implementation.
class ASMJIT_VIRTAPI Builder
: public BaseBuilder,
public EmitterExplicitT<Builder> {
public:
ASMJIT_NONCOPYABLE(Builder)
typedef BaseBuilder Base;
//! \name Construction & Destruction
//! \{
ASMJIT_API explicit Builder(CodeHolder* code = nullptr) noexcept;
ASMJIT_API virtual ~Builder() noexcept;
//! \}
//! \name Events
//! \{
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
//! \}
//! \name Finalize
//! \{
ASMJIT_API Error finalize() override;
//! \}
};
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_BUILDER
#endif // ASMJIT_ARM_A64BUILDER_H_INCLUDED

src/asmjit/arm/a64compiler.cpp Normal file

@@ -0,0 +1,60 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_AARCH64) && !defined(ASMJIT_NO_COMPILER)
#include "../arm/a64assembler.h"
#include "../arm/a64compiler.h"
#include "../arm/a64emithelper_p.h"
#include "../arm/a64rapass_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
// a64::Compiler - Construction & Destruction
// ==========================================
Compiler::Compiler(CodeHolder* code) noexcept : BaseCompiler() {
_archMask = uint64_t(1) << uint32_t(Arch::kAArch64);
assignEmitterFuncs(this);
if (code)
code->attach(this);
}
Compiler::~Compiler() noexcept {}
// a64::Compiler - Events
// ======================
Error Compiler::onAttach(CodeHolder* code) noexcept {
ASMJIT_PROPAGATE(Base::onAttach(code));
Error err = addPassT<ARMRAPass>();
if (ASMJIT_UNLIKELY(err)) {
onDetach(code);
return err;
}
return kErrorOk;
}
Error Compiler::onDetach(CodeHolder* code) noexcept {
return Base::onDetach(code);
}
// a64::Compiler - Finalize
// ========================
Error Compiler::finalize() {
ASMJIT_PROPAGATE(runPasses());
Assembler a(_code);
a.addEncodingOptions(encodingOptions());
a.addDiagnosticOptions(diagnosticOptions());
return serializeTo(&a);
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_AARCH64 && !ASMJIT_NO_COMPILER

src/asmjit/arm/a64compiler.h Normal file

@@ -0,0 +1,247 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_ARMCOMPILER_H_INCLUDED
#define ASMJIT_ARM_ARMCOMPILER_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_COMPILER
#include "../core/compiler.h"
#include "../core/type.h"
#include "../arm/a64emitter.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \addtogroup asmjit_a64
//! \{
//! AArch64 compiler implementation.
class ASMJIT_VIRTAPI Compiler
: public BaseCompiler,
public EmitterExplicitT<Compiler> {
public:
ASMJIT_NONCOPYABLE(Compiler)
typedef BaseCompiler Base;
//! \name Construction & Destruction
//! \{
ASMJIT_API explicit Compiler(CodeHolder* code = nullptr) noexcept;
ASMJIT_API virtual ~Compiler() noexcept;
//! \}
//! \name Virtual Registers
//! \{
//! \cond INTERNAL
template<typename RegT, typename Type>
inline RegT _newRegInternal(const Type& type) {
RegT reg(Globals::NoInit);
_newReg(&reg, type, nullptr);
return reg;
}
template<typename RegT, typename Type, typename... Args>
inline RegT _newRegInternal(const Type& type, const char* s, Args&&... args) {
#ifndef ASMJIT_NO_LOGGING
RegT reg(Globals::NoInit);
if (sizeof...(Args) == 0)
_newReg(&reg, type, s);
else
_newRegFmt(&reg, type, s, std::forward<Args>(args)...);
return reg;
#else
DebugUtils::unused(std::forward<Args>(args)...);
RegT reg(Globals::NoInit);
_newReg(&reg, type, nullptr);
return reg;
#endif
}
//! \endcond
template<typename RegT, typename... Args>
inline RegT newSimilarReg(const RegT& ref, Args&&... args) {
return _newRegInternal<RegT>(ref, std::forward<Args>(args)...);
}
template<typename... Args>
inline Reg newReg(TypeId typeId, Args&&... args) { return _newRegInternal<Reg>(typeId, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newGp(TypeId typeId, Args&&... args) { return _newRegInternal<Gp>(typeId, std::forward<Args>(args)...); }
template<typename... Args>
inline Vec newVec(TypeId typeId, Args&&... args) { return _newRegInternal<Vec>(typeId, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newInt32(Args&&... args) { return _newRegInternal<Gp>(TypeId::kInt32, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newUInt32(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt32, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newInt64(Args&&... args) { return _newRegInternal<Gp>(TypeId::kInt64, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newUInt64(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt64, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newIntPtr(Args&&... args) { return _newRegInternal<Gp>(TypeId::kIntPtr, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newUIntPtr(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUIntPtr, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newGpw(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt32, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newGpx(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUInt64, std::forward<Args>(args)...); }
template<typename... Args>
inline Gp newGpz(Args&&... args) { return _newRegInternal<Gp>(TypeId::kUIntPtr, std::forward<Args>(args)...); }
template<typename... Args>
inline Vec newVecS(Args&&... args) { return _newRegInternal<Vec>(TypeId::kFloat32, std::forward<Args>(args)...); }
template<typename... Args>
inline Vec newVecD(Args&&... args) { return _newRegInternal<Vec>(TypeId::kFloat64, std::forward<Args>(args)...); }
template<typename... Args>
inline Vec newVecQ(Args&&... args) { return _newRegInternal<Vec>(TypeId::kUInt8x16, std::forward<Args>(args)...); }
//! \}
//! \name Stack
//! \{
//! Creates a new memory chunk allocated on the current function's stack.
inline Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) {
Mem m(Globals::NoInit);
_newStack(&m, size, alignment, name);
return m;
}
//! \}
//! \name Constants
//! \{
//! Put data to a constant-pool and get a memory reference to it.
inline Mem newConst(ConstPoolScope scope, const void* data, size_t size) {
Mem m(Globals::NoInit);
_newConst(&m, scope, data, size);
return m;
}
//! Put a BYTE `val` to a constant-pool (8 bits).
inline Mem newByteConst(ConstPoolScope scope, uint8_t val) noexcept { return newConst(scope, &val, 1); }
//! Put a HWORD `val` to a constant-pool (16 bits).
inline Mem newHWordConst(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
//! Put a WORD `val` to a constant-pool (32 bits).
inline Mem newWordConst(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
//! Put a DWORD `val` to a constant-pool (64 bits).
inline Mem newDWordConst(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
//! Put a 16-bit `val` to a constant-pool.
inline Mem newInt16Const(ConstPoolScope scope, int16_t val) noexcept { return newConst(scope, &val, 2); }
//! Put a 16-bit `val` to a constant-pool.
inline Mem newUInt16Const(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
//! Put a 32-bit `val` to a constant-pool.
inline Mem newInt32Const(ConstPoolScope scope, int32_t val) noexcept { return newConst(scope, &val, 4); }
//! Put a 32-bit `val` to a constant-pool.
inline Mem newUInt32Const(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
//! Put a 64-bit `val` to a constant-pool.
inline Mem newInt64Const(ConstPoolScope scope, int64_t val) noexcept { return newConst(scope, &val, 8); }
//! Put a 64-bit `val` to a constant-pool.
inline Mem newUInt64Const(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
//! Put a SP-FP `val` to a constant-pool.
inline Mem newFloatConst(ConstPoolScope scope, float val) noexcept { return newConst(scope, &val, 4); }
//! Put a DP-FP `val` to a constant-pool.
inline Mem newDoubleConst(ConstPoolScope scope, double val) noexcept { return newConst(scope, &val, 8); }
//! \}
//! \name Instruction Options
//! \{
//! Force the compiler to not follow the conditional or unconditional jump.
inline Compiler& unfollow() noexcept { _instOptions |= InstOptions::kUnfollow; return *this; }
//! \}
//! \name Compiler specific
//! \{
//! Special pseudo-instruction that can be used to load a memory address into `o0` GP register.
//!
//! \note At the moment this instruction is only useful to load a stack allocated address into a GP register
//! for further use. It makes very little sense to use it for anything else. The semantics of this instruction
//! is the same as X86 `LEA` (load effective address) instruction.
inline Error loadAddressOf(const Gp& o0, const Mem& o1) { return _emitter()->_emitI(Inst::kIdAdr, o0, o1); }
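// For example, given `Mem slot = newStack(16, 16);`, calling `loadAddressOf(reg, slot)`
// materializes the address of that stack slot in `reg`.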
//! \}
//! \name Function Call & Ret Intrinsics
//! \{
//! Invoke a function call without `target` type enforcement.
inline Error invoke_(InvokeNode** out, const Operand_& target, const FuncSignature& signature) {
return addInvokeNode(out, Inst::kIdBlr, target, signature);
}
//! Invoke a function call of the given `target` and `signature` and store the added node to `out`.
//!
//! Creates a new \ref InvokeNode, initializes all the necessary members to match the given function `signature`,
//! adds the node to the compiler, and stores its pointer to `out`. The operation is atomic, if anything fails
//! nullptr is stored in `out` and error code is returned.
inline Error invoke(InvokeNode** out, const Gp& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, const Mem& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, const Label& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, const Imm& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
//! Return.
inline Error ret() { return addRet(Operand(), Operand()); }
//! \overload
inline Error ret(const BaseReg& o0) { return addRet(o0, Operand()); }
//! \overload
inline Error ret(const BaseReg& o0, const BaseReg& o1) { return addRet(o0, o1); }
//! \}
//! \name Jump Tables Support
//! \{
using EmitterExplicitT<Compiler>::br;
//! Adds a jump to the given `target` with the provided jump `annotation`.
inline Error br(const BaseReg& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdBr, target, annotation); }
//! \}
//! \name Events
//! \{
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
//! \}
//! \name Finalize
//! \{
ASMJIT_API Error finalize() override;
//! \}
};
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_COMPILER
#endif // ASMJIT_ARM_ARMCOMPILER_H_INCLUDED
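
A minimal function-building sketch tying the helpers above together, assuming the FuncSignatureT<> helper from the core library and a JitRuntime `rt` as in the earlier examples:

CodeHolder code;
code.init(rt.environment());

a64::Compiler cc(&code);
FuncNode* func = cc.addFunc(FuncSignatureT<int, int, int>());

a64::Gp x = cc.newInt32("x");   // Virtual registers; the RA pass assigns physical ones.
a64::Gp y = cc.newInt32("y");
func->setArg(0, x);
func->setArg(1, y);

cc.add(x, x, y);                // x += y
cc.ret(x);
cc.endFunc();
cc.finalize();                  // Runs the RA pass and serializes to an Assembler.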

src/asmjit/arm/a64emithelper.cpp Normal file

@@ -0,0 +1,464 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_AARCH64)
#include "../core/formatter.h"
#include "../core/funcargscontext_p.h"
#include "../core/string.h"
#include "../core/support.h"
#include "../core/type.h"
#include "../arm/a64emithelper_p.h"
#include "../arm/a64formatter_p.h"
#include "../arm/a64instapi_p.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
// a64::EmitHelper - Emit Operations
// =================================
ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
const Operand_& dst_,
const Operand_& src_, TypeId typeId, const char* comment) {
Emitter* emitter = _emitter->as<Emitter>();
// Invalid or abstract TypeIds are not allowed.
ASMJIT_ASSERT(TypeUtils::isValid(typeId) && !TypeUtils::isAbstract(typeId));
emitter->setInlineComment(comment);
if (dst_.isReg() && src_.isMem()) {
Reg dst(dst_.as<Reg>());
Mem src(src_.as<Mem>());
switch (typeId) {
case TypeId::kInt8:
case TypeId::kUInt8:
return emitter->ldrb(dst.as<Gp>(), src);
case TypeId::kInt16:
case TypeId::kUInt16:
return emitter->ldrh(dst.as<Gp>(), src);
case TypeId::kInt32:
case TypeId::kUInt32:
return emitter->ldr(dst.as<Gp>().w(), src);
case TypeId::kInt64:
case TypeId::kUInt64:
return emitter->ldr(dst.as<Gp>().x(), src);
default: {
if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId))
return emitter->ldr(dst.as<Vec>().s(), src);
if (TypeUtils::isFloat64(typeId) || TypeUtils::isVec64(typeId))
return emitter->ldr(dst.as<Vec>().d(), src);
if (TypeUtils::isVec128(typeId))
return emitter->ldr(dst.as<Vec>().q(), src);
break;
}
}
}
if (dst_.isMem() && src_.isReg()) {
Mem dst(dst_.as<Mem>());
Reg src(src_.as<Reg>());
switch (typeId) {
case TypeId::kInt8:
case TypeId::kUInt8:
return emitter->strb(src.as<Gp>(), dst);
case TypeId::kInt16:
case TypeId::kUInt16:
return emitter->strh(src.as<Gp>(), dst);
case TypeId::kInt32:
case TypeId::kUInt32:
return emitter->str(src.as<Gp>().w(), dst);
case TypeId::kInt64:
case TypeId::kUInt64:
return emitter->str(src.as<Gp>().x(), dst);
default: {
if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId))
return emitter->str(src.as<Vec>().s(), dst);
if (TypeUtils::isFloat64(typeId) || TypeUtils::isVec64(typeId))
return emitter->str(src.as<Vec>().d(), dst);
if (TypeUtils::isVec128(typeId))
return emitter->str(src.as<Vec>().q(), dst);
break;
}
}
}
if (dst_.isReg() && src_.isReg()) {
Reg dst(dst_.as<Reg>());
Reg src(src_.as<Reg>());
switch (typeId) {
case TypeId::kInt8:
case TypeId::kUInt8:
case TypeId::kInt16:
case TypeId::kUInt16:
case TypeId::kInt32:
case TypeId::kUInt32:
case TypeId::kInt64:
case TypeId::kUInt64:
return emitter->mov(dst.as<Gp>().x(), src.as<Gp>().x());
default: {
if (TypeUtils::isFloat32(typeId) || TypeUtils::isVec32(typeId))
return emitter->fmov(dst.as<Vec>().s(), src.as<Vec>().s());
if (TypeUtils::isFloat64(typeId) || TypeUtils::isVec64(typeId))
return emitter->mov(dst.as<Vec>().b8(), src.as<Vec>().b8());
if (TypeUtils::isVec128(typeId))
return emitter->mov(dst.as<Vec>().b16(), src.as<Vec>().b16());
break;
}
}
}
emitter->setInlineComment(nullptr);
return DebugUtils::errored(kErrorInvalidState);
}
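// For illustration (assuming the a64 operand helpers): emitRegMove(w0, ptr(x1),
// TypeId::kInt32) selects `ldr w0, [x1]`, while emitRegMove(ptr(x1), d0,
// TypeId::kFloat64) selects `str d0, [x1]`.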
Error EmitHelper::emitRegSwap(
const BaseReg& a,
const BaseReg& b, const char* comment) {
DebugUtils::unused(a, b, comment);
return DebugUtils::errored(kErrorInvalidState);
}
// TODO: [ARM] EmitArgMove is unfinished.
Error EmitHelper::emitArgMove(
const BaseReg& dst_, TypeId dstTypeId,
const Operand_& src_, TypeId srcTypeId, const char* comment) {
// Deduce optional `dstTypeId`, which may be `TypeId::kVoid` in some cases.
if (dstTypeId == TypeId::kVoid) {
const ArchTraits& archTraits = ArchTraits::byArch(_emitter->arch());
dstTypeId = archTraits.regTypeToTypeId(dst_.type());
}
// Invalid or abstract TypeIds are not allowed.
ASMJIT_ASSERT(TypeUtils::isValid(dstTypeId) && !TypeUtils::isAbstract(dstTypeId));
ASMJIT_ASSERT(TypeUtils::isValid(srcTypeId) && !TypeUtils::isAbstract(srcTypeId));
Reg dst(dst_.as<Reg>());
Operand src(src_);
uint32_t dstSize = TypeUtils::sizeOf(dstTypeId);
uint32_t srcSize = TypeUtils::sizeOf(srcTypeId);
if (TypeUtils::isInt(dstTypeId)) {
if (TypeUtils::isInt(srcTypeId)) {
uint32_t x = dstSize == 8;
dst.setSignature(OperandSignature{x ? uint32_t(GpX::kSignature) : uint32_t(GpW::kSignature)});
_emitter->setInlineComment(comment);
if (src.isReg()) {
src.setSignature(dst.signature());
return _emitter->emit(Inst::kIdMov, dst, src);
}
else if (src.isMem()) {
InstId instId = Inst::kIdNone;
switch (srcTypeId) {
case TypeId::kInt8: instId = Inst::kIdLdrsb; break;
case TypeId::kUInt8: instId = Inst::kIdLdrb; break;
case TypeId::kInt16: instId = Inst::kIdLdrsh; break;
case TypeId::kUInt16: instId = Inst::kIdLdrh; break;
case TypeId::kInt32: instId = x ? Inst::kIdLdrsw : Inst::kIdLdr; break;
case TypeId::kUInt32: instId = Inst::kIdLdr; x = 0; break;
case TypeId::kInt64: instId = Inst::kIdLdr; break;
case TypeId::kUInt64: instId = Inst::kIdLdr; break;
default:
return DebugUtils::errored(kErrorInvalidState);
}
return _emitter->emit(instId, dst, src);
}
}
}
if (TypeUtils::isFloat(dstTypeId) || TypeUtils::isVec(dstTypeId)) {
if (TypeUtils::isFloat(srcTypeId) || TypeUtils::isVec(srcTypeId)) {
switch (srcSize) {
case 2: dst.as<Vec>().setSignature(OperandSignature{VecH::kSignature}); break;
case 4: dst.as<Vec>().setSignature(OperandSignature{VecS::kSignature}); break;
case 8: dst.as<Vec>().setSignature(OperandSignature{VecD::kSignature}); break;
case 16: dst.as<Vec>().setSignature(OperandSignature{VecV::kSignature}); break;
default:
return DebugUtils::errored(kErrorInvalidState);
}
_emitter->setInlineComment(comment);
if (src.isReg()) {
InstId instId = srcSize <= 4 ? Inst::kIdFmov_v : Inst::kIdMov_v;
src.setSignature(dst.signature());
return _emitter->emit(instId, dst, src);
}
else if (src.isMem()) {
return _emitter->emit(Inst::kIdLdr_v, dst, src);
}
}
}
return DebugUtils::errored(kErrorInvalidState);
}
// a64::EmitHelper - Emit Prolog & Epilog
// ======================================
struct LoadStoreInstructions {
InstId singleInstId;
InstId pairInstId;
};
struct PrologEpilogInfo {
struct RegPair {
uint8_t ids[2];
uint16_t offset;
};
struct GroupData {
RegPair pairs[16];
uint32_t pairCount;
};
Support::Array<GroupData, 2> groups;
uint32_t sizeTotal;
Error init(const FuncFrame& frame) noexcept {
uint32_t offset = 0;
for (RegGroup group : Support::EnumValues<RegGroup, RegGroup::kGp, RegGroup::kVec>{}) {
GroupData& data = groups[group];
uint32_t n = 0;
uint32_t pairCount = 0;
RegPair* pairs = data.pairs;
uint32_t slotSize = frame.saveRestoreRegSize(group);
uint32_t savedRegs = frame.savedRegs(group);
if (group == RegGroup::kGp && frame.hasPreservedFP()) {
// Must be at the beginning of the push/pop sequence.
ASMJIT_ASSERT(pairCount == 0);
pairs[0].offset = uint16_t(offset);
pairs[0].ids[0] = Gp::kIdFp;
pairs[0].ids[1] = Gp::kIdLr;
offset += slotSize * 2;
pairCount++;
savedRegs &= ~Support::bitMask(Gp::kIdFp, Gp::kIdLr);
}
Support::BitWordIterator<uint32_t> it(savedRegs);
while (it.hasNext()) {
pairs[pairCount].ids[n] = uint8_t(it.next());
if (++n == 2) {
pairs[pairCount].offset = uint16_t(offset);
offset += slotSize * 2;
n = 0;
pairCount++;
}
}
if (n == 1) {
pairs[pairCount].ids[1] = uint8_t(BaseReg::kIdBad);
pairs[pairCount].offset = uint16_t(offset);
offset += slotSize * 2;
pairCount++;
}
data.pairCount = pairCount;
}
sizeTotal = offset;
return kErrorOk;
}
};
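// For illustration: a frame that preserves FP|LR and additionally saves x19..x21
// (slotSize == 8) is laid out by init() as:
//   pairs[0] = { fp,  lr  }, offset  0   // always first when FP is preserved
//   pairs[1] = { x19, x20 }, offset 16
//   pairs[2] = { x21, bad }, offset 32   // odd register, second slot padded
// giving sizeTotal == 48, which emitProlog() below stores via stp/str.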
ASMJIT_FAVOR_SIZE Error EmitHelper::emitProlog(const FuncFrame& frame) {
Emitter* emitter = _emitter->as<Emitter>();
PrologEpilogInfo pei;
ASMJIT_PROPAGATE(pei.init(frame));
static const Support::Array<Reg, 2> groupRegs = {{ x0, d0 }};
static const Support::Array<LoadStoreInstructions, 2> groupInsts = {{
{ Inst::kIdStr , Inst::kIdStp },
{ Inst::kIdStr_v, Inst::kIdStp_v }
}};
uint32_t adjustInitialOffset = pei.sizeTotal;
for (RegGroup group : Support::EnumValues<RegGroup, RegGroup::kGp, RegGroup::kVec>{}) {
const PrologEpilogInfo::GroupData& data = pei.groups[group];
uint32_t pairCount = data.pairCount;
Reg regs[2] = { groupRegs[group], groupRegs[group] };
Mem mem = ptr(sp);
const LoadStoreInstructions& insts = groupInsts[group];
for (uint32_t i = 0; i < pairCount; i++) {
const PrologEpilogInfo::RegPair& pair = data.pairs[i];
regs[0].setId(pair.ids[0]);
regs[1].setId(pair.ids[1]);
mem.setOffsetLo32(pair.offset);
if (pair.offset == 0 && adjustInitialOffset) {
mem.setOffset(-int(adjustInitialOffset));
mem.makePreIndex();
}
if (pair.ids[1] == BaseReg::kIdBad)
ASMJIT_PROPAGATE(emitter->emit(insts.singleInstId, regs[0], mem));
else
ASMJIT_PROPAGATE(emitter->emit(insts.pairInstId, regs[0], regs[1], mem));
mem.resetToFixedOffset();
if (i == 0 && frame.hasPreservedFP()) {
ASMJIT_PROPAGATE(emitter->mov(x29, sp));
}
}
}
if (frame.hasStackAdjustment()) {
uint32_t adj = frame.stackAdjustment();
if (adj <= 0xFFFu) {
ASMJIT_PROPAGATE(emitter->sub(sp, sp, adj));
}
else if (adj <= 0xFFFFFFu) {
// TODO: [ARM] Prolog - we must touch the pages otherwise it's undefined.
ASMJIT_PROPAGATE(emitter->sub(sp, sp, adj & 0x000FFFu));
ASMJIT_PROPAGATE(emitter->sub(sp, sp, adj & 0xFFF000u));
}
else {
return DebugUtils::errored(kErrorInvalidState);
}
}
return kErrorOk;
}
// TODO: [ARM] Emit epilog.
ASMJIT_FAVOR_SIZE Error EmitHelper::emitEpilog(const FuncFrame& frame) {
Emitter* emitter = _emitter->as<Emitter>();
PrologEpilogInfo pei;
ASMJIT_PROPAGATE(pei.init(frame));
static const Support::Array<Reg, 2> groupRegs = {{ x0, d0 }};
static const Support::Array<LoadStoreInstructions, 2> groupInsts = {{
{ Inst::kIdLdr , Inst::kIdLdp },
{ Inst::kIdLdr_v, Inst::kIdLdp_v }
}};
uint32_t adjustInitialOffset = pei.sizeTotal;
if (frame.hasStackAdjustment()) {
uint32_t adj = frame.stackAdjustment();
if (adj <= 0xFFFu) {
ASMJIT_PROPAGATE(emitter->add(sp, sp, adj));
}
else if (adj <= 0xFFFFFFu) {
ASMJIT_PROPAGATE(emitter->add(sp, sp, adj & 0x000FFFu));
ASMJIT_PROPAGATE(emitter->add(sp, sp, adj & 0xFFF000u));
}
else {
return DebugUtils::errored(kErrorInvalidState);
}
}
for (int g = 1; g >= 0; g--) {
RegGroup group = RegGroup(g);
const PrologEpilogInfo::GroupData& data = pei.groups[group];
uint32_t pairCount = data.pairCount;
Reg regs[2] = { groupRegs[group], groupRegs[group] };
Mem mem = ptr(sp);
const LoadStoreInstructions& insts = groupInsts[group];
for (int i = int(pairCount) - 1; i >= 0; i--) {
const PrologEpilogInfo::RegPair& pair = data.pairs[i];
regs[0].setId(pair.ids[0]);
regs[1].setId(pair.ids[1]);
mem.setOffsetLo32(pair.offset);
if (pair.offset == 0 && adjustInitialOffset) {
mem.setOffset(int(adjustInitialOffset));
mem.makePostIndex();
}
if (pair.ids[1] == BaseReg::kIdBad)
ASMJIT_PROPAGATE(emitter->emit(insts.singleInstId, regs[0], mem));
else
ASMJIT_PROPAGATE(emitter->emit(insts.pairInstId, regs[0], regs[1], mem));
mem.resetToFixedOffset();
}
}
ASMJIT_PROPAGATE(emitter->ret(x30));
return kErrorOk;
}
static Error ASMJIT_CDECL Emitter_emitProlog(BaseEmitter* emitter, const FuncFrame& frame) {
EmitHelper emitHelper(emitter);
return emitHelper.emitProlog(frame);
}
static Error ASMJIT_CDECL Emitter_emitEpilog(BaseEmitter* emitter, const FuncFrame& frame) {
EmitHelper emitHelper(emitter);
return emitHelper.emitEpilog(frame);
}
static Error ASMJIT_CDECL Emitter_emitArgsAssignment(BaseEmitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
EmitHelper emitHelper(emitter);
return emitHelper.emitArgsAssignment(frame, args);
}
void assignEmitterFuncs(BaseEmitter* emitter) {
emitter->_funcs.emitProlog = Emitter_emitProlog;
emitter->_funcs.emitEpilog = Emitter_emitEpilog;
emitter->_funcs.emitArgsAssignment = Emitter_emitArgsAssignment;
#ifndef ASMJIT_NO_LOGGING
emitter->_funcs.formatInstruction = FormatterInternal::formatInstruction;
#endif
#ifndef ASMJIT_NO_VALIDATION
emitter->_funcs.validate = InstInternal::validate;
#endif
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_AARCH64

src/asmjit/arm/a64emithelper_p.h Normal file

@@ -0,0 +1,50 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_ARMEMITHELPER_P_H_INCLUDED
#define ASMJIT_ARM_ARMEMITHELPER_P_H_INCLUDED
#include "../core/api-config.h"
#include "../core/emithelper_p.h"
#include "../core/func.h"
#include "../arm/a64emitter.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \cond INTERNAL
//! \addtogroup asmjit_a64
//! \{
class EmitHelper : public BaseEmitHelper {
public:
inline explicit EmitHelper(BaseEmitter* emitter = nullptr) noexcept
: BaseEmitHelper(emitter) {}
Error emitRegMove(
const Operand_& dst_,
const Operand_& src_, TypeId typeId, const char* comment = nullptr) override;
Error emitRegSwap(
const BaseReg& a,
const BaseReg& b, const char* comment = nullptr) override;
Error emitArgMove(
const BaseReg& dst_, TypeId dstTypeId,
const Operand_& src_, TypeId srcTypeId, const char* comment = nullptr) override;
Error emitProlog(const FuncFrame& frame);
Error emitEpilog(const FuncFrame& frame);
};
void assignEmitterFuncs(BaseEmitter* emitter);
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_ARMEMITHELPER_P_H_INCLUDED

src/asmjit/arm/a64emitter.h Normal file (1228 lines)

File diff suppressed because it is too large.

src/asmjit/arm/a64formatter.cpp Normal file

@@ -0,0 +1,298 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#ifndef ASMJIT_NO_LOGGING
#include "../core/misc_p.h"
#include "../core/support.h"
#include "../arm/a64formatter_p.h"
#include "../arm/a64instapi_p.h"
#include "../arm/a64instdb_p.h"
#include "../arm/a64operand.h"
#ifndef ASMJIT_NO_COMPILER
#include "../core/compiler.h"
#endif
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
// a64::FormatterInternal - Format Register
// ========================================
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatRegister(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
RegType regType,
uint32_t rId,
uint32_t elementType,
uint32_t elementIndex) noexcept {
DebugUtils::unused(flags);
DebugUtils::unused(arch);
static const char bhsdq[] = "bhsdq";
bool virtRegFormatted = false;
#ifndef ASMJIT_NO_COMPILER
if (Operand::isVirtId(rId)) {
if (emitter && emitter->isCompiler()) {
const BaseCompiler* cc = static_cast<const BaseCompiler*>(emitter);
if (cc->isVirtIdValid(rId)) {
VirtReg* vReg = cc->virtRegById(rId);
ASMJIT_ASSERT(vReg != nullptr);
const char* name = vReg->name();
if (name && name[0] != '\0')
ASMJIT_PROPAGATE(sb.append(name));
else
ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(rId))));
virtRegFormatted = true;
}
}
}
#else
DebugUtils::unused(emitter, flags);
#endif
if (!virtRegFormatted) {
char letter = '\0';
switch (regType) {
case RegType::kARM_GpW:
if (rId == Gp::kIdZr)
return sb.append("wzr");
if (rId == Gp::kIdSp)
return sb.append("wsp");
letter = 'w';
break;
case RegType::kARM_GpX:
if (rId == Gp::kIdZr)
return sb.append("xzr");
if (rId == Gp::kIdSp)
return sb.append("sp");
letter = 'x';
break;
case RegType::kARM_VecB:
case RegType::kARM_VecH:
case RegType::kARM_VecS:
case RegType::kARM_VecD:
case RegType::kARM_VecV:
letter = bhsdq[uint32_t(regType) - uint32_t(RegType::kARM_VecB)];
if (elementType)
letter = 'v';
break;
default:
ASMJIT_PROPAGATE(sb.appendFormat("<Reg-%u>?$u", uint32_t(regType), rId));
break;
}
if (letter)
ASMJIT_PROPAGATE(sb.appendFormat("%c%u", letter, rId));
}
if (elementType) {
char elementLetter = '\0';
uint32_t elementCount = 0;
switch (elementType) {
case Vec::kElementTypeB:
elementLetter = 'b';
elementCount = 16;
break;
case Vec::kElementTypeH:
elementLetter = 'h';
elementCount = 8;
break;
case Vec::kElementTypeS:
elementLetter = 's';
elementCount = 4;
break;
case Vec::kElementTypeD:
elementLetter = 'd';
elementCount = 2;
break;
default:
return sb.append(".<Unknown>");
}
if (elementLetter) {
if (elementIndex == 0xFFFFFFFFu) {
if (regType == RegType::kARM_VecD)
elementCount /= 2u;
ASMJIT_PROPAGATE(sb.appendFormat(".%u%c", elementCount, elementLetter));
}
else {
ASMJIT_PROPAGATE(sb.appendFormat(".%c[%u]", elementLetter, elementIndex));
}
}
}
return kErrorOk;
}
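// Examples of strings produced by formatRegister() (illustrative):
//   kARM_GpX,  id 3                      -> "x3" (SP -> "sp", ZR -> "xzr")
//   kARM_VecD, id 1, no element type     -> "d1"
//   kARM_VecV, id 0, element type S      -> "v0.4s"
//   kARM_VecV, id 2, element S, index 1  -> "v2.s[1]"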
// a64::FormatterInternal - Format Operand
// =======================================
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatOperand(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
const Operand_& op) noexcept {
if (op.isReg()) {
const BaseReg& reg = op.as<BaseReg>();
uint32_t elementType = op.as<Vec>().elementType();
uint32_t elementIndex = op.as<Vec>().elementIndex();
if (!op.as<Vec>().hasElementIndex())
elementIndex = 0xFFFFFFFFu;
return formatRegister(sb, flags, emitter, arch, reg.type(), reg.id(), elementType, elementIndex);
}
if (op.isMem()) {
const Mem& m = op.as<Mem>();
ASMJIT_PROPAGATE(sb.append('['));
if (m.hasBase()) {
if (m.hasBaseLabel()) {
ASMJIT_PROPAGATE(Formatter::formatLabel(sb, flags, emitter, m.baseId()));
}
else {
FormatFlags modifiedFlags = flags;
if (m.isRegHome()) {
ASMJIT_PROPAGATE(sb.append('&'));
modifiedFlags &= ~FormatFlags::kRegCasts;
}
ASMJIT_PROPAGATE(formatRegister(sb, modifiedFlags, emitter, arch, m.baseType(), m.baseId()));
}
}
else {
// ARM really requires base.
if (m.hasIndex() || m.hasOffset()) {
ASMJIT_PROPAGATE(sb.append("<None>"));
}
}
// The post-index form makes it look like there is another operand, but it's
// still part of AsmJit's `arm::Mem` operand, which keeps it consistent with
// other architectures.
if (m.isPostIndex())
ASMJIT_PROPAGATE(sb.append(']'));
if (m.hasIndex()) {
ASMJIT_PROPAGATE(sb.append(", "));
ASMJIT_PROPAGATE(formatRegister(sb, flags, emitter, arch, m.indexType(), m.indexId()));
}
if (m.hasOffset()) {
ASMJIT_PROPAGATE(sb.append(", "));
int64_t off = int64_t(m.offset());
uint32_t base = 10;
if (Support::test(flags, FormatFlags::kHexOffsets) && uint64_t(off) > 9)
base = 16;
if (base == 10) {
ASMJIT_PROPAGATE(sb.appendInt(off, base));
}
else {
ASMJIT_PROPAGATE(sb.append("0x"));
ASMJIT_PROPAGATE(sb.appendUInt(uint64_t(off), base));
}
}
if (m.hasShift()) {
ASMJIT_PROPAGATE(sb.append(' '));
if (!m.isPreOrPost())
ASMJIT_PROPAGATE(formatShiftOp(sb, (ShiftOp)m.predicate()));
ASMJIT_PROPAGATE(sb.appendFormat(" %u", m.shift()));
}
if (!m.isPostIndex())
ASMJIT_PROPAGATE(sb.append(']'));
if (m.isPreIndex())
ASMJIT_PROPAGATE(sb.append('!'));
return kErrorOk;
}
if (op.isImm()) {
const Imm& i = op.as<Imm>();
int64_t val = i.value();
if (Support::test(flags, FormatFlags::kHexImms) && uint64_t(val) > 9) {
ASMJIT_PROPAGATE(sb.append("0x"));
return sb.appendUInt(uint64_t(val), 16);
}
else {
return sb.appendInt(val, 10);
}
}
if (op.isLabel()) {
return Formatter::formatLabel(sb, flags, emitter, op.id());
}
return sb.append("<None>");
}
// a64::FormatterInternal - Format Instruction
// ===========================================
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatInstruction(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept {
DebugUtils::unused(arch);
// Format instruction options and instruction mnemonic.
InstId instId = inst.realId();
if (instId < Inst::_kIdCount)
ASMJIT_PROPAGATE(InstInternal::instIdToString(arch, instId, sb));
else
ASMJIT_PROPAGATE(sb.appendFormat("[InstId=#%u]", unsigned(instId)));
CondCode cc = inst.armCondCode();
if (cc != CondCode::kAL) {
ASMJIT_PROPAGATE(sb.append('.'));
ASMJIT_PROPAGATE(formatCondCode(sb, cc));
}
for (uint32_t i = 0; i < opCount; i++) {
const Operand_& op = operands[i];
if (op.isNone())
break;
ASMJIT_PROPAGATE(sb.append(i == 0 ? " " : ", "));
ASMJIT_PROPAGATE(formatOperand(sb, flags, emitter, arch, op));
}
return kErrorOk;
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_LOGGING

src/asmjit/arm/a64formatter_p.h Normal file

@@ -0,0 +1,59 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64FORMATTER_P_H_INCLUDED
#define ASMJIT_ARM_A64FORMATTER_P_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_LOGGING
#include "../core/formatter.h"
#include "../core/string.h"
#include "../arm/armformatter_p.h"
#include "../arm/a64globals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \cond INTERNAL
//! \addtogroup asmjit_a64
//! \{
namespace FormatterInternal {
using namespace arm::FormatterInternal;
Error ASMJIT_CDECL formatRegister(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
RegType regType,
uint32_t regId,
uint32_t elementType = 0,
uint32_t elementIndex = 0xFFFFFFFFu) noexcept;
Error ASMJIT_CDECL formatOperand(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
const Operand_& op) noexcept;
Error ASMJIT_CDECL formatInstruction(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept;
} // {FormatterInternal}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_LOGGING
#endif // ASMJIT_ARM_A64FORMATTER_P_H_INCLUDED

src/asmjit/arm/a64func.cpp Normal file

@@ -0,0 +1,189 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_AARCH64)
#include "../arm/a64func_p.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
namespace FuncInternal {
static inline bool shouldThreatAsCDecl(CallConvId ccId) noexcept {
return ccId == CallConvId::kCDecl ||
ccId == CallConvId::kStdCall ||
ccId == CallConvId::kFastCall ||
ccId == CallConvId::kVectorCall ||
ccId == CallConvId::kThisCall ||
ccId == CallConvId::kRegParm1 ||
ccId == CallConvId::kRegParm2 ||
ccId == CallConvId::kRegParm3;
}
static RegType regTypeFromFpOrVecTypeId(TypeId typeId) noexcept {
if (typeId == TypeId::kFloat32)
return RegType::kARM_VecS;
else if (typeId == TypeId::kFloat64)
return RegType::kARM_VecD;
else if (TypeUtils::isVec32(typeId))
return RegType::kARM_VecS;
else if (TypeUtils::isVec64(typeId))
return RegType::kARM_VecD;
else if (TypeUtils::isVec128(typeId))
return RegType::kARM_VecV;
else
return RegType::kNone;
}
ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept {
cc.setArch(environment.arch());
cc.setSaveRestoreRegSize(RegGroup::kGp, 8);
cc.setSaveRestoreRegSize(RegGroup::kVec, 8);
cc.setSaveRestoreAlignment(RegGroup::kGp, 16);
cc.setSaveRestoreAlignment(RegGroup::kVec, 16);
cc.setSaveRestoreAlignment(RegGroup::kExtraVirt2, 1);
cc.setSaveRestoreAlignment(RegGroup::kExtraVirt3, 1);
cc.setPassedOrder(RegGroup::kGp, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setNaturalStackAlignment(16);
if (shouldThreatAsCDecl(ccId)) {
// ARM doesn't have as many calling conventions as the X86 world, so treat most of them as __cdecl.
cc.setId(CallConvId::kCDecl);
cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(Gp::kIdOs, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30));
cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(8, 9, 10, 11, 12, 13, 14, 15));
}
else {
cc.setId(ccId);
cc.setSaveRestoreRegSize(RegGroup::kVec, 16);
cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30));
cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31));
}
return kErrorOk;
}
ASMJIT_FAVOR_SIZE Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept {
DebugUtils::unused(signature);
const CallConv& cc = func.callConv();
uint32_t stackOffset = 0;
uint32_t i;
uint32_t argCount = func.argCount();
if (func.hasRet()) {
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
TypeId typeId = func._rets[valueIndex].typeId();
// Terminate at the first void type (end of the pack).
if (typeId == TypeId::kVoid)
break;
switch (typeId) {
case TypeId::kInt8:
case TypeId::kInt16:
case TypeId::kInt32: {
func._rets[valueIndex].initReg(RegType::kARM_GpW, valueIndex, TypeId::kInt32);
break;
}
case TypeId::kUInt8:
case TypeId::kUInt16:
case TypeId::kUInt32: {
func._rets[valueIndex].initReg(RegType::kARM_GpW, valueIndex, TypeId::kUInt32);
break;
}
case TypeId::kInt64:
case TypeId::kUInt64: {
func._rets[valueIndex].initReg(RegType::kARM_GpX, valueIndex, typeId);
break;
}
default: {
RegType regType = regTypeFromFpOrVecTypeId(typeId);
if (regType == RegType::kNone)
return DebugUtils::errored(kErrorInvalidRegType);
func._rets[valueIndex].initReg(regType, valueIndex, typeId);
break;
}
}
}
}
switch (cc.strategy()) {
case CallConvStrategy::kDefault: {
uint32_t gpzPos = 0;
uint32_t vecPos = 0;
for (i = 0; i < argCount; i++) {
FuncValue& arg = func._args[i][0];
TypeId typeId = arg.typeId();
if (TypeUtils::isInt(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (gpzPos < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[RegGroup::kGp].id[gpzPos];
if (regId != BaseReg::kIdBad) {
RegType regType = typeId <= TypeId::kUInt32 ? RegType::kARM_GpW : RegType::kARM_GpX;
arg.assignRegData(regType, regId);
func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
gpzPos++;
}
else {
uint32_t size = Support::max<uint32_t>(TypeUtils::sizeOf(typeId), registerSize);
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += size;
}
continue;
}
if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (vecPos < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[RegGroup::kVec].id[vecPos];
if (regId != BaseReg::kIdBad) {
RegType regType = regTypeFromFpOrVecTypeId(typeId);
if (regType == RegType::kNone)
return DebugUtils::errored(kErrorInvalidRegType);
arg.initTypeId(typeId);
arg.assignRegData(regType, regId);
func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
vecPos++;
}
else {
uint32_t size = TypeUtils::sizeOf(typeId);
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += size;
}
continue;
}
}
break;
}
default:
return DebugUtils::errored(kErrorInvalidState);
}
func._argStackSize = stackOffset;
return kErrorOk;
}
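// For illustration: with the default strategy, a signature like
// `int f(int a, double b, int c)` is assigned a -> w0, b -> d0, c -> w1;
// only arguments beyond x0..x7 / v0..v7 receive [sp]-relative stack offsets.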
} // {FuncInternal}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_AARCH64

src/asmjit/arm/a64func_p.h Normal file

@@ -0,0 +1,33 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64FUNC_P_H_INCLUDED
#define ASMJIT_ARM_A64FUNC_P_H_INCLUDED
#include "../core/func.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \cond INTERNAL
//! \addtogroup asmjit_a64
//! \{
//! AArch64-specific function API (calling conventions and other utilities).
namespace FuncInternal {
//! Initialize `CallConv` structure (AArch64 specific).
Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept;
//! Initialize `FuncDetail` (AArch64 specific).
Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept;
} // {FuncInternal}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64FUNC_P_H_INCLUDED

src/asmjit/arm/a64globals.h Normal file (1894 lines)

File diff suppressed because it is too large.

src/asmjit/arm/a64instapi.cpp Normal file

@@ -0,0 +1,278 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_AARCH64)
#include "../core/cpuinfo.h"
#include "../core/misc_p.h"
#include "../core/support.h"
#include "../arm/a64instapi_p.h"
#include "../arm/a64instdb_p.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
// a64::InstInternal - Text
// ========================
#ifndef ASMJIT_NO_TEXT
Error InstInternal::instIdToString(Arch arch, InstId instId, String& output) noexcept {
uint32_t realId = instId & uint32_t(InstIdParts::kRealId);
DebugUtils::unused(arch);
if (ASMJIT_UNLIKELY(!Inst::isDefinedId(realId)))
return DebugUtils::errored(kErrorInvalidInstruction);
const InstDB::InstInfo& info = InstDB::infoById(realId);
return output.append(InstDB::_nameData + info._nameDataIndex);
}
InstId InstInternal::stringToInstId(Arch arch, const char* s, size_t len) noexcept {
DebugUtils::unused(arch);
if (ASMJIT_UNLIKELY(!s))
return Inst::kIdNone;
if (len == SIZE_MAX)
len = strlen(s);
if (ASMJIT_UNLIKELY(len == 0 || len > InstDB::kMaxNameSize))
return Inst::kIdNone;
uint32_t prefix = uint32_t(s[0]) - 'a';
if (ASMJIT_UNLIKELY(prefix > 'z' - 'a'))
return Inst::kIdNone;
uint32_t index = InstDB::instNameIndex[prefix].start;
if (ASMJIT_UNLIKELY(!index))
return Inst::kIdNone;
const char* nameData = InstDB::_nameData;
const InstDB::InstInfo* table = InstDB::_instInfoTable;
const InstDB::InstInfo* base = table + index;
const InstDB::InstInfo* end = table + InstDB::instNameIndex[prefix].end;
for (size_t lim = (size_t)(end - base); lim != 0; lim >>= 1) {
const InstDB::InstInfo* cur = base + (lim >> 1);
int result = Support::cmpInstName(nameData + cur[0]._nameDataIndex, s, len);
if (result < 0) {
base = cur + 1;
lim--;
continue;
}
if (result > 0)
continue;
return uint32_t((size_t)(cur - table));
}
return Inst::kIdNone;
}
#endif // !ASMJIT_NO_TEXT
// a64::InstInternal - Validate
// ============================
#ifndef ASMJIT_NO_VALIDATION
ASMJIT_FAVOR_SIZE Error InstInternal::validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept {
// TODO:
DebugUtils::unused(arch, inst, operands, opCount, validationFlags);
return kErrorOk;
}
#endif // !ASMJIT_NO_VALIDATION
// a64::InstInternal - QueryRWInfo
// ===============================
#ifndef ASMJIT_NO_INTROSPECTION
struct InstRWInfoData {
uint8_t rwx[Globals::kMaxOpCount];
};
static const InstRWInfoData instRWInfoData[] = {
#define R uint8_t(OpRWFlags::kRead)
#define W uint8_t(OpRWFlags::kWrite)
#define X uint8_t(OpRWFlags::kRW)
{{ R, R, R, R, R, R }}, // kRWI_R
{{ R, W, R, R, R, R }}, // kRWI_RW
{{ R, X, R, R, R, R }}, // kRWI_RX
{{ R, R, W, R, R, R }}, // kRWI_RRW
{{ R, W, X, R, R, R }}, // kRWI_RWX
{{ W, R, R, R, R, R }}, // kRWI_W
{{ W, R, W, R, R, R }}, // kRWI_WRW
{{ W, R, X, R, R, R }}, // kRWI_WRX
{{ W, R, R, W, R, R }}, // kRWI_WRRW
{{ W, R, R, X, R, R }}, // kRWI_WRRX
{{ W, W, R, R, R, R }}, // kRWI_WW
{{ X, R, R, R, R, R }}, // kRWI_X
{{ X, R, X, R, R, R }}, // kRWI_XRX
{{ X, X, R, R, X, R }}, // kRWI_XXRRX
{{ W, R, R, R, R, R }}, // kRWI_LDn
{{ R, W, R, R, R, R }}, // kRWI_STn
{{ R, R, R, R, R, R }} // kRWI_TODO
#undef R
#undef W
#undef X
};
static const uint8_t elementTypeSize[8] = { 0, 1, 2, 4, 8, 4, 4, 0 };
Error InstInternal::queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept {
// Unused in Release configuration as the assert is not compiled in.
DebugUtils::unused(arch);
// Only called when `arch` matches the ARM family.
ASMJIT_ASSERT(Environment::isFamilyARM(arch));
// Get the instruction data.
uint32_t realId = inst.id() & uint32_t(InstIdParts::kRealId);
if (ASMJIT_UNLIKELY(!Inst::isDefinedId(realId)))
return DebugUtils::errored(kErrorInvalidInstruction);
out->_instFlags = InstRWFlags::kNone;
out->_opCount = uint8_t(opCount);
out->_rmFeature = 0;
out->_extraReg.reset();
out->_readFlags = CpuRWFlags::kNone; // TODO: [ARM] Read PSTATUS.
out->_writeFlags = CpuRWFlags::kNone; // TODO: [ARM] Write PSTATUS
const InstDB::InstInfo& instInfo = InstDB::_instInfoTable[realId];
const InstRWInfoData& rwInfo = instRWInfoData[instInfo.rwInfoIndex()];
if (instInfo.hasFlag(InstDB::kInstFlagConsecutive) && opCount > 2) {
for (uint32_t i = 0; i < opCount; i++) {
OpRWInfo& op = out->_operands[i];
const Operand_& srcOp = operands[i];
if (!srcOp.isRegOrMem()) {
op.reset();
continue;
}
OpRWFlags rwFlags = i < opCount - 1 ? (OpRWFlags)rwInfo.rwx[0] : (OpRWFlags)rwInfo.rwx[1];
op._opFlags = rwFlags & ~(OpRWFlags::kZExt);
op._physId = BaseReg::kIdBad;
op._rmSize = 0;
op._resetReserved();
uint64_t rByteMask = op.isRead() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u;
uint64_t wByteMask = op.isWrite() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u;
op._readByteMask = rByteMask;
op._writeByteMask = wByteMask;
op._extendByteMask = 0;
op._consecutiveLeadCount = 0;
if (srcOp.isReg()) {
if (i == 0)
op._consecutiveLeadCount = uint8_t(opCount - 1);
else
op.addOpFlags(OpRWFlags::kConsecutive);
}
else {
const Mem& memOp = srcOp.as<Mem>();
if (memOp.hasBase()) {
op.addOpFlags(OpRWFlags::kMemBaseRead);
}
if (memOp.hasIndex()) {
op.addOpFlags(OpRWFlags::kMemIndexRead);
op.addOpFlags(memOp.isPreOrPost() ? OpRWFlags::kMemIndexWrite : OpRWFlags::kNone);
}
}
}
}
else {
for (uint32_t i = 0; i < opCount; i++) {
OpRWInfo& op = out->_operands[i];
const Operand_& srcOp = operands[i];
if (!srcOp.isRegOrMem()) {
op.reset();
continue;
}
OpRWFlags rwFlags = (OpRWFlags)rwInfo.rwx[i];
op._opFlags = rwFlags & ~(OpRWFlags::kZExt);
op._physId = BaseReg::kIdBad;
op._rmSize = 0;
op._resetReserved();
uint64_t rByteMask = op.isRead() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u;
uint64_t wByteMask = op.isWrite() ? 0xFFFFFFFFFFFFFFFFu : 0x0000000000000000u;
op._readByteMask = rByteMask;
op._writeByteMask = wByteMask;
op._extendByteMask = 0;
op._consecutiveLeadCount = 0;
if (srcOp.isReg()) {
if (srcOp.as<Vec>().hasElementIndex()) {
// Only part of the vector is accessed if element index [] is used.
uint32_t elementType = srcOp.as<Vec>().elementType();
uint32_t elementIndex = srcOp.as<Vec>().elementIndex();
uint32_t elementSize = elementTypeSize[elementType];
uint64_t accessMask = uint64_t(Support::lsbMask<uint32_t>(elementSize)) << (elementIndex * elementSize);
op._readByteMask &= accessMask;
op._writeByteMask &= accessMask;
}
// TODO: [ARM] RW info is not finished.
}
else {
const Mem& memOp = srcOp.as<Mem>();
if (memOp.hasBase()) {
op.addOpFlags(OpRWFlags::kMemBaseRead);
}
if (memOp.hasIndex()) {
op.addOpFlags(OpRWFlags::kMemIndexRead);
op.addOpFlags(memOp.isPreOrPost() ? OpRWFlags::kMemIndexWrite : OpRWFlags::kNone);
}
}
}
}
return kErrorOk;
}
#endif // !ASMJIT_NO_INTROSPECTION
// a64::InstInternal - QueryFeatures
// =================================
#ifndef ASMJIT_NO_INTROSPECTION
Error InstInternal::queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept {
// TODO: [ARM] QueryFeatures not implemented yet.
DebugUtils::unused(arch, inst, operands, opCount, out);
return kErrorOk;
}
#endif // !ASMJIT_NO_INTROSPECTION
// a64::InstInternal - Unit
// ========================
#if defined(ASMJIT_TEST)
UNIT(arm_inst_api_text) {
// TODO:
}
#endif
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_AARCH64
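
These internals back the architecture-independent asmjit::InstAPI entry points. A round-trip sketch, assuming the InstAPI wrappers from the core library:

String name;
InstAPI::instIdToString(Arch::kAArch64, a64::Inst::kIdAdd, name);      // name == "add"
InstId id = InstAPI::stringToInstId(Arch::kAArch64, "add", SIZE_MAX);  // back to kIdAdd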

src/asmjit/arm/a64instapi_p.h Normal file

@@ -0,0 +1,41 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64INSTAPI_P_H_INCLUDED
#define ASMJIT_ARM_A64INSTAPI_P_H_INCLUDED
#include "../core/inst.h"
#include "../core/operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \cond INTERNAL
//! \addtogroup asmjit_a64
//! \{
namespace InstInternal {
#ifndef ASMJIT_NO_TEXT
Error ASMJIT_CDECL instIdToString(Arch arch, InstId instId, String& output) noexcept;
InstId ASMJIT_CDECL stringToInstId(Arch arch, const char* s, size_t len) noexcept;
#endif // !ASMJIT_NO_TEXT
#ifndef ASMJIT_NO_VALIDATION
Error ASMJIT_CDECL validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept;
#endif // !ASMJIT_NO_VALIDATION
#ifndef ASMJIT_NO_INTROSPECTION
Error ASMJIT_CDECL queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept;
Error ASMJIT_CDECL queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept;
#endif // !ASMJIT_NO_INTROSPECTION
} // {InstInternal}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64INSTAPI_P_H_INCLUDED

src/asmjit/arm/a64instdb.cpp Normal file (1957 lines)

File diff suppressed because it is too large.

src/asmjit/arm/a64instdb.h Normal file

@@ -0,0 +1,74 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64INSTDB_H_INCLUDED
#define ASMJIT_ARM_A64INSTDB_H_INCLUDED
#include "../arm/a64globals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \addtogroup asmjit_a64
//! \{
//! Instruction database (AArch64).
namespace InstDB {
//! Instruction flags.
enum InstFlags : uint32_t {
//! The instruction provides conditional execution.
kInstFlagCond = 0x00000001u,
//! SIMD instruction that processes elements in pairs.
kInstFlagPair = 0x00000002u,
//! SIMD instruction that does widening (Long).
kInstFlagLong = 0x00000004u,
//! SIMD instruction that does narrowing (Narrow).
kInstFlagNarrow = 0x00000008u,
//! SIMD element access of half-words can only be used with v0..15.
kInstFlagVH0_15 = 0x00000010u,
//! Instruction may use consecutive registers if the number of operands is greater than 2.
kInstFlagConsecutive = 0x00000080u
};
//! Instruction information (AArch64).
struct InstInfo {
//! Instruction encoding type.
uint32_t _encoding : 8;
//! Index to data specific to each encoding type.
uint32_t _encodingDataIndex : 8;
uint32_t _reserved : 2;
//! Index to \ref _nameData.
uint32_t _nameDataIndex : 14;
uint16_t _rwInfoIndex;
uint16_t _flags;
//! \name Accessors
//! \{
inline uint32_t rwInfoIndex() const noexcept { return _rwInfoIndex; }
inline uint32_t flags() const noexcept { return _flags; }
inline bool hasFlag(uint32_t flag) const { return (_flags & flag) != 0; }
//! \}
};
ASMJIT_VARAPI const InstInfo _instInfoTable[];
static inline const InstInfo& infoById(InstId instId) noexcept {
instId &= uint32_t(InstIdParts::kRealId);
ASMJIT_ASSERT(Inst::isDefinedId(instId));
return _instInfoTable[instId];
}
} // {InstDB}
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64INSTDB_H_INCLUDED

src/asmjit/arm/a64instdb_p.h Normal file

@@ -0,0 +1,876 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64INSTDB_H_P_INCLUDED
#define ASMJIT_ARM_A64INSTDB_H_P_INCLUDED
#include "../core/codeholder.h"
#include "../arm/a64instdb.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \cond INTERNAL
//! \addtogroup asmjit_a64
//! \{
namespace InstDB {
// a64::InstDB - Constants Used by Instructions
// ============================================
// GP register types supported by base instructions.
static constexpr uint32_t kW = 0x1;
static constexpr uint32_t kX = 0x2;
static constexpr uint32_t kWX = 0x3;
// GP high register IDs supported by the instruction.
static constexpr uint32_t kZR = Gp::kIdZr;
static constexpr uint32_t kSP = Gp::kIdSp;
// a64::InstDB - RWInfo
// ====================
enum RWInfoType : uint32_t {
kRWI_R,
kRWI_RW,
kRWI_RX,
kRWI_RRW,
kRWI_RWX,
kRWI_W,
kRWI_WRW,
kRWI_WRX,
kRWI_WRRW,
kRWI_WRRX,
kRWI_WW,
kRWI_X,
kRWI_XRX,
kRWI_XXRRX,
kRWI_LDn,
kRWI_STn,
kRWI_SpecialStart = kRWI_LDn
};
// a64::InstDB - ElementType
// =========================
enum ElementType : uint8_t {
kET_None = Vec::kElementTypeNone,
kET_B = Vec::kElementTypeB,
kET_H = Vec::kElementTypeH,
kET_S = Vec::kElementTypeS,
kET_D = Vec::kElementTypeD,
kET_2H = Vec::kElementTypeH2,
kET_4B = Vec::kElementTypeB4
};
// a64::InstDB - GpType
// ====================
enum GpType : uint8_t {
kGp_W,
kGp_X,
kGp_X_SP
};
// a64::InstDB - OPSig
// ===================
enum kOpSignature : uint32_t {
kOp_GpW = GpW::kSignature,
kOp_GpX = GpX::kSignature,
kOp_B = VecB::kSignature,
kOp_H = VecH::kSignature,
kOp_S = VecS::kSignature,
kOp_D = VecD::kSignature,
kOp_Q = VecV::kSignature,
kOp_V8B = VecD::kSignature | Vec::kSignatureElementB,
kOp_V4H = VecD::kSignature | Vec::kSignatureElementH,
kOp_V2S = VecD::kSignature | Vec::kSignatureElementS,
kOp_V16B = VecV::kSignature | Vec::kSignatureElementB,
kOp_V8H = VecV::kSignature | Vec::kSignatureElementH,
kOp_V4S = VecV::kSignature | Vec::kSignatureElementS,
kOp_V2D = VecV::kSignature | Vec::kSignatureElementD
};
// a64::InstDB - HFConv
// ====================
enum kHFConv : uint32_t {
//! FP16 version of the instruction is not available.
kHF_N,
//! Doesn't do any change to the opcode.
kHF_0,
kHF_A,
kHF_B,
kHF_C,
kHF_D,
kHF_Count
};
// a64::InstDB - VOType
// ====================
//! Vector operand type combinations used by FP&SIMD instructions.
enum VOType : uint32_t {
kVO_V_B,
kVO_V_BH,
kVO_V_BH_4S,
kVO_V_BHS,
kVO_V_BHS_D2,
kVO_V_HS,
kVO_V_S,
kVO_V_B8H4,
kVO_V_B8H4S2,
kVO_V_B8D1,
kVO_V_H4S2,
kVO_V_B16,
kVO_V_B16H8,
kVO_V_B16H8S4,
kVO_V_B16D2,
kVO_V_H8S4,
kVO_V_S4,
kVO_V_D2,
kVO_SV_BHS,
kVO_SV_B8H4S2,
kVO_SV_HS,
kVO_V_Any,
kVO_SV_Any,
kVO_Count
};
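// Naming scheme (inferred from usage): `V_` entries accept vector operands
// only and `SV_` entries accept scalar or vector; the suffix lists allowed
// element sizes, e.g. kVO_V_BHS accepts B, H, and S elements.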
// a64::InstDB - EncodingId
// ========================
// ${EncodingId:Begin}
// ------------------- Automatically generated, do not edit -------------------
enum EncodingId : uint32_t {
kEncodingNone = 0,
kEncodingBaseAddSub,
kEncodingBaseAdr,
kEncodingBaseAtDcIcTlbi,
kEncodingBaseAtomicCasp,
kEncodingBaseAtomicOp,
kEncodingBaseAtomicSt,
kEncodingBaseBfc,
kEncodingBaseBfi,
kEncodingBaseBfm,
kEncodingBaseBfx,
kEncodingBaseBranchCmp,
kEncodingBaseBranchReg,
kEncodingBaseBranchRel,
kEncodingBaseBranchTst,
kEncodingBaseCCmp,
kEncodingBaseCInc,
kEncodingBaseCSel,
kEncodingBaseCSet,
kEncodingBaseCmpCmn,
kEncodingBaseExtend,
kEncodingBaseExtract,
kEncodingBaseLdSt,
kEncodingBaseLdpStp,
kEncodingBaseLdxp,
kEncodingBaseLogical,
kEncodingBaseMov,
kEncodingBaseMovKNZ,
kEncodingBaseMrs,
kEncodingBaseMsr,
kEncodingBaseMvnNeg,
kEncodingBaseOp,
kEncodingBaseOpImm,
kEncodingBaseR,
kEncodingBaseRM_NoImm,
kEncodingBaseRM_SImm10,
kEncodingBaseRM_SImm9,
kEncodingBaseRR,
kEncodingBaseRRII,
kEncodingBaseRRR,
kEncodingBaseRRRR,
kEncodingBaseRev,
kEncodingBaseShift,
kEncodingBaseStx,
kEncodingBaseStxp,
kEncodingBaseSys,
kEncodingBaseTst,
kEncodingFSimdPair,
kEncodingFSimdSV,
kEncodingFSimdVV,
kEncodingFSimdVVV,
kEncodingFSimdVVVV,
kEncodingFSimdVVVe,
kEncodingISimdPair,
kEncodingISimdSV,
kEncodingISimdVV,
kEncodingISimdVVV,
kEncodingISimdVVVI,
kEncodingISimdVVVV,
kEncodingISimdVVVVx,
kEncodingISimdVVVe,
kEncodingISimdVVVx,
kEncodingISimdVVx,
kEncodingISimdWWV,
kEncodingSimdBicOrr,
kEncodingSimdCmp,
kEncodingSimdDot,
kEncodingSimdDup,
kEncodingSimdFcadd,
kEncodingSimdFccmpFccmpe,
kEncodingSimdFcm,
kEncodingSimdFcmla,
kEncodingSimdFcmpFcmpe,
kEncodingSimdFcsel,
kEncodingSimdFcvt,
kEncodingSimdFcvtLN,
kEncodingSimdFcvtSV,
kEncodingSimdFmlal,
kEncodingSimdFmov,
kEncodingSimdIns,
kEncodingSimdLdNStN,
kEncodingSimdLdSt,
kEncodingSimdLdpStp,
kEncodingSimdLdurStur,
kEncodingSimdMov,
kEncodingSimdMoviMvni,
kEncodingSimdShift,
kEncodingSimdShiftES,
kEncodingSimdSm3tt,
kEncodingSimdSmovUmov,
kEncodingSimdSxtlUxtl,
kEncodingSimdTblTbx
};
// ----------------------------------------------------------------------------
// ${EncodingId:End}
// a64::InstDB::EncodingData
// =========================
namespace EncodingData {
#define M_OPCODE(field, bits) \
uint32_t _##field : bits; \
inline constexpr uint32_t field() const noexcept { return uint32_t(_##field) << (32 - bits); }
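// For example, M_OPCODE(opcode, 22) expands to a 22-bit bit-field `_opcode`
// plus an accessor `opcode()` that shifts the stored value left by 10 bits
// (32 - 22), reconstructing the full 32-bit opcode pattern.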
struct BaseOp {
uint32_t opcode;
};
struct BaseOpImm {
uint32_t opcode;
uint16_t immBits;
uint16_t immOffset;
};
struct BaseR {
uint32_t opcode;
uint32_t rType : 8;
uint32_t rHiId : 8;
uint32_t rShift : 8;
};
struct BaseRR {
uint32_t opcode;
uint32_t aType : 2;
uint32_t aHiId : 6;
uint32_t aShift : 5;
uint32_t bType : 2;
uint32_t bHiId : 6;
uint32_t bShift : 5;
uint32_t uniform : 1;
};
struct BaseRRR {
M_OPCODE(opcode, 22)
uint32_t aType : 2;
uint32_t aHiId : 6;
uint32_t bType : 2;
uint32_t bHiId : 6;
uint32_t cType : 2;
uint32_t cHiId : 6;
uint32_t uniform : 1;
};
struct BaseRRRR {
M_OPCODE(opcode, 22)
uint32_t aType : 2;
uint32_t aHiId : 6;
uint32_t bType : 2;
uint32_t bHiId : 6;
uint32_t cType : 2;
uint32_t cHiId : 6;
uint32_t dType : 2;
uint32_t dHiId : 6;
uint32_t uniform : 1;
};
struct BaseRRII {
M_OPCODE(opcode, 22)
uint32_t aType : 2;
uint32_t aHiId : 6;
uint32_t bType : 2;
uint32_t bHiId : 6;
uint32_t aImmSize : 6;
uint32_t aImmDiscardLsb : 5;
uint32_t aImmOffset : 5;
uint32_t bImmSize : 6;
uint32_t bImmDiscardLsb : 5;
uint32_t bImmOffset : 5;
};
struct BaseAtDcIcTlbi {
uint32_t immVerifyMask : 14;
uint32_t immVerifyData : 14;
uint32_t mandatoryReg : 1;
};
struct BaseAdcSbc {
uint32_t opcode;
};
struct BaseAddSub {
uint32_t shiftedOp : 10; // sf|.......|Sh|.|Rm| Imm:6 |Rn|Rd|
uint32_t extendedOp : 10; // sf|.......|..|.|Rm|Opt|Imm3|Rn|Rd|
uint32_t immediateOp: 10; // sf|.......|Sh| Imm:12 |Rn|Rd|
};
struct BaseAdr {
M_OPCODE(opcode, 22)
OffsetType offsetType : 8;
};
struct BaseBfm {
uint32_t opcode; // sf|........|N|ImmR:6|ImmS:6|Rn|Rd|
};
struct BaseCmpCmn {
uint32_t shiftedOp : 10; // sf|.......|Sh|.|Rm| Imm:6 |Rn|11111|
uint32_t extendedOp : 10; // sf|.......|..|.|Rm|Opt|Imm3|Rn|11111|
uint32_t immediateOp: 10; // sf|.......|Sh| Imm:12 |Rn|11111|
};
struct BaseExtend {
M_OPCODE(opcode, 22) // sf|........|N|......|......|Rn|Rd|
uint32_t rType : 2;
uint32_t u : 1;
};
struct BaseLogical {
uint32_t shiftedOp : 10; // sf|.......|Sh|.|Rm| Imm:6 |Rn|Rd|
uint32_t immediateOp: 10; // sf|........|N|ImmR:6|ImmS:6|Rn|Rd|
uint32_t negateImm : 1; // True if this is an operation that must negate IMM.
};
struct BaseMvnNeg {
uint32_t opcode;
};
struct BaseShift {
M_OPCODE(registerOp, 22)
M_OPCODE(immediateOp, 22)
uint32_t ror : 2;
};
struct BaseTst {
uint32_t shiftedOp : 10; // sf|.......|Sh|.|Rm| Imm:6 |Rn|11111|
uint32_t immediateOp: 10; // sf|........|N|ImmR:6|ImmS:6|Rn|11111|
};
struct BaseRM_NoImm {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t rHiId : 6;
uint32_t xOffset : 5;
};
struct BaseRM_SImm9 {
M_OPCODE(offsetOp, 22)
M_OPCODE(prePostOp, 22)
uint32_t rType : 2;
uint32_t rHiId : 6;
uint32_t xOffset : 5;
uint32_t immShift : 4;
};
struct BaseRM_SImm10 {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t rHiId : 6;
uint32_t xOffset : 5;
uint32_t immShift : 4;
};
struct BaseLdSt {
uint32_t uOffsetOp : 10;
uint32_t prePostOp : 11;
uint32_t registerOp : 11;
uint32_t literalOp : 8;
uint32_t rType : 2;
uint32_t xOffset : 5;
uint32_t uOffsetShift : 3;
uint32_t uAltInstId : 14;
};
struct BaseLdpStp {
uint32_t offsetOp : 10;
uint32_t prePostOp : 10;
uint32_t rType : 2;
uint32_t xOffset : 5;
uint32_t offsetShift : 3;
};
struct BaseStx {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t xOffset : 5;
};
struct BaseLdxp {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t xOffset : 5;
};
struct BaseStxp {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t xOffset : 5;
};
struct BaseAtomicOp {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t xOffset : 5;
uint32_t zr : 1;
};
struct BaseAtomicSt {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t xOffset : 5;
};
struct BaseAtomicCasp {
M_OPCODE(opcode, 22)
uint32_t rType : 2;
uint32_t xOffset : 5;
};
typedef BaseOp BaseBranchReg;
typedef BaseOp BaseBranchRel;
typedef BaseOp BaseBranchCmp;
typedef BaseOp BaseBranchTst;
typedef BaseOp BaseExtract;
typedef BaseOp BaseBfc;
typedef BaseOp BaseBfi;
typedef BaseOp BaseBfx;
typedef BaseOp BaseCCmp;
typedef BaseOp BaseCInc;
typedef BaseOp BaseCSet;
typedef BaseOp BaseCSel;
typedef BaseOp BaseMovKNZ;
typedef BaseOp BaseMull;
struct FSimdGeneric {
uint32_t _scalarOp : 28;
uint32_t _scalarHf : 4;
uint32_t _vectorOp : 28;
uint32_t _vectorHf : 4;
constexpr uint32_t scalarOp() const noexcept { return uint32_t(_scalarOp) << 10; }
constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; }
constexpr uint32_t scalarHf() const noexcept { return uint32_t(_scalarHf); }
constexpr uint32_t vectorHf() const noexcept { return uint32_t(_vectorHf); }
};
typedef FSimdGeneric FSimdVV;
typedef FSimdGeneric FSimdVVV;
typedef FSimdGeneric FSimdVVVV;
struct FSimdSV {
uint32_t opcode;
};
struct FSimdVVVe {
uint32_t _scalarOp : 28;
uint32_t _scalarHf : 4;
uint32_t _vectorOp;
uint32_t _elementOp;
constexpr uint32_t scalarOp() const noexcept { return uint32_t(_scalarOp) << 10; }
constexpr uint32_t scalarHf() const noexcept { return uint32_t(_scalarHf); }
constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; }
constexpr uint32_t vectorHf() const noexcept { return kHF_C; }
constexpr uint32_t elementScalarOp() const noexcept { return (uint32_t(_elementOp) << 10) | (0x5u << 28); }
constexpr uint32_t elementVectorOp() const noexcept { return (uint32_t(_elementOp) << 10); }
};
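// Note: the `0x5u << 28` ORed into scalar opcodes here (and in similar
// structs below) selects the scalar (as opposed to vector) form of the
// encoding; the tables store only the part common to both forms.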
struct SimdFcadd {
uint32_t _opcode;
constexpr uint32_t opcode() const noexcept { return _opcode << 10; }
};
struct SimdFcmla {
uint32_t _regularOp;
uint32_t _elementOp;
constexpr uint32_t regularOp() const noexcept { return uint32_t(_regularOp) << 10; }
constexpr uint32_t elementOp() const noexcept { return (uint32_t(_elementOp) << 10); }
};
struct SimdFccmpFccmpe {
uint32_t _opcode;
constexpr uint32_t opcode() const noexcept { return _opcode; }
};
struct SimdFcm {
uint32_t _registerOp : 28;
uint32_t _registerHf : 4;
uint32_t _zeroOp : 28;
constexpr bool hasRegisterOp() const noexcept { return _registerOp != 0; }
constexpr bool hasZeroOp() const noexcept { return _zeroOp != 0; }
constexpr uint32_t registerScalarOp() const noexcept { return (uint32_t(_registerOp) << 10) | (0x5u << 28); }
constexpr uint32_t registerVectorOp() const noexcept { return uint32_t(_registerOp) << 10; }
constexpr uint32_t registerScalarHf() const noexcept { return uint32_t(_registerHf); }
constexpr uint32_t registerVectorHf() const noexcept { return uint32_t(_registerHf); }
constexpr uint32_t zeroScalarOp() const noexcept { return (uint32_t(_zeroOp) << 10) | (0x5u << 28); }
constexpr uint32_t zeroVectorOp() const noexcept { return (uint32_t(_zeroOp) << 10); }
};
struct SimdFcmpFcmpe {
uint32_t _opcode;
constexpr uint32_t opcode() const noexcept { return _opcode; }
};
struct SimdFcvtLN {
uint32_t _opcode : 22;
uint32_t _isCvtxn : 1;
uint32_t _hasScalar : 1;
constexpr uint32_t scalarOp() const noexcept { return (uint32_t(_opcode) << 10) | (0x5u << 28); }
constexpr uint32_t vectorOp() const noexcept { return (uint32_t(_opcode) << 10); }
constexpr uint32_t isCvtxn() const noexcept { return _isCvtxn; }
constexpr uint32_t hasScalar() const noexcept { return _hasScalar; }
};
struct SimdFcvtSV {
uint32_t _vectorIntOp;
uint32_t _vectorFpOp;
uint32_t _generalOp : 31;
uint32_t _isFloatToInt : 1;
constexpr uint32_t scalarIntOp() const noexcept { return (uint32_t(_vectorIntOp) << 10) | (0x5u << 28); }
constexpr uint32_t vectorIntOp() const noexcept { return uint32_t(_vectorIntOp) << 10; }
constexpr uint32_t scalarFpOp() const noexcept { return (uint32_t(_vectorFpOp) << 10) | (0x5u << 28); }
constexpr uint32_t vectorFpOp() const noexcept { return uint32_t(_vectorFpOp) << 10; }
constexpr uint32_t generalOp() const noexcept { return (uint32_t(_generalOp) << 10); }
constexpr uint32_t isFloatToInt() const noexcept { return _isFloatToInt; }
constexpr uint32_t isFixedPoint() const noexcept { return _vectorFpOp != 0; }
};
struct SimdFmlal {
uint32_t _vectorOp;
uint32_t _elementOp;
uint8_t _optionalQ;
uint8_t tA;
uint8_t tB;
uint8_t tElement;
constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; }
constexpr uint32_t elementOp() const noexcept { return uint32_t(_elementOp) << 10; }
constexpr uint32_t optionalQ() const noexcept { return _optionalQ; }
};
struct FSimdPair {
uint32_t _scalarOp;
uint32_t _vectorOp;
constexpr uint32_t scalarOp() const noexcept { return uint32_t(_scalarOp) << 10; }
constexpr uint32_t vectorOp() const noexcept { return uint32_t(_vectorOp) << 10; }
};
struct ISimdVV {
M_OPCODE(opcode, 22)
uint32_t vecOpType : 6;
};
struct ISimdVVx {
M_OPCODE(opcode, 22)
uint32_t op0Signature;
uint32_t op1Signature;
};
struct ISimdSV {
M_OPCODE(opcode, 22)
uint32_t vecOpType : 6;
};
struct ISimdVVV {
M_OPCODE(opcode, 22)
uint32_t vecOpType : 6;
};
struct ISimdVVVx {
M_OPCODE(opcode, 22)
uint32_t op0Signature;
uint32_t op1Signature;
uint32_t op2Signature;
};
struct ISimdWWV {
M_OPCODE(opcode, 22)
uint32_t vecOpType : 6;
};
struct ISimdVVVe {
uint32_t regularOp : 26; // 22 bits used.
uint32_t regularVecType : 6;
uint32_t elementOp : 26; // 22 bits used.
uint32_t elementVecType : 6;
};
struct ISimdVVVI {
M_OPCODE(opcode, 22)
uint32_t vecOpType : 6;
uint32_t immSize : 4;
uint32_t immShift : 4;
uint32_t imm64HasOneBitLess : 1;
};
struct ISimdVVVV {
uint32_t opcode : 22;
uint32_t vecOpType : 6;
};
struct ISimdVVVVx {
uint32_t opcode;
uint32_t op0Signature;
uint32_t op1Signature;
uint32_t op2Signature;
uint32_t op3Signature;
};
struct SimdBicOrr {
uint32_t registerOp; // 22 bits used.
uint32_t immediateOp; // 22 bits used.
};
struct SimdCmp {
uint32_t regOp;
uint32_t zeroOp : 22;
uint32_t vecOpType : 6;
};
struct SimdDot {
uint32_t vectorOp; // 22 bits used.
uint32_t elementOp; // 22 bits used.
uint8_t tA; // Element-type of the first operand.
uint8_t tB; // Element-type of the second and third operands.
uint8_t tElement; // Element-type of the element index[] operand.
};
struct SimdMoviMvni {
uint32_t opcode : 31;
uint32_t inverted : 1;
};
struct SimdLdSt {
uint32_t uOffsetOp : 10;
uint32_t prePostOp : 11;
uint32_t registerOp : 11;
uint32_t literalOp : 8;
uint32_t uAltInstId : 16;
};
struct SimdLdNStN {
uint32_t singleOp;
uint32_t multipleOp : 22;
uint32_t n : 3;
uint32_t replicate : 1;
};
struct SimdLdpStp {
uint32_t offsetOp : 10;
uint32_t prePostOp : 10;
};
struct SimdLdurStur {
uint32_t opcode;
};
struct ISimdPair {
uint32_t opcode2; // 22 bits used.
uint32_t opcode3 : 26; // 22 bits used.
uint32_t opType3 : 6;
};
struct SimdShift {
uint32_t registerOp; // 22 bits used.
uint32_t immediateOp : 22; // 22 bits used.
uint32_t invertedImm : 1;
uint32_t vecOpType : 6;
};
struct SimdShiftES {
uint32_t opcode : 22;
uint32_t vecOpType : 6;
};
struct SimdSm3tt {
uint32_t opcode;
};
struct SimdSmovUmov {
uint32_t opcode : 22;
uint32_t vecOpType : 6;
uint32_t isSigned : 1;
};
struct SimdSxtlUxtl {
uint32_t opcode : 22;
uint32_t vecOpType : 6;
};
struct SimdTblTbx {
uint32_t opcode;
};
#undef M_OPCODE
// ${EncodingDataForward:Begin}
// ------------------- Automatically generated, do not edit -------------------
extern const BaseAddSub baseAddSub[4];
extern const BaseAdr baseAdr[2];
extern const BaseAtDcIcTlbi baseAtDcIcTlbi[4];
extern const BaseAtomicCasp baseAtomicCasp[4];
extern const BaseAtomicOp baseAtomicOp[123];
extern const BaseAtomicSt baseAtomicSt[48];
extern const BaseBfc baseBfc[1];
extern const BaseBfi baseBfi[3];
extern const BaseBfm baseBfm[3];
extern const BaseBfx baseBfx[3];
extern const BaseBranchCmp baseBranchCmp[2];
extern const BaseBranchReg baseBranchReg[3];
extern const BaseBranchRel baseBranchRel[2];
extern const BaseBranchTst baseBranchTst[2];
extern const BaseCCmp baseCCmp[2];
extern const BaseCInc baseCInc[3];
extern const BaseCSel baseCSel[4];
extern const BaseCSet baseCSet[2];
extern const BaseCmpCmn baseCmpCmn[2];
extern const BaseExtend baseExtend[5];
extern const BaseExtract baseExtract[1];
extern const BaseLdSt baseLdSt[9];
extern const BaseLdpStp baseLdpStp[6];
extern const BaseLdxp baseLdxp[2];
extern const BaseLogical baseLogical[8];
extern const BaseMovKNZ baseMovKNZ[3];
extern const BaseMvnNeg baseMvnNeg[3];
extern const BaseOp baseOp[23];
extern const BaseOpImm baseOpImm[14];
extern const BaseR baseR[10];
extern const BaseRM_NoImm baseRM_NoImm[21];
extern const BaseRM_SImm10 baseRM_SImm10[2];
extern const BaseRM_SImm9 baseRM_SImm9[23];
extern const BaseRR baseRR[15];
extern const BaseRRII baseRRII[2];
extern const BaseRRR baseRRR[26];
extern const BaseRRRR baseRRRR[6];
extern const BaseShift baseShift[8];
extern const BaseStx baseStx[3];
extern const BaseStxp baseStxp[2];
extern const BaseTst baseTst[1];
extern const FSimdPair fSimdPair[5];
extern const FSimdSV fSimdSV[4];
extern const FSimdVV fSimdVV[17];
extern const FSimdVVV fSimdVVV[13];
extern const FSimdVVVV fSimdVVVV[4];
extern const FSimdVVVe fSimdVVVe[4];
extern const ISimdPair iSimdPair[1];
extern const ISimdSV iSimdSV[7];
extern const ISimdVV iSimdVV[29];
extern const ISimdVVV iSimdVVV[65];
extern const ISimdVVVI iSimdVVVI[2];
extern const ISimdVVVV iSimdVVVV[2];
extern const ISimdVVVVx iSimdVVVVx[1];
extern const ISimdVVVe iSimdVVVe[25];
extern const ISimdVVVx iSimdVVVx[17];
extern const ISimdVVx iSimdVVx[13];
extern const ISimdWWV iSimdWWV[8];
extern const SimdBicOrr simdBicOrr[2];
extern const SimdCmp simdCmp[7];
extern const SimdDot simdDot[5];
extern const SimdFcadd simdFcadd[1];
extern const SimdFccmpFccmpe simdFccmpFccmpe[2];
extern const SimdFcm simdFcm[5];
extern const SimdFcmla simdFcmla[1];
extern const SimdFcmpFcmpe simdFcmpFcmpe[2];
extern const SimdFcvtLN simdFcvtLN[6];
extern const SimdFcvtSV simdFcvtSV[12];
extern const SimdFmlal simdFmlal[6];
extern const SimdLdNStN simdLdNStN[12];
extern const SimdLdSt simdLdSt[2];
extern const SimdLdpStp simdLdpStp[4];
extern const SimdLdurStur simdLdurStur[2];
extern const SimdMoviMvni simdMoviMvni[2];
extern const SimdShift simdShift[40];
extern const SimdShiftES simdShiftES[2];
extern const SimdSm3tt simdSm3tt[4];
extern const SimdSmovUmov simdSmovUmov[2];
extern const SimdSxtlUxtl simdSxtlUxtl[4];
extern const SimdTblTbx simdTblTbx[2];
// ----------------------------------------------------------------------------
// ${EncodingDataForward:End}
} // {EncodingData}
// a64::InstDB - InstNameIndex
// ===========================
// ${NameLimits:Begin}
// ------------------- Automatically generated, do not edit -------------------
enum : uint32_t { kMaxNameSize = 9 };
// ----------------------------------------------------------------------------
// ${NameLimits:End}
struct InstNameIndex {
uint16_t start;
uint16_t end;
};
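// Each entry presumably maps one initial letter ('a'..'z') to the [start, end)
// range of instruction ids whose name begins with that letter, which is why
// the `instNameIndex` table below has 26 entries.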
// a64::InstDB - Tables
// ====================
#ifndef ASMJIT_NO_TEXT
extern const char _nameData[];
extern const InstNameIndex instNameIndex[26];
#endif // !ASMJIT_NO_TEXT
} // {InstDB}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64INSTDB_H_P_INCLUDED

src/asmjit/arm/a64operand.cpp Normal file

@@ -0,0 +1,85 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_AARCH64)
#include "../core/misc_p.h"
#include "../arm/a64operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
// a64::Operand - Tests
// ====================
#if defined(ASMJIT_TEST)
UNIT(a64_operand) {
INFO("Checking if a64::reg(...) matches built-in IDs");
EXPECT(w(5) == w5);
EXPECT(x(5) == x5);
INFO("Checking Gp register properties");
EXPECT(Gp().isReg() == true);
EXPECT(w0.isReg() == true);
EXPECT(x0.isReg() == true);
EXPECT(w0.id() == 0);
EXPECT(x0.id() == 0);
EXPECT(wzr.id() == Gp::kIdZr);
EXPECT(xzr.id() == Gp::kIdZr);
EXPECT(wsp.id() == Gp::kIdSp);
EXPECT(sp.id() == Gp::kIdSp);
EXPECT(w0.size() == 4);
EXPECT(x0.size() == 8);
EXPECT(w0.type() == RegType::kARM_GpW);
EXPECT(x0.type() == RegType::kARM_GpX);
EXPECT(w0.group() == RegGroup::kGp);
EXPECT(x0.group() == RegGroup::kGp);
INFO("Checking Vec register properties");
EXPECT(v0.type() == RegType::kARM_VecV);
EXPECT(d0.type() == RegType::kARM_VecD);
EXPECT(s0.type() == RegType::kARM_VecS);
EXPECT(h0.type() == RegType::kARM_VecH);
EXPECT(b0.type() == RegType::kARM_VecB);
EXPECT(v0.group() == RegGroup::kVec);
EXPECT(d0.group() == RegGroup::kVec);
EXPECT(s0.group() == RegGroup::kVec);
EXPECT(h0.group() == RegGroup::kVec);
EXPECT(b0.group() == RegGroup::kVec);
INFO("Checking Vec register element[] access");
Vec vd_1 = v15.d(1);
EXPECT(vd_1.type() == RegType::kARM_VecV);
EXPECT(vd_1.group() == RegGroup::kVec);
EXPECT(vd_1.id() == 15);
EXPECT(vd_1.isVecD2());
EXPECT(vd_1.elementType() == Vec::kElementTypeD);
EXPECT(vd_1.hasElementIndex());
EXPECT(vd_1.elementIndex() == 1);
Vec vs_3 = v15.s(3);
EXPECT(vs_3.type() == RegType::kARM_VecV);
EXPECT(vs_3.group() == RegGroup::kVec);
EXPECT(vs_3.id() == 15);
EXPECT(vs_3.isVecS4());
EXPECT(vs_3.elementType() == Vec::kElementTypeS);
EXPECT(vs_3.hasElementIndex());
EXPECT(vs_3.elementIndex() == 3);
Vec vb_4 = v15.b4(3);
EXPECT(vb_4.type() == RegType::kARM_VecV);
EXPECT(vb_4.group() == RegGroup::kVec);
EXPECT(vb_4.id() == 15);
EXPECT(vb_4.isVecB4x4());
EXPECT(vb_4.elementType() == Vec::kElementTypeB4);
EXPECT(vb_4.hasElementIndex());
EXPECT(vb_4.elementIndex() == 3);
}
#endif
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_AARCH64

312
src/asmjit/arm/a64operand.h Normal file

@@ -0,0 +1,312 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64OPERAND_H_INCLUDED
#define ASMJIT_ARM_A64OPERAND_H_INCLUDED
#include "../arm/armoperand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \addtogroup asmjit_a64
//! \{
using arm::Reg;
using arm::Mem;
using arm::Gp;
using arm::GpW;
using arm::GpX;
using arm::Vec;
using arm::VecB;
using arm::VecH;
using arm::VecS;
using arm::VecD;
using arm::VecV;
#ifndef _DOXYGEN
namespace regs {
#endif
using namespace ::asmjit::arm::regs;
static constexpr GpW w0 = GpW(0);
static constexpr GpW w1 = GpW(1);
static constexpr GpW w2 = GpW(2);
static constexpr GpW w3 = GpW(3);
static constexpr GpW w4 = GpW(4);
static constexpr GpW w5 = GpW(5);
static constexpr GpW w6 = GpW(6);
static constexpr GpW w7 = GpW(7);
static constexpr GpW w8 = GpW(8);
static constexpr GpW w9 = GpW(9);
static constexpr GpW w10 = GpW(10);
static constexpr GpW w11 = GpW(11);
static constexpr GpW w12 = GpW(12);
static constexpr GpW w13 = GpW(13);
static constexpr GpW w14 = GpW(14);
static constexpr GpW w15 = GpW(15);
static constexpr GpW w16 = GpW(16);
static constexpr GpW w17 = GpW(17);
static constexpr GpW w18 = GpW(18);
static constexpr GpW w19 = GpW(19);
static constexpr GpW w20 = GpW(20);
static constexpr GpW w21 = GpW(21);
static constexpr GpW w22 = GpW(22);
static constexpr GpW w23 = GpW(23);
static constexpr GpW w24 = GpW(24);
static constexpr GpW w25 = GpW(25);
static constexpr GpW w26 = GpW(26);
static constexpr GpW w27 = GpW(27);
static constexpr GpW w28 = GpW(28);
static constexpr GpW w29 = GpW(29);
static constexpr GpW w30 = GpW(30);
static constexpr GpW wzr = GpW(Gp::kIdZr);
static constexpr GpW wsp = GpW(Gp::kIdSp);
static constexpr GpX x0 = GpX(0);
static constexpr GpX x1 = GpX(1);
static constexpr GpX x2 = GpX(2);
static constexpr GpX x3 = GpX(3);
static constexpr GpX x4 = GpX(4);
static constexpr GpX x5 = GpX(5);
static constexpr GpX x6 = GpX(6);
static constexpr GpX x7 = GpX(7);
static constexpr GpX x8 = GpX(8);
static constexpr GpX x9 = GpX(9);
static constexpr GpX x10 = GpX(10);
static constexpr GpX x11 = GpX(11);
static constexpr GpX x12 = GpX(12);
static constexpr GpX x13 = GpX(13);
static constexpr GpX x14 = GpX(14);
static constexpr GpX x15 = GpX(15);
static constexpr GpX x16 = GpX(16);
static constexpr GpX x17 = GpX(17);
static constexpr GpX x18 = GpX(18);
static constexpr GpX x19 = GpX(19);
static constexpr GpX x20 = GpX(20);
static constexpr GpX x21 = GpX(21);
static constexpr GpX x22 = GpX(22);
static constexpr GpX x23 = GpX(23);
static constexpr GpX x24 = GpX(24);
static constexpr GpX x25 = GpX(25);
static constexpr GpX x26 = GpX(26);
static constexpr GpX x27 = GpX(27);
static constexpr GpX x28 = GpX(28);
static constexpr GpX x29 = GpX(29);
static constexpr GpX x30 = GpX(30);
static constexpr GpX xzr = GpX(Gp::kIdZr);
static constexpr GpX sp = GpX(Gp::kIdSp);
static constexpr VecB b0 = VecB(0);
static constexpr VecB b1 = VecB(1);
static constexpr VecB b2 = VecB(2);
static constexpr VecB b3 = VecB(3);
static constexpr VecB b4 = VecB(4);
static constexpr VecB b5 = VecB(5);
static constexpr VecB b6 = VecB(6);
static constexpr VecB b7 = VecB(7);
static constexpr VecB b8 = VecB(8);
static constexpr VecB b9 = VecB(9);
static constexpr VecB b10 = VecB(10);
static constexpr VecB b11 = VecB(11);
static constexpr VecB b12 = VecB(12);
static constexpr VecB b13 = VecB(13);
static constexpr VecB b14 = VecB(14);
static constexpr VecB b15 = VecB(15);
static constexpr VecB b16 = VecB(16);
static constexpr VecB b17 = VecB(17);
static constexpr VecB b18 = VecB(18);
static constexpr VecB b19 = VecB(19);
static constexpr VecB b20 = VecB(20);
static constexpr VecB b21 = VecB(21);
static constexpr VecB b22 = VecB(22);
static constexpr VecB b23 = VecB(23);
static constexpr VecB b24 = VecB(24);
static constexpr VecB b25 = VecB(25);
static constexpr VecB b26 = VecB(26);
static constexpr VecB b27 = VecB(27);
static constexpr VecB b28 = VecB(28);
static constexpr VecB b29 = VecB(29);
static constexpr VecB b30 = VecB(30);
static constexpr VecB b31 = VecB(31);
static constexpr VecH h0 = VecH(0);
static constexpr VecH h1 = VecH(1);
static constexpr VecH h2 = VecH(2);
static constexpr VecH h3 = VecH(3);
static constexpr VecH h4 = VecH(4);
static constexpr VecH h5 = VecH(5);
static constexpr VecH h6 = VecH(6);
static constexpr VecH h7 = VecH(7);
static constexpr VecH h8 = VecH(8);
static constexpr VecH h9 = VecH(9);
static constexpr VecH h10 = VecH(10);
static constexpr VecH h11 = VecH(11);
static constexpr VecH h12 = VecH(12);
static constexpr VecH h13 = VecH(13);
static constexpr VecH h14 = VecH(14);
static constexpr VecH h15 = VecH(15);
static constexpr VecH h16 = VecH(16);
static constexpr VecH h17 = VecH(17);
static constexpr VecH h18 = VecH(18);
static constexpr VecH h19 = VecH(19);
static constexpr VecH h20 = VecH(20);
static constexpr VecH h21 = VecH(21);
static constexpr VecH h22 = VecH(22);
static constexpr VecH h23 = VecH(23);
static constexpr VecH h24 = VecH(24);
static constexpr VecH h25 = VecH(25);
static constexpr VecH h26 = VecH(26);
static constexpr VecH h27 = VecH(27);
static constexpr VecH h28 = VecH(28);
static constexpr VecH h29 = VecH(29);
static constexpr VecH h30 = VecH(30);
static constexpr VecH h31 = VecH(31);
static constexpr VecS s0 = VecS(0);
static constexpr VecS s1 = VecS(1);
static constexpr VecS s2 = VecS(2);
static constexpr VecS s3 = VecS(3);
static constexpr VecS s4 = VecS(4);
static constexpr VecS s5 = VecS(5);
static constexpr VecS s6 = VecS(6);
static constexpr VecS s7 = VecS(7);
static constexpr VecS s8 = VecS(8);
static constexpr VecS s9 = VecS(9);
static constexpr VecS s10 = VecS(10);
static constexpr VecS s11 = VecS(11);
static constexpr VecS s12 = VecS(12);
static constexpr VecS s13 = VecS(13);
static constexpr VecS s14 = VecS(14);
static constexpr VecS s15 = VecS(15);
static constexpr VecS s16 = VecS(16);
static constexpr VecS s17 = VecS(17);
static constexpr VecS s18 = VecS(18);
static constexpr VecS s19 = VecS(19);
static constexpr VecS s20 = VecS(20);
static constexpr VecS s21 = VecS(21);
static constexpr VecS s22 = VecS(22);
static constexpr VecS s23 = VecS(23);
static constexpr VecS s24 = VecS(24);
static constexpr VecS s25 = VecS(25);
static constexpr VecS s26 = VecS(26);
static constexpr VecS s27 = VecS(27);
static constexpr VecS s28 = VecS(28);
static constexpr VecS s29 = VecS(29);
static constexpr VecS s30 = VecS(30);
static constexpr VecS s31 = VecS(31);
static constexpr VecD d0 = VecD(0);
static constexpr VecD d1 = VecD(1);
static constexpr VecD d2 = VecD(2);
static constexpr VecD d3 = VecD(3);
static constexpr VecD d4 = VecD(4);
static constexpr VecD d5 = VecD(5);
static constexpr VecD d6 = VecD(6);
static constexpr VecD d7 = VecD(7);
static constexpr VecD d8 = VecD(8);
static constexpr VecD d9 = VecD(9);
static constexpr VecD d10 = VecD(10);
static constexpr VecD d11 = VecD(11);
static constexpr VecD d12 = VecD(12);
static constexpr VecD d13 = VecD(13);
static constexpr VecD d14 = VecD(14);
static constexpr VecD d15 = VecD(15);
static constexpr VecD d16 = VecD(16);
static constexpr VecD d17 = VecD(17);
static constexpr VecD d18 = VecD(18);
static constexpr VecD d19 = VecD(19);
static constexpr VecD d20 = VecD(20);
static constexpr VecD d21 = VecD(21);
static constexpr VecD d22 = VecD(22);
static constexpr VecD d23 = VecD(23);
static constexpr VecD d24 = VecD(24);
static constexpr VecD d25 = VecD(25);
static constexpr VecD d26 = VecD(26);
static constexpr VecD d27 = VecD(27);
static constexpr VecD d28 = VecD(28);
static constexpr VecD d29 = VecD(29);
static constexpr VecD d30 = VecD(30);
static constexpr VecD d31 = VecD(31);
static constexpr VecV q0 = VecV(0);
static constexpr VecV q1 = VecV(1);
static constexpr VecV q2 = VecV(2);
static constexpr VecV q3 = VecV(3);
static constexpr VecV q4 = VecV(4);
static constexpr VecV q5 = VecV(5);
static constexpr VecV q6 = VecV(6);
static constexpr VecV q7 = VecV(7);
static constexpr VecV q8 = VecV(8);
static constexpr VecV q9 = VecV(9);
static constexpr VecV q10 = VecV(10);
static constexpr VecV q11 = VecV(11);
static constexpr VecV q12 = VecV(12);
static constexpr VecV q13 = VecV(13);
static constexpr VecV q14 = VecV(14);
static constexpr VecV q15 = VecV(15);
static constexpr VecV q16 = VecV(16);
static constexpr VecV q17 = VecV(17);
static constexpr VecV q18 = VecV(18);
static constexpr VecV q19 = VecV(19);
static constexpr VecV q20 = VecV(20);
static constexpr VecV q21 = VecV(21);
static constexpr VecV q22 = VecV(22);
static constexpr VecV q23 = VecV(23);
static constexpr VecV q24 = VecV(24);
static constexpr VecV q25 = VecV(25);
static constexpr VecV q26 = VecV(26);
static constexpr VecV q27 = VecV(27);
static constexpr VecV q28 = VecV(28);
static constexpr VecV q29 = VecV(29);
static constexpr VecV q30 = VecV(30);
static constexpr VecV q31 = VecV(31);
static constexpr VecV v0 = VecV(0);
static constexpr VecV v1 = VecV(1);
static constexpr VecV v2 = VecV(2);
static constexpr VecV v3 = VecV(3);
static constexpr VecV v4 = VecV(4);
static constexpr VecV v5 = VecV(5);
static constexpr VecV v6 = VecV(6);
static constexpr VecV v7 = VecV(7);
static constexpr VecV v8 = VecV(8);
static constexpr VecV v9 = VecV(9);
static constexpr VecV v10 = VecV(10);
static constexpr VecV v11 = VecV(11);
static constexpr VecV v12 = VecV(12);
static constexpr VecV v13 = VecV(13);
static constexpr VecV v14 = VecV(14);
static constexpr VecV v15 = VecV(15);
static constexpr VecV v16 = VecV(16);
static constexpr VecV v17 = VecV(17);
static constexpr VecV v18 = VecV(18);
static constexpr VecV v19 = VecV(19);
static constexpr VecV v20 = VecV(20);
static constexpr VecV v21 = VecV(21);
static constexpr VecV v22 = VecV(22);
static constexpr VecV v23 = VecV(23);
static constexpr VecV v24 = VecV(24);
static constexpr VecV v25 = VecV(25);
static constexpr VecV v26 = VecV(26);
static constexpr VecV v27 = VecV(27);
static constexpr VecV v28 = VecV(28);
static constexpr VecV v29 = VecV(29);
static constexpr VecV v30 = VecV(30);
static constexpr VecV v31 = VecV(31);
#ifndef _DOXYGEN
} // {regs}
// Make `a64::regs` accessible through `a64` namespace as well.
using namespace regs;
#endif
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64OPERAND_H_INCLUDED
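
(The register constants above are meant to be used directly with the AArch64 emitters. A minimal sketch, assuming a JitRuntime `rt` already exists and omitting error handling:)

using namespace asmjit;
CodeHolder code;
code.init(rt.environment());
a64::Assembler a(&code);
a.add(a64::w0, a64::w0, a64::w1); // w0 += w1
a.ret(a64::x30);                  // return via the link register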

src/asmjit/arm/a64rapass.cpp Normal file

@@ -0,0 +1,852 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_AARCH64) && !defined(ASMJIT_NO_COMPILER)
#include "../core/cpuinfo.h"
#include "../core/support.h"
#include "../core/type.h"
#include "../arm/a64assembler.h"
#include "../arm/a64compiler.h"
#include "../arm/a64emithelper_p.h"
#include "../arm/a64instapi_p.h"
#include "../arm/a64instdb_p.h"
#include "../arm/a64rapass_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
// a64::ARMRAPass - Helpers
// ========================
// TODO: [ARM] These should be shared with all backends.
ASMJIT_MAYBE_UNUSED
static inline uint64_t raImmMaskFromSize(uint32_t size) noexcept {
ASMJIT_ASSERT(size > 0 && size < 256);
static const uint64_t masks[] = {
0x00000000000000FFu, // 1
0x000000000000FFFFu, // 2
0x00000000FFFFFFFFu, // 4
0xFFFFFFFFFFFFFFFFu, // 8
0x0000000000000000u, // 16
0x0000000000000000u, // 32
0x0000000000000000u, // 64
0x0000000000000000u, // 128
0x0000000000000000u // 256
};
return masks[Support::ctz(size)];
}
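// Example: raImmMaskFromSize(4) -> masks[ctz(4)] == masks[2] ->
// 0x00000000FFFFFFFFu; sizes above 8 map to 0 because a value wider than
// 64 bits cannot be covered by a single GP register mask.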
static const RegMask raConsecutiveLeadCountToRegMaskFilter[5] = {
0xFFFFFFFFu, // [0] No consecutive.
0x00000000u, // [1] Invalid, never used.
0x7FFFFFFFu, // [2] 2 consecutive registers.
0x3FFFFFFFu, // [3] 3 consecutive registers.
0x1FFFFFFFu // [4] 4 consecutive registers.
};
static inline RATiedFlags raUseOutFlagsFromRWFlags(OpRWFlags rwFlags) noexcept {
static constexpr RATiedFlags map[] = {
RATiedFlags::kNone,
RATiedFlags::kRead | RATiedFlags::kUse, // kRead
RATiedFlags::kWrite | RATiedFlags::kOut, // kWrite
RATiedFlags::kRW | RATiedFlags::kUse, // kRW
};
return map[uint32_t(rwFlags & OpRWFlags::kRW)];
}
static inline RATiedFlags raRegRwFlags(OpRWFlags flags) noexcept {
return raUseOutFlagsFromRWFlags(flags);
}
static inline RATiedFlags raMemBaseRwFlags(OpRWFlags flags) noexcept {
constexpr uint32_t shift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemBaseRW)>::value;
return raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> shift) & OpRWFlags::kRW);
}
static inline RATiedFlags raMemIndexRwFlags(OpRWFlags flags) noexcept {
constexpr uint32_t shift = Support::ConstCTZ<uint32_t(OpRWFlags::kMemIndexRW)>::value;
return raUseOutFlagsFromRWFlags(OpRWFlags(uint32_t(flags) >> shift) & OpRWFlags::kRW);
}
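// The two helpers above extract the RW flags describing a memory operand's
// base/index register: those flags are stored at the bit position of
// kMemBaseRW / kMemIndexRW, so shifting right by that position (CTZ) aligns
// them with the plain kRW bits before reusing raUseOutFlagsFromRWFlags().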
// a64::RACFGBuilder
// =================
class RACFGBuilder : public RACFGBuilderT<RACFGBuilder> {
public:
Arch _arch;
inline RACFGBuilder(ARMRAPass* pass) noexcept
: RACFGBuilderT<RACFGBuilder>(pass),
_arch(pass->cc()->arch()) {}
inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cc); }
Error onInst(InstNode* inst, InstControlFlow& controlType, RAInstBuilder& ib) noexcept;
Error onBeforeInvoke(InvokeNode* invokeNode) noexcept;
Error onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept;
Error moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept;
Error moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept;
Error moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept;
Error onBeforeRet(FuncRetNode* funcRet) noexcept;
Error onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept;
};
// a64::RACFGBuilder - OnInst
// ==========================
// TODO: [ARM] This is just a workaround...
static InstControlFlow getControlFlowType(InstId instId) noexcept {
switch (BaseInst::extractRealId(instId)) {
case Inst::kIdB:
case Inst::kIdBr:
if (BaseInst::extractARMCondCode(instId) == CondCode::kAL)
return InstControlFlow::kJump;
else
return InstControlFlow::kBranch;
case Inst::kIdBl:
case Inst::kIdBlr:
return InstControlFlow::kCall;
case Inst::kIdCbz:
case Inst::kIdCbnz:
case Inst::kIdTbz:
case Inst::kIdTbnz:
return InstControlFlow::kBranch;
case Inst::kIdRet:
return InstControlFlow::kReturn;
default:
return InstControlFlow::kRegular;
}
}
Error RACFGBuilder::onInst(InstNode* inst, InstControlFlow& controlType, RAInstBuilder& ib) noexcept {
InstRWInfo rwInfo;
if (Inst::isDefinedId(inst->realId())) {
InstId instId = inst->id();
uint32_t opCount = inst->opCount();
const Operand* opArray = inst->operands();
ASMJIT_PROPAGATE(InstInternal::queryRWInfo(_arch, inst->baseInst(), opArray, opCount, &rwInfo));
const InstDB::InstInfo& instInfo = InstDB::infoById(instId);
uint32_t singleRegOps = 0;
ib.addInstRWFlags(rwInfo.instFlags());
if (opCount) {
uint32_t consecutiveOffset = 0xFFFFFFFFu;
uint32_t consecutiveParent = Globals::kInvalidId;
for (uint32_t i = 0; i < opCount; i++) {
const Operand& op = opArray[i];
const OpRWInfo& opRwInfo = rwInfo.operand(i);
if (op.isReg()) {
// Register Operand
// ----------------
const Reg& reg = op.as<Reg>();
RATiedFlags flags = raRegRwFlags(opRwInfo.opFlags());
uint32_t vIndex = Operand::virtIdToIndex(reg.id());
if (vIndex < Operand::kVirtIdCount) {
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
// Use RW instead of Write in case the whole register is not overwritten. This is important for
// liveness analysis as we cannot kill a register that is still going to be used.
if ((flags & RATiedFlags::kRW) == RATiedFlags::kWrite) {
if (workReg->regByteMask() & ~(opRwInfo.writeByteMask() | opRwInfo.extendByteMask())) {
// Not write-only operation.
flags = (flags & ~RATiedFlags::kOut) | (RATiedFlags::kRead | RATiedFlags::kUse);
}
}
RegGroup group = workReg->group();
RegMask useRegs = _pass->_availableRegs[group];
RegMask outRegs = useRegs;
uint32_t useId = BaseReg::kIdBad;
uint32_t outId = BaseReg::kIdBad;
uint32_t useRewriteMask = 0;
uint32_t outRewriteMask = 0;
if (opRwInfo.consecutiveLeadCount()) {
// There must be a single consecutive register lead, otherwise the RW data is invalid.
if (consecutiveOffset != 0xFFFFFFFFu)
return DebugUtils::errored(kErrorInvalidState);
// A consecutive lead register cannot be used as a consecutive +1/+2/+3 register, the registers must be distinct.
if (RATiedReg::consecutiveDataFromFlags(flags) != 0)
return DebugUtils::errored(kErrorNotConsecutiveRegs);
flags |= RATiedFlags::kLeadConsecutive | RATiedReg::consecutiveDataToFlags(opRwInfo.consecutiveLeadCount() - 1);
consecutiveOffset = 0;
RegMask filter = raConsecutiveLeadCountToRegMaskFilter[opRwInfo.consecutiveLeadCount()];
if (Support::test(flags, RATiedFlags::kUse)) {
flags |= RATiedFlags::kUseConsecutive;
useRegs &= filter;
}
else {
flags |= RATiedFlags::kOutConsecutive;
outRegs &= filter;
}
}
if (Support::test(flags, RATiedFlags::kUse)) {
useRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
useId = opRwInfo.physId();
flags |= RATiedFlags::kUseFixed;
}
else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
if (consecutiveOffset == 0xFFFFFFFFu)
return DebugUtils::errored(kErrorInvalidState);
flags |= RATiedFlags::kUseConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
}
}
else {
outRewriteMask = Support::bitMask(inst->getRewriteIndex(&reg._baseId));
if (opRwInfo.hasOpFlag(OpRWFlags::kRegPhysId)) {
outId = opRwInfo.physId();
flags |= RATiedFlags::kOutFixed;
}
else if (opRwInfo.hasOpFlag(OpRWFlags::kConsecutive)) {
if (consecutiveOffset == 0xFFFFFFFFu)
return DebugUtils::errored(kErrorInvalidState);
flags |= RATiedFlags::kOutConsecutive | RATiedReg::consecutiveDataToFlags(++consecutiveOffset);
}
}
// Special cases regarding element access.
if (reg.as<Vec>().hasElementIndex()) {
// Only registers v0..v15 can be used if the register uses an element
// accessor that accesses half-words (h[0..7] elements).
if (instInfo.hasFlag(InstDB::kInstFlagVH0_15) && reg.as<Vec>().elementType() == Vec::kElementTypeH) {
if (Support::test(flags, RATiedFlags::kUse))
useRegs &= 0x0000FFFFu;
else
outRegs &= 0x0000FFFFu;
}
}
ASMJIT_PROPAGATE(ib.add(workReg, flags, useRegs, useId, useRewriteMask, outRegs, outId, outRewriteMask, opRwInfo.rmSize(), consecutiveParent));
if (singleRegOps == i)
singleRegOps++;
if (Support::test(flags, RATiedFlags::kLeadConsecutive | RATiedFlags::kUseConsecutive | RATiedFlags::kOutConsecutive))
consecutiveParent = workReg->workId();
}
}
else if (op.isMem()) {
// Memory Operand
// --------------
const Mem& mem = op.as<Mem>();
if (mem.isRegHome()) {
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(mem.baseId()), &workReg));
_pass->getOrCreateStackSlot(workReg);
}
else if (mem.hasBaseReg()) {
uint32_t vIndex = Operand::virtIdToIndex(mem.baseId());
if (vIndex < Operand::kVirtIdCount) {
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
RATiedFlags flags = raMemBaseRwFlags(opRwInfo.opFlags());
RegGroup group = workReg->group();
RegMask allocable = _pass->_availableRegs[group];
// Base registers never have a fixed id on ARM.
const uint32_t useId = BaseReg::kIdBad;
const uint32_t outId = BaseReg::kIdBad;
uint32_t useRewriteMask = 0;
uint32_t outRewriteMask = 0;
if (Support::test(flags, RATiedFlags::kUse))
useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
else
outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._baseId));
ASMJIT_PROPAGATE(ib.add(workReg, flags, allocable, useId, useRewriteMask, allocable, outId, outRewriteMask));
}
}
if (mem.hasIndexReg()) {
uint32_t vIndex = Operand::virtIdToIndex(mem.indexId());
if (vIndex < Operand::kVirtIdCount) {
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
RATiedFlags flags = raMemIndexRwFlags(opRwInfo.opFlags());
RegGroup group = workReg->group();
RegMask allocable = _pass->_availableRegs[group];
// Index registers never have a fixed id on ARM.
const uint32_t useId = BaseReg::kIdBad;
const uint32_t outId = BaseReg::kIdBad;
uint32_t useRewriteMask = 0;
uint32_t outRewriteMask = 0;
if (Support::test(flags, RATiedFlags::kUse))
useRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
else
outRewriteMask = Support::bitMask(inst->getRewriteIndex(&mem._data[Operand::kDataMemIndexId]));
ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, allocable, useId, useRewriteMask, allocable, outId, outRewriteMask));
}
}
}
}
}
controlType = getControlFlowType(instId);
}
return kErrorOk;
}
// a64::RACFGBuilder - OnInvoke
// ============================
Error RACFGBuilder::onBeforeInvoke(InvokeNode* invokeNode) noexcept {
const FuncDetail& fd = invokeNode->detail();
uint32_t argCount = invokeNode->argCount();
cc()->_setCursor(invokeNode->prev());
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
const FuncValuePack& argPack = fd.argPack(argIndex);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
if (!argPack[valueIndex])
break;
const FuncValue& arg = argPack[valueIndex];
const Operand& op = invokeNode->arg(argIndex, valueIndex);
if (op.isNone())
continue;
if (op.isReg()) {
const Reg& reg = op.as<Reg>();
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
if (arg.isReg()) {
RegGroup regGroup = workReg->group();
RegGroup argGroup = Reg::groupOf(arg.regType());
if (regGroup != argGroup) {
// TODO: [ARM] Conversion is not supported.
return DebugUtils::errored(kErrorInvalidAssignment);
}
}
else {
ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg));
}
}
else if (op.isImm()) {
if (arg.isReg()) {
BaseReg reg;
ASMJIT_PROPAGATE(moveImmToRegArg(invokeNode, arg, op.as<Imm>(), &reg));
invokeNode->_args[argIndex][valueIndex] = reg;
}
else {
ASMJIT_PROPAGATE(moveImmToStackArg(invokeNode, arg, op.as<Imm>()));
}
}
}
}
cc()->_setCursor(invokeNode);
if (fd.hasRet()) {
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
const FuncValue& ret = fd.ret(valueIndex);
if (!ret)
break;
const Operand& op = invokeNode->ret(valueIndex);
if (op.isReg()) {
const Reg& reg = op.as<Reg>();
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
if (ret.isReg()) {
RegGroup regGroup = workReg->group();
RegGroup retGroup = Reg::groupOf(ret.regType());
if (regGroup != retGroup) {
// TODO: [ARM] Conversion is not supported.
return DebugUtils::errored(kErrorInvalidAssignment);
}
}
}
}
}
// This block has function call(s).
_curBlock->addFlags(RABlockFlags::kHasFuncCalls);
_pass->func()->frame().addAttributes(FuncAttributes::kHasFuncCalls);
_pass->func()->frame().updateCallStackSize(fd.argStackSize());
return kErrorOk;
}
Error RACFGBuilder::onInvoke(InvokeNode* invokeNode, RAInstBuilder& ib) noexcept {
uint32_t argCount = invokeNode->argCount();
const FuncDetail& fd = invokeNode->detail();
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
const FuncValuePack& argPack = fd.argPack(argIndex);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
if (!argPack[valueIndex])
continue;
const FuncValue& arg = argPack[valueIndex];
const Operand& op = invokeNode->arg(argIndex, valueIndex);
if (op.isNone())
continue;
if (op.isReg()) {
const Reg& reg = op.as<Reg>();
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
if (arg.isIndirect()) {
RegGroup regGroup = workReg->group();
if (regGroup != RegGroup::kGp)
return DebugUtils::errored(kErrorInvalidState);
ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
}
else if (arg.isReg()) {
RegGroup regGroup = workReg->group();
RegGroup argGroup = Reg::groupOf(arg.regType());
if (regGroup == argGroup) {
ASMJIT_PROPAGATE(ib.addCallArg(workReg, arg.regId()));
}
}
}
}
}
for (uint32_t retIndex = 0; retIndex < Globals::kMaxValuePack; retIndex++) {
const FuncValue& ret = fd.ret(retIndex);
if (!ret)
break;
const Operand& op = invokeNode->ret(retIndex);
if (op.isReg()) {
const Reg& reg = op.as<Reg>();
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(Operand::virtIdToIndex(reg.id()), &workReg));
if (ret.isReg()) {
RegGroup regGroup = workReg->group();
RegGroup retGroup = Reg::groupOf(ret.regType());
if (regGroup == retGroup) {
ASMJIT_PROPAGATE(ib.addCallRet(workReg, ret.regId()));
}
}
else {
return DebugUtils::errored(kErrorInvalidAssignment);
}
}
}
// Setup clobbered registers.
ib._clobbered[0] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(0)]) & ~fd.preservedRegs(RegGroup(0));
ib._clobbered[1] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(1)]) & ~fd.preservedRegs(RegGroup(1));
ib._clobbered[2] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(2)]) & ~fd.preservedRegs(RegGroup(2));
ib._clobbered[3] = Support::lsbMask<RegMask>(_pass->_physRegCount[RegGroup(3)]) & ~fd.preservedRegs(RegGroup(3));
return kErrorOk;
}
// a64::RACFGBuilder - MoveImmToRegArg
// ===================================
Error RACFGBuilder::moveImmToRegArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_, BaseReg* out) noexcept {
DebugUtils::unused(invokeNode);
ASMJIT_ASSERT(arg.isReg());
Imm imm(imm_);
TypeId typeId = TypeId::kVoid;
switch (arg.typeId()) {
case TypeId::kInt8 : typeId = TypeId::kUInt64; imm.signExtend8Bits(); break;
case TypeId::kUInt8 : typeId = TypeId::kUInt64; imm.zeroExtend8Bits(); break;
case TypeId::kInt16 : typeId = TypeId::kUInt64; imm.signExtend16Bits(); break;
case TypeId::kUInt16: typeId = TypeId::kUInt64; imm.zeroExtend16Bits(); break;
case TypeId::kInt32 : typeId = TypeId::kUInt64; imm.signExtend32Bits(); break;
case TypeId::kUInt32: typeId = TypeId::kUInt64; imm.zeroExtend32Bits(); break;
case TypeId::kInt64 : typeId = TypeId::kUInt64; break;
case TypeId::kUInt64: typeId = TypeId::kUInt64; break;
default:
return DebugUtils::errored(kErrorInvalidAssignment);
}
ASMJIT_PROPAGATE(cc()->_newReg(out, typeId, nullptr));
cc()->virtRegById(out->id())->setWeight(BaseRAPass::kCallArgWeight);
return cc()->mov(out->as<Gp>(), imm);
}
// a64::RACFGBuilder - MoveImmToStackArg
// =====================================
Error RACFGBuilder::moveImmToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const Imm& imm_) noexcept {
BaseReg reg;
ASMJIT_PROPAGATE(moveImmToRegArg(invokeNode, arg, imm_, &reg));
ASMJIT_PROPAGATE(moveRegToStackArg(invokeNode, arg, reg));
return kErrorOk;
}
// a64::RACFGBuilder - MoveRegToStackArg
// =====================================
Error RACFGBuilder::moveRegToStackArg(InvokeNode* invokeNode, const FuncValue& arg, const BaseReg& reg) noexcept {
DebugUtils::unused(invokeNode);
Mem stackPtr = ptr(_pass->_sp.as<Gp>(), arg.stackOffset());
if (reg.isGp())
return cc()->str(reg.as<Gp>(), stackPtr);
if (reg.isVec())
return cc()->str(reg.as<Vec>(), stackPtr);
return DebugUtils::errored(kErrorInvalidState);
}
// a64::RACFGBuilder - OnRet
// =========================
Error RACFGBuilder::onBeforeRet(FuncRetNode* funcRet) noexcept {
DebugUtils::unused(funcRet);
return kErrorOk;
}
Error RACFGBuilder::onRet(FuncRetNode* funcRet, RAInstBuilder& ib) noexcept {
const FuncDetail& funcDetail = _pass->func()->detail();
const Operand* opArray = funcRet->operands();
uint32_t opCount = funcRet->opCount();
for (uint32_t i = 0; i < opCount; i++) {
const Operand& op = opArray[i];
if (op.isNone()) continue;
const FuncValue& ret = funcDetail.ret(i);
if (ASMJIT_UNLIKELY(!ret.isReg()))
return DebugUtils::errored(kErrorInvalidAssignment);
if (op.isReg()) {
// Register return value.
const Reg& reg = op.as<Reg>();
uint32_t vIndex = Operand::virtIdToIndex(reg.id());
if (vIndex < Operand::kVirtIdCount) {
RAWorkReg* workReg;
ASMJIT_PROPAGATE(_pass->virtIndexAsWorkReg(vIndex, &workReg));
RegGroup group = workReg->group();
RegMask allocable = _pass->_availableRegs[group];
ASMJIT_PROPAGATE(ib.add(workReg, RATiedFlags::kUse | RATiedFlags::kRead, allocable, ret.regId(), 0, 0, BaseReg::kIdBad, 0));
}
}
else {
return DebugUtils::errored(kErrorInvalidAssignment);
}
}
return kErrorOk;
}
// a64::ARMRAPass - Construction & Destruction
// ===========================================
ARMRAPass::ARMRAPass() noexcept
: BaseRAPass() { _iEmitHelper = &_emitHelper; }
ARMRAPass::~ARMRAPass() noexcept {}
// a64::ARMRAPass - OnInit / OnDone
// ================================
void ARMRAPass::onInit() noexcept {
Arch arch = cc()->arch();
_emitHelper._emitter = _cb;
_archTraits = &ArchTraits::byArch(arch);
_physRegCount.set(RegGroup::kGp, 32);
_physRegCount.set(RegGroup::kVec, 32);
_physRegCount.set(RegGroup::kExtraVirt2, 0);
_physRegCount.set(RegGroup::kExtraVirt3, 0);
_buildPhysIndex();
_availableRegCount = _physRegCount;
_availableRegs[RegGroup::kGp] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kGp));
_availableRegs[RegGroup::kVec] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kVec));
_availableRegs[RegGroup::kExtraVirt2] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kExtraVirt2));
_availableRegs[RegGroup::kExtraVirt3] = Support::lsbMask<uint32_t>(_physRegCount.get(RegGroup::kExtraVirt3));
_scratchRegIndexes[0] = uint8_t(27);
_scratchRegIndexes[1] = uint8_t(28);
// The architecture-specific setup implicitly makes all registers available, so
// mark as unavailable all special registers that cannot be used in general.
bool hasFP = _func->frame().hasPreservedFP();
if (hasFP)
makeUnavailable(RegGroup::kGp, Gp::kIdFp);
makeUnavailable(RegGroup::kGp, Gp::kIdSp);
makeUnavailable(RegGroup::kGp, Gp::kIdOs); // OS-specific use, usually TLS.
_sp = sp;
_fp = x29;
}
void ARMRAPass::onDone() noexcept {}
// a64::ARMRAPass - BuildCFG
// =========================
Error ARMRAPass::buildCFG() noexcept {
return RACFGBuilder(this).run();
}
// a64::ARMRAPass - Rewrite
// ========================
ASMJIT_FAVOR_SPEED Error ARMRAPass::_rewrite(BaseNode* first, BaseNode* stop) noexcept {
uint32_t virtCount = cc()->_vRegArray.size();
BaseNode* node = first;
while (node != stop) {
BaseNode* next = node->next();
if (node->isInst()) {
InstNode* inst = node->as<InstNode>();
RAInst* raInst = node->passData<RAInst>();
Operand* operands = inst->operands();
uint32_t opCount = inst->opCount();
uint32_t i;
// Rewrite virtual registers into physical registers.
if (raInst) {
// If the instruction contains pass data (raInst) then it was a subject
// for register allocation and must be rewritten to use physical regs.
RATiedReg* tiedRegs = raInst->tiedRegs();
uint32_t tiedCount = raInst->tiedCount();
for (i = 0; i < tiedCount; i++) {
RATiedReg* tiedReg = &tiedRegs[i];
Support::BitWordIterator<uint32_t> useIt(tiedReg->useRewriteMask());
uint32_t useId = tiedReg->useId();
while (useIt.hasNext())
inst->rewriteIdAtIndex(useIt.next(), useId);
Support::BitWordIterator<uint32_t> outIt(tiedReg->outRewriteMask());
uint32_t outId = tiedReg->outId();
while (outIt.hasNext())
inst->rewriteIdAtIndex(outIt.next(), outId);
}
// This data is allocated by the Zone passed to `runOnFunction()`, which
// will be reset after the RA pass finishes. So reset this data too, to
// prevent keeping a dangling pointer after the RA pass is complete.
node->resetPassData();
if (ASMJIT_UNLIKELY(node->type() != NodeType::kInst)) {
// FuncRet terminates the flow; it must either be removed if the exit
// label is next to it (optimization) or patched to an architecture
// dependent jump instruction that jumps to the function's exit before
// the epilog.
if (node->type() == NodeType::kFuncRet) {
RABlock* block = raInst->block();
if (!isNextTo(node, _func->exitNode())) {
cc()->_setCursor(node->prev());
ASMJIT_PROPAGATE(emitJump(_func->exitNode()->label()));
}
BaseNode* prev = node->prev();
cc()->removeNode(node);
block->setLast(prev);
}
}
}
// Rewrite stack slot addresses.
for (i = 0; i < opCount; i++) {
Operand& op = operands[i];
if (op.isMem()) {
BaseMem& mem = op.as<BaseMem>();
if (mem.isRegHome()) {
uint32_t virtIndex = Operand::virtIdToIndex(mem.baseId());
if (ASMJIT_UNLIKELY(virtIndex >= virtCount))
return DebugUtils::errored(kErrorInvalidVirtId);
VirtReg* virtReg = cc()->virtRegByIndex(virtIndex);
RAWorkReg* workReg = virtReg->workReg();
ASMJIT_ASSERT(workReg != nullptr);
RAStackSlot* slot = workReg->stackSlot();
int32_t offset = slot->offset();
mem._setBase(_sp.type(), slot->baseRegId());
mem.clearRegHome();
mem.addOffsetLo32(offset);
}
}
}
// Rewrite `loadAddressOf()` construct.
if (inst->realId() == Inst::kIdAdr && inst->opCount() == 2 && inst->op(1).isMem()) {
BaseMem mem = inst->op(1).as<BaseMem>();
int64_t offset = mem.offset();
if (!mem.hasBaseOrIndex()) {
inst->setId(Inst::kIdMov);
inst->setOp(1, Imm(offset));
}
else {
if (mem.hasIndex())
return DebugUtils::errored(kErrorInvalidAddressIndex);
GpX dst(inst->op(0).as<Gp>().id());
GpX base(mem.baseId());
InstId arithInstId = offset < 0 ? Inst::kIdSub : Inst::kIdAdd;
uint64_t absOffset = offset < 0 ? Support::neg(uint64_t(offset)) : uint64_t(offset);
inst->setId(arithInstId);
inst->setOpCount(3);
inst->setOp(1, base);
inst->setOp(2, Imm(absOffset));
// Use two operations if the offset cannot be encoded by a single ADD/SUB.
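// (AArch64 ADD/SUB immediate encodes a 12-bit value with an optional LSL #12,
// so offsets up to 0xFFFFFF can be split into a low and a high 12-bit part;
// anything larger is materialized with a MOV first.)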
if (absOffset > 0xFFFu && (absOffset & ~uint64_t(0xFFF000u)) != 0) {
if (absOffset <= 0xFFFFFFu) {
cc()->_setCursor(inst->prev());
ASMJIT_PROPAGATE(cc()->emit(arithInstId, dst, base, Imm(absOffset & 0xFFFu)));
inst->setOp(1, dst);
inst->setOp(2, Imm(absOffset & 0xFFF000u));
}
else {
cc()->_setCursor(inst->prev());
ASMJIT_PROPAGATE(cc()->emit(Inst::kIdMov, inst->op(0), Imm(absOffset)));
inst->setOp(1, base);
inst->setOp(2, dst);
}
}
}
}
}
node = next;
}
return kErrorOk;
}
// a64::ARMRAPass - Prolog & Epilog
// ================================
Error ARMRAPass::updateStackFrame() noexcept {
if (_func->frame().hasFuncCalls())
_func->frame().addDirtyRegs(RegGroup::kGp, Support::bitMask(Gp::kIdLr));
return BaseRAPass::updateStackFrame();
}
// a64::ARMRAPass - OnEmit
// =======================
Error ARMRAPass::emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept {
RAWorkReg* wReg = workRegById(workId);
BaseReg dst(wReg->signature(), dstPhysId);
BaseReg src(wReg->signature(), srcPhysId);
const char* comment = nullptr;
#ifndef ASMJIT_NO_LOGGING
if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
_tmpString.assignFormat("<MOVE> %s", workRegById(workId)->name());
comment = _tmpString.data();
}
#endif
return _emitHelper.emitRegMove(dst, src, wReg->typeId(), comment);
}
Error ARMRAPass::emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept {
DebugUtils::unused(aWorkId, aPhysId, bWorkId, bPhysId);
return DebugUtils::errored(kErrorInvalidState);
}
Error ARMRAPass::emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept {
RAWorkReg* wReg = workRegById(workId);
BaseReg dstReg(wReg->signature(), dstPhysId);
BaseMem srcMem(workRegAsMem(wReg));
const char* comment = nullptr;
#ifndef ASMJIT_NO_LOGGING
if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
_tmpString.assignFormat("<LOAD> %s", workRegById(workId)->name());
comment = _tmpString.data();
}
#endif
return _emitHelper.emitRegMove(dstReg, srcMem, wReg->typeId(), comment);
}
Error ARMRAPass::emitSave(uint32_t workId, uint32_t srcPhysId) noexcept {
RAWorkReg* wReg = workRegById(workId);
BaseMem dstMem(workRegAsMem(wReg));
BaseReg srcReg(wReg->signature(), srcPhysId);
const char* comment = nullptr;
#ifndef ASMJIT_NO_LOGGING
if (hasDiagnosticOption(DiagnosticOptions::kRAAnnotate)) {
_tmpString.assignFormat("<SAVE> %s", workRegById(workId)->name());
comment = _tmpString.data();
}
#endif
return _emitHelper.emitRegMove(dstMem, srcReg, wReg->typeId(), comment);
}
Error ARMRAPass::emitJump(const Label& label) noexcept {
return cc()->b(label);
}
Error ARMRAPass::emitPreCall(InvokeNode* invokeNode) noexcept {
DebugUtils::unused(invokeNode);
return kErrorOk;
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_AARCH64 && !ASMJIT_NO_COMPILER


@ -0,0 +1,105 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64RAPASS_P_H_INCLUDED
#define ASMJIT_ARM_A64RAPASS_P_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_COMPILER
#include "../core/compiler.h"
#include "../core/rabuilders_p.h"
#include "../core/rapass_p.h"
#include "../arm/a64assembler.h"
#include "../arm/a64compiler.h"
#include "../arm/a64emithelper_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \cond INTERNAL
//! \addtogroup asmjit_a64
//! \{
//! ARM register allocation pass.
//!
//! Takes care of generating function prologs and epilogs, and also performs
//! register allocation.
class ARMRAPass : public BaseRAPass {
public:
ASMJIT_NONCOPYABLE(ARMRAPass)
typedef BaseRAPass Base;
EmitHelper _emitHelper;
//! \name Construction & Destruction
//! \{
ARMRAPass() noexcept;
virtual ~ARMRAPass() noexcept;
//! \}
//! \name Accessors
//! \{
//! Returns the compiler cast to `arm::Compiler`.
inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cb); }
//! Returns emit helper.
inline EmitHelper* emitHelper() noexcept { return &_emitHelper; }
//! \}
//! \name Events
//! \{
void onInit() noexcept override;
void onDone() noexcept override;
//! \}
//! \name CFG
//! \{
Error buildCFG() noexcept override;
//! \}
//! \name Rewrite
//! \{
Error _rewrite(BaseNode* first, BaseNode* stop) noexcept override;
//! \}
//! \name Prolog & Epilog
//! \{
Error updateStackFrame() noexcept override;
//! \}
//! \name Emit Helpers
//! \{
Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override;
Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override;
Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override;
Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept override;
Error emitJump(const Label& label) noexcept override;
Error emitPreCall(InvokeNode* invokeNode) noexcept override;
//! \}
};
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_COMPILER
#endif // ASMJIT_ARM_A64RAPASS_P_H_INCLUDED

src/asmjit/arm/a64utils.h Normal file

@ -0,0 +1,179 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_A64UTILS_H_INCLUDED
#define ASMJIT_ARM_A64UTILS_H_INCLUDED
#include "../arm/a64globals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(a64)
//! \addtogroup asmjit_a64
//! \{
//! Public utilities and helpers for targeting AArch64 architecture.
namespace Utils {
//! Decomposed fields of a logical immediate value (AArch64).
struct LogicalImm {
uint32_t n;
uint32_t s;
uint32_t r;
};
//! Encodes the given `imm` value of the given `width` to a logical immediate value represented as N, S, and R fields
//! and writes these fields to `out`.
//!
//! Encoding Table:
//!
//! ```
//! +---+--------+--------+------+
//! | N | ImmS | ImmR | Size |
//! +---+--------+--------+------+
//! | 1 | ssssss | rrrrrr | 64 |
//! | 0 | 0sssss | .rrrrr | 32 |
//! | 0 | 10ssss | ..rrrr | 16 |
//! | 0 | 110sss | ...rrr | 8 |
//! | 0 | 1110ss | ....rr | 4 |
//! | 0 | 11110s | .....r | 2 |
//! +---+--------+--------+------+
//! ```
ASMJIT_MAYBE_UNUSED
static bool encodeLogicalImm(uint64_t imm, uint32_t width, a64::Utils::LogicalImm* out) noexcept {
// Determine the element width, which must be 2, 4, 8, 16, 32, or 64 bits.
do {
width /= 2;
uint64_t mask = (uint64_t(1) << width) - 1u;
if ((imm & mask) != ((imm >> width) & mask)) {
width *= 2;
break;
}
} while (width > 2);
// Patterns of all zeros and all ones are not encodable.
uint64_t lsbMask = Support::lsbMask<uint64_t>(width);
imm &= lsbMask;
if (imm == 0 || imm == lsbMask)
return false;
// Inspect the pattern and get the most important bit indexes.
//
// oIndex <-+ +-> zIndex
// | |
// |..zeros..|oCount|zCount|..ones..|
// |000000000|111111|000000|11111111|
uint32_t zIndex = Support::ctz(~imm);
uint64_t zImm = imm ^ ((uint64_t(1) << zIndex) - 1);
uint32_t zCount = (zImm ? Support::ctz(zImm) : width) - zIndex;
uint32_t oIndex = zIndex + zCount;
uint64_t oImm = ~(zImm ^ Support::lsbMask<uint64_t>(oIndex));
uint32_t oCount = (oImm ? Support::ctz(oImm) : width) - (oIndex);
// Verify whether the bit-pattern is encodable.
uint64_t mustBeZero = oImm ^ ~Support::lsbMask<uint64_t>(oIndex + oCount);
if (mustBeZero != 0 || (zIndex > 0 && width - (oIndex + oCount) != 0))
return false;
out->n = width == 64;
out->s = (oCount + zIndex - 1) | (Support::neg(width * 2) & 0x3F);
out->r = width - oIndex;
return true;
}
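// Usage sketch (added for illustration; the value is an arbitrary example):
// 0x00FF00FF00FF00FF repeats every 16 bits, so the encoder reduces the element
// width to 16 and should produce N=0, ImmS=0b100111 (the low bits encode
// "eight ones"), and ImmR=0 (no rotation):
//
//   a64::Utils::LogicalImm li;
//   if (a64::Utils::encodeLogicalImm(0x00FF00FF00FF00FFu, 64, &li)) {
//     // li.n == 0, li.s == 0x27, li.r == 0 - usable by AND/ORR/EOR/etc.
//   }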
//! Returns true if the given `imm` value is encodable as a logical immediate. The `width` argument describes the
//! width of the operation, and must be either 32 or 64. This function can be used to test whether an immediate
//! value can be used with AND, ANDS, BIC, BICS, EON, EOR, ORN, and ORR instructions.
ASMJIT_MAYBE_UNUSED
static inline bool isLogicalImm(uint64_t imm, uint32_t width) noexcept {
LogicalImm dummy;
return encodeLogicalImm(imm, width, &dummy);
}
//! Returns true if the given `imm` value is a byte mask. A byte mask has each byte of the value set to either
//! 0x00 or 0xFF. Some ARM instructions accept immediates that form a byte mask, and this function can be used to
//! verify that the immediate is encodable before using the value.
template<typename T>
static inline bool isByteMaskImm8(const T& imm) noexcept {
constexpr T kMask = T(0x0101010101010101 & Support::allOnes<T>());
return imm == (imm & kMask) * T(255);
}
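// How the check above works (explanatory note, added): masking with
// 0x0101...01 keeps the lowest bit of every byte, and multiplying by 255
// smears each kept bit across its whole byte; the product equals the input
// only if every byte was already 0x00 or 0xFF:
//
//   isByteMaskImm8<uint32_t>(0x00FF00FFu); // true:  0x00010001u * 255
//   isByteMaskImm8<uint32_t>(0x00FF00F0u); // false: the low byte is 0xF0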
//! \cond
//! A generic implementation that checks whether a floating point value can be converted to ARM Imm8.
template<typename T, uint32_t kNumBBits, uint32_t kNumCDEFGHBits, uint32_t kNumZeroBits>
static inline bool isFPImm8Generic(T val) noexcept {
constexpr uint32_t kAllBsMask = Support::lsbMask<uint32_t>(kNumBBits);
constexpr uint32_t kB0Pattern = Support::bitMask(kNumBBits - 1);
constexpr uint32_t kB1Pattern = kAllBsMask ^ kB0Pattern;
T immZ = val & Support::lsbMask<T>(kNumZeroBits);
uint32_t immB = uint32_t(val >> (kNumZeroBits + kNumCDEFGHBits)) & kAllBsMask;
// ImmZ must be all zeros and ImmB must either be B0 or B1 pattern.
return immZ == 0 && (immB == kB0Pattern || immB == kB1Pattern);
}
//! \endcond
//! Returns true if the given half precision floating point `val` can be encoded as an ARM IMM8 value, which
//! represents a limited set of floating point immediate values usable with the FMOV instruction.
//!
//! The floating point must have bits distributed in the following way:
//!
//! ```
//! [aBbbcdef|gh000000]
//! ```
static inline bool isFP16Imm8(uint32_t val) noexcept { return isFPImm8Generic<uint32_t, 3, 6, 6>(val); }
//! Returns true if the given single precision floating point `val` can be encoded as an ARM IMM8 value, which
//! represents a limited set of floating point immediate values usable with the FMOV instruction.
//!
//! The floating point must have bits distributed in the following way:
//!
//! ```
//! [aBbbbbbc|defgh000|00000000|00000000]
//! ```
static inline bool isFP32Imm8(uint32_t val) noexcept { return isFPImm8Generic<uint32_t, 6, 6, 19>(val); }
//! \overload
static inline bool isFP32Imm8(float val) noexcept { return isFP32Imm8(Support::bitCast<uint32_t>(val)); }
//! Returns true if the given double precision floating point `val` can be encoded as an ARM IMM8 value, which
//! represents a limited set of floating point immediate values usable with the FMOV instruction.
//!
//! The floating point must have bits distributed in the following way:
//!
//! ```
//! [aBbbbbbb|bbcdefgh|00000000|00000000|00000000|00000000|00000000|00000000]
//! ```
static inline bool isFP64Imm8(uint64_t val) noexcept { return isFPImm8Generic<uint64_t, 9, 6, 48>(val); }
//! \overload
static inline bool isFP64Imm8(double val) noexcept { return isFP64Imm8(Support::bitCast<uint64_t>(val)); }
//! \cond
template<typename T, uint32_t kNumBBits, uint32_t kNumCDEFGHBits, uint32_t kNumZeroBits>
static inline uint32_t encodeFPToImm8Generic(T val) noexcept {
uint32_t bits = uint32_t(val >> kNumZeroBits);
return ((bits >> (kNumBBits + kNumCDEFGHBits - 7)) & 0x80u) | (bits & 0x7F);
}
//! \endcond
//! Encodes a double precision floating point value into IMM8 format.
//!
//! \note This function expects that `isFP64Imm8(val) == true` so it doesn't perform any checks of the value and just
//! rearranges some bits into Imm8 order.
static inline uint32_t encodeFP64ToImm8(uint64_t val) noexcept { return encodeFPToImm8Generic<uint64_t, 9, 6, 48>(val); }
//! \overload
static inline uint32_t encodeFP64ToImm8(double val) noexcept { return encodeFP64ToImm8(Support::bitCast<uint64_t>(val)); }
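// Usage sketch (added for illustration): 1.0 is one of the values FMOV can
// materialize directly, while 1.1 sets fraction bits below bit 48 and is not
// encodable:
//
//   isFP64Imm8(1.0);       // true  (bit pattern 0x3FF0000000000000)
//   isFP64Imm8(1.1);       // false (ImmZ is not zero)
//   encodeFP64ToImm8(1.0); // 0x70  (imm8 'abcdefgh' == 0b01110000)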
} // {Utils}
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_A64UTILS_H_INCLUDED


@ -0,0 +1,143 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#ifndef ASMJIT_NO_LOGGING
#include "../core/misc_p.h"
#include "../core/support.h"
#include "../arm/armformatter_p.h"
#include "../arm/armoperand.h"
#include "../arm/a64instapi_p.h"
#include "../arm/a64instdb_p.h"
#ifndef ASMJIT_NO_COMPILER
#include "../core/compiler.h"
#endif
ASMJIT_BEGIN_SUB_NAMESPACE(arm)
// arm::FormatterInternal - Format Feature
// =======================================
Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept {
// @EnumStringBegin{"enum": "CpuFeatures::ARM", "output": "sFeature", "strip": "k"}@
static const char sFeatureString[] =
"None\0"
"THUMB\0"
"THUMBv2\0"
"ARMv6\0"
"ARMv7\0"
"ARMv8a\0"
"ARMv8_1a\0"
"ARMv8_2a\0"
"ARMv8_3a\0"
"ARMv8_4a\0"
"ARMv8_5a\0"
"ARMv8_6a\0"
"ARMv8_7a\0"
"VFPv2\0"
"VFPv3\0"
"VFPv4\0"
"VFP_D32\0"
"AES\0"
"ALTNZCV\0"
"ASIMD\0"
"BF16\0"
"BTI\0"
"CPUID\0"
"CRC32\0"
"DGH\0"
"DIT\0"
"DOTPROD\0"
"EDSP\0"
"FCMA\0"
"FJCVTZS\0"
"FLAGM\0"
"FP16CONV\0"
"FP16FML\0"
"FP16FULL\0"
"FRINT\0"
"I8MM\0"
"IDIVA\0"
"IDIVT\0"
"LSE\0"
"MTE\0"
"RCPC_IMMO\0"
"RDM\0"
"PMU\0"
"PMULL\0"
"RNG\0"
"SB\0"
"SHA1\0"
"SHA2\0"
"SHA3\0"
"SHA512\0"
"SM3\0"
"SM4\0"
"SSBS\0"
"SVE\0"
"SVE_BF16\0"
"SVE_F32MM\0"
"SVE_F64MM\0"
"SVE_I8MM\0"
"SVE_PMULL\0"
"SVE2\0"
"SVE2_AES\0"
"SVE2_BITPERM\0"
"SVE2_SHA3\0"
"SVE2_SM4\0"
"TME\0"
"<Unknown>\0";
static const uint16_t sFeatureIndex[] = {
0, 5, 11, 19, 25, 31, 38, 47, 56, 65, 74, 83, 92, 101, 107, 113, 119, 127,
131, 139, 145, 150, 154, 160, 166, 170, 174, 182, 187, 192, 200, 206, 215,
223, 232, 238, 243, 249, 255, 259, 263, 273, 277, 281, 287, 291, 294, 299,
304, 309, 316, 320, 324, 329, 333, 342, 352, 362, 371, 381, 386, 395, 408,
418, 427, 431
};
// @EnumStringEnd@
return sb.append(sFeatureString + sFeatureIndex[Support::min<uint32_t>(featureId, uint32_t(CpuFeatures::ARM::kMaxValue) + 1)]);
}
// arm::FormatterInternal - Format Constants
// =========================================
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatCondCode(String& sb, CondCode cc) noexcept {
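  // Note (added for clarity): each mnemonic in the table below occupies exactly
  // three bytes ("xx" plus a NUL), so indexing with `cc * 3` lands on the start
  // of the right entry; codes >= 16 are clamped to 16, selecting "<Unknown>".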
static const char condCodeData[] =
"al\0" "na\0"
"eq\0" "ne\0"
"cs\0" "cc\0" "mi\0" "pl\0" "vs\0" "vc\0"
"hi\0" "ls\0" "ge\0" "lt\0" "gt\0" "le\0"
"<Unknown>";
return sb.append(condCodeData + Support::min<uint32_t>(uint32_t(cc), 16u) * 3);
}
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatShiftOp(String& sb, ShiftOp shiftOp) noexcept {
const char* str = "<Unknown>";
switch (shiftOp) {
case ShiftOp::kLSL: str = "lsl"; break;
case ShiftOp::kLSR: str = "lsr"; break;
case ShiftOp::kASR: str = "asr"; break;
case ShiftOp::kROR: str = "ror"; break;
case ShiftOp::kRRX: str = "rrx"; break;
case ShiftOp::kMSL: str = "msl"; break;
case ShiftOp::kUXTB: str = "uxtb"; break;
case ShiftOp::kUXTH: str = "uxth"; break;
case ShiftOp::kUXTW: str = "uxtw"; break;
case ShiftOp::kUXTX: str = "uxtx"; break;
case ShiftOp::kSXTB: str = "sxtb"; break;
case ShiftOp::kSXTH: str = "sxth"; break;
case ShiftOp::kSXTW: str = "sxtw"; break;
case ShiftOp::kSXTX: str = "sxtx"; break;
}
return sb.append(str);
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_LOGGING


@ -0,0 +1,44 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_ARMFORMATTER_P_H_INCLUDED
#define ASMJIT_ARM_ARMFORMATTER_P_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_LOGGING
#include "../core/formatter.h"
#include "../core/string.h"
#include "../arm/armglobals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(arm)
//! \cond INTERNAL
//! \addtogroup asmjit_arm
//! \{
namespace FormatterInternal {
Error ASMJIT_CDECL formatFeature(
String& sb,
uint32_t featureId) noexcept;
Error ASMJIT_CDECL formatCondCode(
String& sb,
CondCode cc) noexcept;
Error ASMJIT_CDECL formatShiftOp(
String& sb,
ShiftOp shiftOp) noexcept;
} // {FormatterInternal}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_LOGGING
#endif // ASMJIT_ARM_ARMFORMATTER_P_H_INCLUDED


@ -0,0 +1,21 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_ARMGLOBALS_H_INCLUDED
#define ASMJIT_ARM_ARMGLOBALS_H_INCLUDED
#include "../core/archcommons.h"
#include "../core/inst.h"
//! \namespace asmjit::arm
//! \ingroup asmjit_arm
//!
//! API shared between AArch32 & AArch64 backends.
ASMJIT_BEGIN_SUB_NAMESPACE(arm)
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_ARM_ARMGLOBALS_H_INCLUDED

src/asmjit/arm/armoperand.h Normal file

@ -0,0 +1,621 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_ARM_ARMOPERAND_H_INCLUDED
#define ASMJIT_ARM_ARMOPERAND_H_INCLUDED
#include "../core/archtraits.h"
#include "../core/operand.h"
#include "../core/type.h"
#include "../arm/armglobals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(arm)
//! \addtogroup asmjit_arm
//! \{
class Reg;
class Mem;
class Gp;
class GpW;
class GpX;
class Vec;
class VecB;
class VecH;
class VecS;
class VecD;
class VecV;
//! Register traits (ARM/AArch64).
//!
//! Register traits contain information about a particular register type. They're used by asmjit to set up register
//! information on-the-fly and to populate tables that contain register information (this way it's possible to
//! change register types and groups without having to reorder these tables).
template<RegType kRegType>
struct RegTraits : public BaseRegTraits {};
//! \cond
// <--------------------+-----+-------------------------+------------------------+---+---+------------------+
// | Reg | Reg-Type | Reg-Group |Sz |Cnt| TypeId |
// <--------------------+-----+-------------------------+------------------------+---+---+------------------+
ASMJIT_DEFINE_REG_TRAITS(GpW , RegType::kARM_GpW , RegGroup::kGp , 4 , 32, TypeId::kInt32 );
ASMJIT_DEFINE_REG_TRAITS(GpX , RegType::kARM_GpX , RegGroup::kGp , 8 , 32, TypeId::kInt64 );
ASMJIT_DEFINE_REG_TRAITS(VecB , RegType::kARM_VecB , RegGroup::kVec , 1 , 32, TypeId::kVoid );
ASMJIT_DEFINE_REG_TRAITS(VecH , RegType::kARM_VecH , RegGroup::kVec , 2 , 32, TypeId::kVoid );
ASMJIT_DEFINE_REG_TRAITS(VecS , RegType::kARM_VecS , RegGroup::kVec , 4 , 32, TypeId::kInt32x1 );
ASMJIT_DEFINE_REG_TRAITS(VecD , RegType::kARM_VecD , RegGroup::kVec , 8 , 32, TypeId::kInt32x2 );
ASMJIT_DEFINE_REG_TRAITS(VecV , RegType::kARM_VecV , RegGroup::kVec , 16, 32, TypeId::kInt32x4 );
//! \endcond
//! Register (ARM).
class Reg : public BaseReg {
public:
ASMJIT_DEFINE_ABSTRACT_REG(Reg, BaseReg)
//! Gets whether the register is a `R|W` register (32-bit).
inline constexpr bool isGpW() const noexcept { return baseSignature() == RegTraits<RegType::kARM_GpW>::kSignature; }
//! Gets whether the register is an `X` register (64-bit).
inline constexpr bool isGpX() const noexcept { return baseSignature() == RegTraits<RegType::kARM_GpX>::kSignature; }
//! Gets whether the register is a VEC-B register (8-bit).
inline constexpr bool isVecB() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecB>::kSignature; }
//! Gets whether the register is a VEC-H register (16-bit).
inline constexpr bool isVecH() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecH>::kSignature; }
//! Gets whether the register is a VEC-S register (32-bit).
inline constexpr bool isVecS() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecS>::kSignature; }
//! Gets whether the register is a VEC-D register (64-bit).
inline constexpr bool isVecD() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecD>::kSignature; }
//! Gets whether the register is a VEC-Q register (128-bit).
inline constexpr bool isVecQ() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecV>::kSignature; }
//! Gets whether the register is either VEC-D (64-bit) or VEC-Q (128-bit).
inline constexpr bool isVecDOrQ() const noexcept { return uint32_t(type()) - uint32_t(RegType::kARM_VecD) <= 1u; }
//! Gets whether the register is a VEC-V register (128-bit).
inline constexpr bool isVecV() const noexcept { return baseSignature() == RegTraits<RegType::kARM_VecV>::kSignature; }
template<RegType kRegType>
inline void setRegT(uint32_t id) noexcept {
setSignature(RegTraits<kRegType>::kSignature);
setId(id);
}
inline void setTypeAndId(RegType type, uint32_t id) noexcept {
setSignature(signatureOf(type));
setId(id);
}
static inline RegGroup groupOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kAArch64).regTypeToGroup(type); }
static inline TypeId typeIdOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kAArch64).regTypeToTypeId(type); }
static inline OperandSignature signatureOf(RegType type) noexcept { return ArchTraits::byArch(Arch::kAArch64).regTypeToSignature(type); }
template<RegType kRegType>
static inline RegGroup groupOfT() noexcept { return RegTraits<kRegType>::kGroup; }
template<RegType kRegType>
static inline TypeId typeIdOfT() noexcept { return RegTraits<kRegType>::kTypeId; }
template<RegType kRegType>
static inline OperandSignature signatureOfT() noexcept { return RegTraits<kRegType>::kSignature; }
static inline bool isGpW(const Operand_& op) noexcept { return op.as<Reg>().isGpW(); }
static inline bool isGpX(const Operand_& op) noexcept { return op.as<Reg>().isGpX(); }
static inline bool isVecB(const Operand_& op) noexcept { return op.as<Reg>().isVecB(); }
static inline bool isVecH(const Operand_& op) noexcept { return op.as<Reg>().isVecH(); }
static inline bool isVecS(const Operand_& op) noexcept { return op.as<Reg>().isVecS(); }
static inline bool isVecD(const Operand_& op) noexcept { return op.as<Reg>().isVecD(); }
static inline bool isVecQ(const Operand_& op) noexcept { return op.as<Reg>().isVecQ(); }
static inline bool isVecV(const Operand_& op) noexcept { return op.as<Reg>().isVecV(); }
static inline bool isGpW(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isGpW(op)) & unsigned(op.id() == id)); }
static inline bool isGpX(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isGpX(op)) & unsigned(op.id() == id)); }
static inline bool isVecB(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isVecB(op)) & unsigned(op.id() == id)); }
static inline bool isVecH(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isVecH(op)) & unsigned(op.id() == id)); }
static inline bool isVecS(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isVecS(op)) & unsigned(op.id() == id)); }
static inline bool isVecD(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isVecD(op)) & unsigned(op.id() == id)); }
static inline bool isVecQ(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isVecQ(op)) & unsigned(op.id() == id)); }
static inline bool isVecV(const Operand_& op, uint32_t id) noexcept { return bool(unsigned(isVecV(op)) & unsigned(op.id() == id)); }
};
//! General purpose register (ARM).
class Gp : public Reg {
public:
ASMJIT_DEFINE_ABSTRACT_REG(Gp, Reg)
//! Special register id.
enum Id : uint32_t {
//! Register that depends on OS, could be used as TLS offset.
kIdOs = 18,
//! Frame pointer.
kIdFp = 29,
//! Link register.
kIdLr = 30,
//! Stack register id.
kIdSp = 31,
//! Zero register id.
//!
//! Although the zero register has the same id as the stack register, it gets special treatment, because we need
//! to be able to distinguish between the two at the API level. Some instructions were designed to be used with SP
//! and others with ZR, so we need a way to tell the two apart to make sure we emit the right thing.
//!
//! The number 63 is not random: performing `id & 31` always yields 31 for both SP and ZR, which is the identifier
//! used by the AArch64 ISA to encode either SP or ZR depending on the instruction.
kIdZr = 63
};
inline constexpr bool isZR() const noexcept { return id() == kIdZr; }
inline constexpr bool isSP() const noexcept { return id() == kIdSp; }
//! Cast this register to a 32-bit R|W.
inline GpW w() const noexcept;
//! Cast this register to a 64-bit X.
inline GpX x() const noexcept;
};
//! Vector register (ARM).
class Vec : public Reg {
public:
ASMJIT_DEFINE_ABSTRACT_REG(Vec, Reg)
//! Additional signature bits used by arm::Vec.
enum AdditionalBits : uint32_t {
// Register element type (3 bits).
// |........|........|.XXX....|........|
kSignatureRegElementTypeShift = 12,
kSignatureRegElementTypeMask = 0x07 << kSignatureRegElementTypeShift,
// Register has element index (1 bit).
// |........|........|X.......|........|
kSignatureRegElementFlagShift = 15,
kSignatureRegElementFlagMask = 0x01 << kSignatureRegElementFlagShift,
// Register element index (4 bits).
// |........|....XXXX|........|........|
kSignatureRegElementIndexShift = 16,
kSignatureRegElementIndexMask = 0x0F << kSignatureRegElementIndexShift
};
//! Element type.
enum ElementType : uint32_t {
//! No element type specified.
kElementTypeNone = 0,
//! Byte elements (B8 or B16).
kElementTypeB,
//! Halfword elements (H4 or H8).
kElementTypeH,
//! Singleword elements (S2 or S4).
kElementTypeS,
//! Doubleword elements (D2).
kElementTypeD,
//! Byte elements grouped by 4 bytes (B4).
//!
//! \note This element-type is only used by few instructions.
kElementTypeB4,
//! Halfword elements grouped by 2 halfwords (H2).
//!
//! \note This element-type is only used by few instructions.
kElementTypeH2,
//! Count of element types.
kElementTypeCount
};
//! \cond
//! Shortcuts.
enum SignatureReg : uint32_t {
kSignatureElementB = kElementTypeB << kSignatureRegElementTypeShift,
kSignatureElementH = kElementTypeH << kSignatureRegElementTypeShift,
kSignatureElementS = kElementTypeS << kSignatureRegElementTypeShift,
kSignatureElementD = kElementTypeD << kSignatureRegElementTypeShift,
kSignatureElementB4 = kElementTypeB4 << kSignatureRegElementTypeShift,
kSignatureElementH2 = kElementTypeH2 << kSignatureRegElementTypeShift
};
//! \endcond
//! Returns whether the register has an associated element type.
inline constexpr bool hasElementType() const noexcept { return _signature.hasField<kSignatureRegElementTypeMask>(); }
//! Returns whether the register has element index (it's an element index access).
inline constexpr bool hasElementIndex() const noexcept { return _signature.hasField<kSignatureRegElementFlagMask>(); }
//! Returns whether the register has an element type or an element index (or both).
inline constexpr bool hasElementTypeOrIndex() const noexcept { return _signature.hasField<kSignatureRegElementTypeMask | kSignatureRegElementFlagMask>(); }
//! Returns element type of the register.
inline constexpr uint32_t elementType() const noexcept { return _signature.getField<kSignatureRegElementTypeMask>(); }
//! Sets element type of the register to `elementType`.
inline void setElementType(uint32_t elementType) noexcept { _signature.setField<kSignatureRegElementTypeMask>(elementType); }
//! Resets element type to none.
inline void resetElementType() noexcept { _signature.setField<kSignatureRegElementTypeMask>(0); }
//! Returns element index of the register.
inline constexpr uint32_t elementIndex() const noexcept { return _signature.getField<kSignatureRegElementIndexMask>(); }
//! Sets element index of the register to `elementIndex`.
inline void setElementIndex(uint32_t elementIndex) noexcept {
_signature |= kSignatureRegElementFlagMask;
_signature.setField<kSignatureRegElementIndexMask>(elementIndex);
}
//! Resets element index of the register.
inline void resetElementIndex() noexcept {
_signature &= ~(kSignatureRegElementFlagMask | kSignatureRegElementIndexMask);
}
inline constexpr bool isVecB8() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementB); }
inline constexpr bool isVecH4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementH); }
inline constexpr bool isVecS2() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature | kSignatureElementS); }
inline constexpr bool isVecD1() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecD>::kSignature); }
inline constexpr bool isVecB16() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB); }
inline constexpr bool isVecH8() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH); }
inline constexpr bool isVecS4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementS); }
inline constexpr bool isVecD2() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementD); }
inline constexpr bool isVecB4x4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementB4); }
inline constexpr bool isVecH2x4() const noexcept { return _signature.subset(kBaseSignatureMask | kSignatureRegElementTypeMask) == (RegTraits<RegType::kARM_VecV>::kSignature | kSignatureElementH2); }
//! Creates a cloned register with element access.
inline Vec at(uint32_t elementIndex) const noexcept {
return Vec((signature() & ~kSignatureRegElementIndexMask) | (elementIndex << kSignatureRegElementIndexShift) | kSignatureRegElementFlagMask, id());
}
//! Cast this register to an 8-bit B register (scalar).
inline VecB b() const noexcept;
//! Cast this register to a 16-bit H register (scalar).
inline VecH h() const noexcept;
//! Cast this register to a 32-bit S register (scalar).
inline VecS s() const noexcept;
//! Cast this register to a 64-bit D register (scalar).
inline VecD d() const noexcept;
//! Cast this register to a 128-bit Q register (scalar).
inline VecV q() const noexcept;
//! Cast this register to a 128-bit V register.
inline VecV v() const noexcept;
//! Cast this register to a 128-bit V.B[elementIndex] register.
inline VecV b(uint32_t elementIndex) const noexcept;
//! Cast this register to a 128-bit V.H[elementIndex] register.
inline VecV h(uint32_t elementIndex) const noexcept;
//! Cast this register to a 128-bit V.S[elementIndex] register.
inline VecV s(uint32_t elementIndex) const noexcept;
//! Cast this register to a 128-bit V.D[elementIndex] register.
inline VecV d(uint32_t elementIndex) const noexcept;
//! Cast this register to a 128-bit V.H2[elementIndex] register.
inline VecV h2(uint32_t elementIndex) const noexcept;
//! Cast this register to a 128-bit V.B4[elementIndex] register.
inline VecV b4(uint32_t elementIndex) const noexcept;
//! Cast this register to V.8B.
inline VecD b8() const noexcept;
//! Cast this register to V.16B.
inline VecV b16() const noexcept;
//! Cast this register to V.2H.
inline VecS h2() const noexcept;
//! Cast this register to V.4H.
inline VecD h4() const noexcept;
//! Cast this register to V.8H.
inline VecV h8() const noexcept;
//! Cast this register to V.2S.
inline VecD s2() const noexcept;
//! Cast this register to V.4S.
inline VecV s4() const noexcept;
//! Cast this register to V.2D.
inline VecV d2() const noexcept;
static inline constexpr OperandSignature _makeElementAccessSignature(uint32_t elementType, uint32_t elementIndex) noexcept {
return OperandSignature{
uint32_t(RegTraits<RegType::kARM_VecV>::kSignature) |
uint32_t(kSignatureRegElementFlagMask) |
uint32_t(elementType << kSignatureRegElementTypeShift) |
uint32_t(elementIndex << kSignatureRegElementIndexShift)};
}
};
//! 32-bit GPW (AArch64) and/or GPR (ARM/AArch32) register.
class GpW : public Gp { ASMJIT_DEFINE_FINAL_REG(GpW, Gp, RegTraits<RegType::kARM_GpW>) };
//! 64-bit GPX (AArch64) register.
class GpX : public Gp { ASMJIT_DEFINE_FINAL_REG(GpX, Gp, RegTraits<RegType::kARM_GpX>) };
//! 8-bit view (B) of VFP/SIMD register.
class VecB : public Vec { ASMJIT_DEFINE_FINAL_REG(VecB, Vec, RegTraits<RegType::kARM_VecB>) };
//! 16-bit view (H) of VFP/SIMD register.
class VecH : public Vec { ASMJIT_DEFINE_FINAL_REG(VecH, Vec, RegTraits<RegType::kARM_VecH>) };
//! 32-bit view (S) of VFP/SIMD register.
class VecS : public Vec { ASMJIT_DEFINE_FINAL_REG(VecS, Vec, RegTraits<RegType::kARM_VecS>) };
//! 64-bit view (D) of VFP/SIMD register.
class VecD : public Vec { ASMJIT_DEFINE_FINAL_REG(VecD, Vec, RegTraits<RegType::kARM_VecD>) };
//! 128-bit vector register (Q or V).
class VecV : public Vec { ASMJIT_DEFINE_FINAL_REG(VecV, Vec, RegTraits<RegType::kARM_VecV>) };
inline GpW Gp::w() const noexcept { return GpW(id()); }
inline GpX Gp::x() const noexcept { return GpX(id()); }
inline VecB Vec::b() const noexcept { return VecB(id()); }
inline VecH Vec::h() const noexcept { return VecH(id()); }
inline VecS Vec::s() const noexcept { return VecS(id()); }
inline VecD Vec::d() const noexcept { return VecD(id()); }
inline VecV Vec::q() const noexcept { return VecV(id()); }
inline VecV Vec::v() const noexcept { return VecV(id()); }
inline VecV Vec::b(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeB, elementIndex), id()); }
inline VecV Vec::h(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeH, elementIndex), id()); }
inline VecV Vec::s(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeS, elementIndex), id()); }
inline VecV Vec::d(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeD, elementIndex), id()); }
inline VecV Vec::h2(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeH2, elementIndex), id()); }
inline VecV Vec::b4(uint32_t elementIndex) const noexcept { return VecV(_makeElementAccessSignature(kElementTypeB4, elementIndex), id()); }
inline VecD Vec::b8() const noexcept { return VecD(OperandSignature{VecD::kSignature | kSignatureElementB}, id()); }
inline VecS Vec::h2() const noexcept { return VecS(OperandSignature{VecS::kSignature | kSignatureElementH}, id()); }
inline VecD Vec::h4() const noexcept { return VecD(OperandSignature{VecD::kSignature | kSignatureElementH}, id()); }
inline VecD Vec::s2() const noexcept { return VecD(OperandSignature{VecD::kSignature | kSignatureElementS}, id()); }
inline VecV Vec::b16() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementB}, id()); }
inline VecV Vec::h8() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementH}, id()); }
inline VecV Vec::s4() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementS}, id()); }
inline VecV Vec::d2() const noexcept { return VecV(OperandSignature{VecV::kSignature | kSignatureElementD}, id()); }
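// Usage sketch (added for illustration; register ids are arbitrary): composing
// typed views of the 128-bit vector register v0 with the casts defined above:
//
//   Vec reg = VecV(0); // 128-bit v0.
//   reg.s4();          // v0.4s   - four 32-bit lanes.
//   reg.d2();          // v0.2d   - two 64-bit lanes.
//   reg.s(3);          // v0.s[3] - element access at index 3.
//   reg.d();           // d0      - 64-bit scalar view.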
#ifndef _DOXYGEN
namespace regs {
#endif
//! Creates a 32-bit W register operand (ARM/AArch64).
static inline constexpr GpW w(uint32_t id) noexcept { return GpW(id); }
//! Creates a 64-bit X register operand (AArch64).
static inline constexpr GpX x(uint32_t id) noexcept { return GpX(id); }
//! Creates a 32-bit S register operand (ARM/AArch64).
static inline constexpr VecS s(uint32_t id) noexcept { return VecS(id); }
//! Creates a 64-bit D register operand (ARM/AArch64).
static inline constexpr VecD d(uint32_t id) noexcept { return VecD(id); }
//! Creates a 128-bit V register operand (ARM/AArch64).
static inline constexpr VecV v(uint32_t id) noexcept { return VecV(id); }
#ifndef _DOXYGEN
} // {regs}
// Make `arm::regs` accessible through `arm` namespace as well.
using namespace regs;
#endif
//! Memory operand (ARM).
class Mem : public BaseMem {
public:
//! \cond INTERNAL
//! Additional bits of operand's signature used by `arm::Mem`.
enum AdditionalBits : uint32_t {
// Index shift value (5 bits).
// |........|.....XXX|XX......|........|
kSignatureMemShiftValueShift = 14,
kSignatureMemShiftValueMask = 0x1Fu << kSignatureMemShiftValueShift,
// Shift operation type (4 bits).
// |........|XXXX....|........|........|
kSignatureMemPredicateShift = 20,
kSignatureMemPredicateMask = 0x0Fu << kSignatureMemPredicateShift
};
//! \endcond
//! Memory offset mode.
//!
//! Additional constants that can be used with the `predicate`.
enum OffsetMode : uint32_t {
//! Pre-index "[BASE, #Offset {, <shift>}]!" with write-back.
kOffsetPreIndex = 0xE,
//! Post-index "[BASE], #Offset {, <shift>}" with write-back.
kOffsetPostIndex = 0xF
};
//! \name Construction & Destruction
//! \{
//! Constructs a default `Mem` operand that points to [0].
inline constexpr Mem() noexcept
: BaseMem() {}
inline constexpr Mem(const Mem& other) noexcept
: BaseMem(other) {}
inline explicit Mem(Globals::NoInit_) noexcept
: BaseMem(Globals::NoInit) {}
inline constexpr Mem(const Signature& signature, uint32_t baseId, uint32_t indexId, int32_t offset) noexcept
: BaseMem(signature, baseId, indexId, offset) {}
inline constexpr explicit Mem(const Label& base, int32_t off = 0, Signature signature = Signature{0}) noexcept
: BaseMem(Signature::fromOpType(OperandType::kMem) |
Signature::fromMemBaseType(RegType::kLabelTag) |
signature, base.id(), 0, off) {}
inline constexpr explicit Mem(const BaseReg& base, int32_t off = 0, Signature signature = Signature{0}) noexcept
: BaseMem(Signature::fromOpType(OperandType::kMem) |
Signature::fromMemBaseType(base.type()) |
signature, base.id(), 0, off) {}
inline constexpr Mem(const BaseReg& base, const BaseReg& index, Signature signature = Signature{0}) noexcept
: BaseMem(Signature::fromOpType(OperandType::kMem) |
Signature::fromMemBaseType(base.type()) |
Signature::fromMemIndexType(index.type()) |
signature, base.id(), index.id(), 0) {}
inline constexpr Mem(const BaseReg& base, const BaseReg& index, const Shift& shift, Signature signature = Signature{0}) noexcept
: BaseMem(Signature::fromOpType(OperandType::kMem) |
Signature::fromMemBaseType(base.type()) |
Signature::fromMemIndexType(index.type()) |
Signature::fromValue<kSignatureMemPredicateMask>(uint32_t(shift.op())) |
Signature::fromValue<kSignatureMemShiftValueMask>(shift.value()) |
signature, base.id(), index.id(), 0) {}
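// Note (added for clarity): the absolute-address constructor below packs a
// 64-bit address into the operand by storing the high 32 bits in the base-id
// field and the low 32 bits in the offset field.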
inline constexpr Mem(uint64_t base, Signature signature = Signature{0}) noexcept
: BaseMem(Signature::fromOpType(OperandType::kMem) |
signature, uint32_t(base >> 32), 0, int32_t(uint32_t(base & 0xFFFFFFFFu))) {}
//! \}
//! \name Overloaded Operators
//! \{
inline Mem& operator=(const Mem& other) noexcept = default;
//! \}
//! \name Clone
//! \{
//! Clones the memory operand.
inline constexpr Mem clone() const noexcept { return Mem(*this); }
//! Gets new memory operand adjusted by `off`.
inline Mem cloneAdjusted(int64_t off) const noexcept {
Mem result(*this);
result.addOffset(off);
return result;
}
//! Clones the memory operand and makes it pre-index.
inline Mem pre() const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPreIndex);
return result;
}
//! Clones the memory operand, applies a given offset `off` and makes it pre-index.
inline Mem pre(int64_t off) const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPreIndex);
result.addOffset(off);
return result;
}
//! Clones the memory operand and makes it post-index.
inline Mem post() const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPostIndex);
return result;
}
//! Clones the memory operand, applies a given offset `off` and makes it post-index.
inline Mem post(int64_t off) const noexcept {
Mem result(*this);
result.setPredicate(kOffsetPostIndex);
result.addOffset(off);
return result;
}
//! \}
//! \name Base & Index
//! \{
//! Converts memory `baseType` and `baseId` to `arm::Reg` instance.
//!
//! The memory must have a valid base register otherwise the result will be wrong.
inline Reg baseReg() const noexcept { return Reg::fromTypeAndId(baseType(), baseId()); }
//! Converts memory `indexType` and `indexId` to `arm::Reg` instance.
//!
//! The memory must have a valid index register otherwise the result will be wrong.
inline Reg indexReg() const noexcept { return Reg::fromTypeAndId(indexType(), indexId()); }
using BaseMem::setIndex;
inline void setIndex(const BaseReg& index, uint32_t shift) noexcept {
setIndex(index);
setShift(shift);
}
//! \}
//! \name ARM Specific Features
//! \{
//! Gets whether the memory operand has a shift (aka scale) constant.
inline constexpr bool hasShift() const noexcept { return _signature.hasField<kSignatureMemShiftValueMask>(); }
//! Gets the memory operand's shift (aka scale) constant.
inline constexpr uint32_t shift() const noexcept { return _signature.getField<kSignatureMemShiftValueMask>(); }
//! Sets the memory operand's shift (aka scale) constant.
inline void setShift(uint32_t shift) noexcept { _signature.setField<kSignatureMemShiftValueMask>(shift); }
//! Resets the memory operand's shift (aka scale) constant to zero.
inline void resetShift() noexcept { _signature.setField<kSignatureMemShiftValueMask>(0); }
//! Gets memory predicate (shift mode or offset mode), see \ref ShiftOp and \ref OffsetMode.
inline constexpr uint32_t predicate() const noexcept { return _signature.getField<kSignatureMemPredicateMask>(); }
//! Sets memory predicate to `predicate`, see `Mem::ShiftOp`.
inline void setPredicate(uint32_t predicate) noexcept { _signature.setField<kSignatureMemPredicateMask>(predicate); }
//! Resets shift mode to LSL (default).
inline void resetPredicate() noexcept { _signature.setField<kSignatureMemPredicateMask>(0); }
inline constexpr bool isFixedOffset() const noexcept { return predicate() < kOffsetPreIndex; }
inline constexpr bool isPreOrPost() const noexcept { return predicate() >= kOffsetPreIndex; }
inline constexpr bool isPreIndex() const noexcept { return predicate() == kOffsetPreIndex; }
inline constexpr bool isPostIndex() const noexcept { return predicate() == kOffsetPostIndex; }
inline void resetToFixedOffset() noexcept { resetPredicate(); }
inline void makePreIndex() noexcept { setPredicate(kOffsetPreIndex); }
inline void makePostIndex() noexcept { setPredicate(kOffsetPostIndex); }
//! \}
};
//! Creates `[base.reg, offset]` memory operand (offset mode).
static inline constexpr Mem ptr(const Gp& base, int32_t offset = 0) noexcept {
return Mem(base, offset);
}
//! Creates `[base.reg, offset]!` memory operand (pre-index mode).
static inline constexpr Mem ptr_pre(const Gp& base, int32_t offset = 0) noexcept {
return Mem(base, offset, OperandSignature::fromValue<Mem::kSignatureMemPredicateMask>(Mem::kOffsetPreIndex));
}
//! Creates `[base.reg], offset` memory operand (post-index mode).
static inline constexpr Mem ptr_post(const Gp& base, int32_t offset = 0) noexcept {
return Mem(base, offset, OperandSignature::fromValue<Mem::kSignatureMemPredicateMask>(Mem::kOffsetPostIndex));
}
//! Creates `[base.reg, index]` memory operand.
static inline constexpr Mem ptr(const Gp& base, const Gp& index) noexcept {
return Mem(base, index);
}
//! Creates `[base.reg], index` memory operand (post-index mode).
static inline constexpr Mem ptr_post(const Gp& base, const Gp& index) noexcept {
return Mem(base, index, OperandSignature::fromValue<Mem::kSignatureMemPredicateMask>(Mem::kOffsetPostIndex));
}
//! Creates `[base.reg, index, SHIFT_OP #shift]` memory operand.
static inline constexpr Mem ptr(const Gp& base, const Gp& index, const Shift& shift) noexcept {
return Mem(base, index, shift);
}
//! Creates `[base + offset]` memory operand.
static inline constexpr Mem ptr(const Label& base, int32_t offset = 0) noexcept {
return Mem(base, offset);
}
// TODO: [ARM] PC + offset address.
#if 0
//! Creates `[PC + offset]` (relative) memory operand.
static inline constexpr Mem ptr(const PC& pc, int32_t offset = 0) noexcept {
return Mem(pc, offset);
}
#endif
//! Creates `[base]` absolute memory operand.
//!
//! \note The concept of absolute memory operands doesn't exist on ARM; the ISA only provides PC-relative addressing.
//! Absolute memory operands can only be used if it's known that the PC-relative offset is encodable and that it
//! would be within the limits. Absolute addresses are also often output by disassemblers, so AsmJit supports them
//! so such output can be assembled back.
static inline constexpr Mem ptr(uint64_t base) noexcept { return Mem(base); }
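// Usage sketch (added for illustration; registers and the shift amount are
// arbitrary examples, and `Shift(ShiftOp, value)` is the shift constructor
// from the shared ARM commons): the creators above map directly to AArch64
// addressing forms:
//
//   ptr(x(0), 16);                            // [x0, #16]        - fixed offset.
//   ptr_pre(x(0), 16);                        // [x0, #16]!       - pre-index.
//   ptr_post(x(0), 16);                       // [x0], #16        - post-index.
//   ptr(x(0), x(1), Shift(ShiftOp::kLSL, 2)); // [x0, x1, lsl #2] - scaled index.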
//! \}
ASMJIT_END_SUB_NAMESPACE
//! \cond INTERNAL
ASMJIT_BEGIN_NAMESPACE
ASMJIT_DEFINE_TYPE_ID(arm::GpW, TypeId::kInt32);
ASMJIT_DEFINE_TYPE_ID(arm::GpX, TypeId::kInt64);
ASMJIT_DEFINE_TYPE_ID(arm::VecS, TypeId::kFloat32x1);
ASMJIT_DEFINE_TYPE_ID(arm::VecD, TypeId::kFloat64x1);
ASMJIT_DEFINE_TYPE_ID(arm::VecV, TypeId::kInt32x4);
ASMJIT_END_NAMESPACE
//! \endcond
#endif // ASMJIT_ARM_ARMOPERAND_H_INCLUDED


@ -0,0 +1,148 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
#define ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED
#include "../core/archtraits.h"
#include "../core/misc_p.h"
#include "../x86/x86operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
//! X86 architecture traits (internal).
static const constexpr ArchTraits x86ArchTraits = {
// SP/FP/LR/PC.
Gp::kIdSp, Gp::kIdBp, 0xFF, 0xFF,
// Reserved.
{ 0, 0, 0 },
// HW stack alignment.
1,
// Min/Max stack offset
0x7FFFFFFFu, 0x7FFFFFFFu,
// ISA features [Gp, Vec, Other0, Other1].
{{
InstHints::kRegSwap | InstHints::kPushPop,
InstHints::kNoHints,
InstHints::kNoHints,
InstHints::kNoHints
}},
// Register signatures.
#define V(index) OperandSignature{x86::RegTraits<RegType(index)>::kSignature}
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// RegTypeToTypeId.
#define V(index) TypeId(x86::RegTraits<RegType(index)>::kTypeId)
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// TypeIdToRegType.
#define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8) ? RegType::kX86_GpbLo : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8) ? RegType::kX86_GpbLo : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16) ? RegType::kX86_Gpw : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16) ? RegType::kX86_Gpw : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kX86_Xmm : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kX86_Xmm : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask8) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask16) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask32) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask64) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx32) ? RegType::kX86_Mm : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx64) ? RegType::kX86_Mm : RegType::kNone)
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities.
{
ArchTypeNameId::kDB,
ArchTypeNameId::kDW,
ArchTypeNameId::kDD,
ArchTypeNameId::kDQ
}
};
//! X64 architecture traits (internal).
static const constexpr ArchTraits x64ArchTraits = {
// SP/FP/LR/PC.
Gp::kIdSp, Gp::kIdBp, 0xFF, 0xFF,
// Reserved.
{ 0, 0, 0 },
// HW stack alignment.
1,
// Min/Max stack offset
0x7FFFFFFFu, 0x7FFFFFFFu,
// ISA features [Gp, Vec, Other0, Other1].
{{
InstHints::kRegSwap | InstHints::kPushPop,
InstHints::kNoHints,
InstHints::kNoHints,
InstHints::kNoHints
}},
// Register signatures.
#define V(index) OperandSignature{x86::RegTraits<RegType(index)>::kSignature}
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// RegTypeToTypeId.
#define V(index) TypeId(x86::RegTraits<RegType(index)>::kTypeId)
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// TypeIdToRegType.
#define V(index) (index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt8) ? RegType::kX86_GpbLo : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt8) ? RegType::kX86_GpbLo : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt16) ? RegType::kX86_Gpw : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt16) ? RegType::kX86_Gpw : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt32) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt32) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kInt64) ? RegType::kX86_Gpq : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUInt64) ? RegType::kX86_Gpq : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kIntPtr) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kUIntPtr) ? RegType::kX86_Gpd : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat32) ? RegType::kX86_Xmm : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kFloat64) ? RegType::kX86_Xmm : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask8) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask16) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask32) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMask64) ? RegType::kX86_KReg : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx32) ? RegType::kX86_Mm : \
index + uint32_t(TypeId::_kBaseStart) == uint32_t(TypeId::kMmx64) ? RegType::kX86_Mm : RegType::kNone)
{{ ASMJIT_LOOKUP_TABLE_32(V, 0) }},
#undef V
// Word names of 8-bit, 16-bit, 32-bit, and 64-bit quantities.
{
ArchTypeNameId::kDB,
ArchTypeNameId::kDW,
ArchTypeNameId::kDD,
ArchTypeNameId::kDQ
}
};
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86ARCHTRAITS_P_H_INCLUDED

File diff suppressed because it is too large

@ -0,0 +1,685 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
#define ASMJIT_X86_X86ASSEMBLER_H_INCLUDED
#include "../core/assembler.h"
#include "../x86/x86emitter.h"
#include "../x86/x86operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \addtogroup asmjit_x86
//! \{
//! X86/X64 assembler implementation.
//!
//! x86::Assembler is a code emitter that emits machine code directly into the \ref CodeBuffer. The assembler is capable
//! of targeting both 32-bit and 64-bit instruction sets; the target instruction set can be configured through \ref CodeHolder.
//!
//! ### Basics
//!
//! The following example shows a basic use of `x86::Assembler`, how to generate a function that works in both 32-bit
//! and 64-bit modes, and how to connect \ref JitRuntime, \ref CodeHolder, and `x86::Assembler`.
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! // Signature of the generated function.
//! typedef int (*SumFunc)(const int* arr, size_t count);
//!
//! int main() {
//! JitRuntime rt; // Create a runtime specialized for JIT.
//! CodeHolder code; // Create a CodeHolder.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
//!
//! // Decide between 32-bit CDECL, WIN64, and SysV64 calling conventions:
//! // 32-BIT - passes all arguments on the stack.
//! // WIN64 - passes first 4 arguments by RCX, RDX, R8, and R9.
//! // UNIX64 - passes first 6 arguments by RDI, RSI, RDX, RCX, R8, and R9.
//! x86::Gp arr, cnt;
//! x86::Gp sum = x86::eax; // Use EAX as 'sum' as it's a return register.
//!
//! if (ASMJIT_ARCH_BITS == 64) {
//! #if defined(_WIN32)
//! arr = x86::rcx; // First argument (array ptr).
//! cnt = x86::rdx; // Second argument (number of elements)
//! #else
//! arr = x86::rdi; // First argument (array ptr).
//! cnt = x86::rsi; // Second argument (number of elements)
//! #endif
//! }
//! else {
//! arr = x86::edx; // Use EDX to hold the array pointer.
//! cnt = x86::ecx; // Use ECX to hold the counter.
//! // Fetch first and second arguments from [ESP + 4] and [ESP + 8].
//! a.mov(arr, x86::ptr(x86::esp, 4));
//! a.mov(cnt, x86::ptr(x86::esp, 8));
//! }
//!
//! Label Loop = a.newLabel(); // To construct the loop, we need some labels.
//! Label Exit = a.newLabel();
//!
//! a.xor_(sum, sum); // Clear 'sum' register (shorter than 'mov').
//! a.test(cnt, cnt); // Border case:
//! a.jz(Exit); // If 'cnt' is zero jump to 'Exit' now.
//!
//! a.bind(Loop); // Start of a loop iteration.
//! a.add(sum, x86::dword_ptr(arr)); // Add int at [arr] to 'sum'.
//! a.add(arr, 4); // Increment 'arr' pointer.
//! a.dec(cnt); // Decrease 'cnt'.
//! a.jnz(Loop); // If not zero jump to 'Loop'.
//!
//! a.bind(Exit); // Exit to handle the border case.
//! a.ret(); // Return from function ('sum' == 'eax').
//! // ----> x86::Assembler is no longer needed from here and can be destroyed <----
//!
//! SumFunc fn;
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
//!
//! if (err) return 1; // Handle a possible error returned by AsmJit.
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
//!
//! static const int array[6] = { 4, 8, 15, 16, 23, 42 };
//!
//! int result = fn(array, 6); // Execute the generated code.
//! printf("%d\n", result); // Print sum of array (108).
//!
//! rt.release(fn); // Explicitly remove the function from the runtime
//! return 0; // Everything successful...
//! }
//! ```
//!
//! The example should be self-explanatory. It shows how to work with labels, how to use operands, and how to emit
//! instructions that can use different registers based on runtime selection. It implements 32-bit CDECL, WIN64,
//! and SysV64 calling conventions and will work on most X86/X64 environments.
//!
//! Although function prologs / epilogs can be implemented manually, AsmJit provides utilities that can be used
//! to create function prologs and epilogs automatically, see \ref asmjit_function for more details.
//!
//! ### Instruction Validation
//!
//! Assembler prefers speed over strictness by default. The implementation checks the type of operands and fails
//! if the signature of types is invalid; however, it only does basic checks regarding registers and their groups
//! used in instructions, so it's possible to pass operands that don't form any valid signature and still succeed.
//! This is usually not a problem as Assembler provides a typed API, so operand types are normally checked by the
//! C++ compiler at compile time; however, Assembler is fully dynamic and its \ref emit() function can be called
//! with any instruction id, options, and operands. Moreover, it's also possible to form instructions that will be
//! accepted by the typed API, for example by calling `mov(x86::eax, x86::al)` - the C++ compiler won't see a problem
//! as both EAX and AL are \ref Gp registers.
//!
//! To help with common mistakes AsmJit allows instruction validation to be activated. This feature instruments
//! the Assembler to call \ref InstAPI::validate() before it attempts to encode any instruction.
//!
//! The example below illustrates how validation can be turned on:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! int main(int argc, char* argv[]) {
//! JitRuntime rt; // Create a runtime specialized for JIT.
//! CodeHolder code; // Create a CodeHolder.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
//!
//! // Enable strict validation.
//! a.addDiagnosticOptions(DiagnosticOptions::kValidateAssembler);
//!
//! // Try to encode invalid or ill-formed instructions.
//! Error err;
//!
//! // Invalid instruction.
//! err = a.mov(x86::eax, x86::al);
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
//!
//! // Invalid instruction.
//! err = a.emit(x86::Inst::kIdMovss, x86::eax, x86::xmm0);
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
//!
//! // Ambiguous operand size - the pointer requires size.
//! err = a.inc(x86::ptr(x86::rax), 1);
//! printf("Status: %s\n", DebugUtils::errorAsString(err));
//!
//! return 0;
//! }
//! ```
//!
//! ### Native Registers
//!
//! All emitters provide functions to construct machine-size registers depending on the target. This feature is
//! for users that want to write code targeting both 32-bit and 64-bit architectures at the same time. In AsmJit
//! terminology such registers have prefix `z`, so for example on X86 architecture the following native registers
//! are provided:
//!
//! - `zax` - mapped to either `eax` or `rax`
//! - `zbx` - mapped to either `ebx` or `rbx`
//! - `zcx` - mapped to either `ecx` or `rcx`
//! - `zdx` - mapped to either `edx` or `rdx`
//! - `zsp` - mapped to either `esp` or `rsp`
//! - `zbp` - mapped to either `ebp` or `rbp`
//! - `zsi` - mapped to either `esi` or `rsi`
//! - `zdi` - mapped to either `edi` or `rdi`
//!
//! They are accessible through \ref x86::Assembler, \ref x86::Builder, and \ref x86::Compiler. The example below
//! illustrates how to use this feature:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! typedef int (*Func)(void);
//!
//! int main(int argc, char* argv[]) {
//! JitRuntime rt; // Create a runtime specialized for JIT.
//! CodeHolder code; // Create a CodeHolder.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
//!
//! // Let's get these registers from x86::Assembler.
//! x86::Gp zbp = a.zbp();
//! x86::Gp zsp = a.zsp();
//!
//! int stackSize = 32;
//!
//! // Function prolog.
//! a.push(zbp);
//! a.mov(zbp, zsp);
//! a.sub(zsp, stackSize);
//!
//! // ... emit some code (this just sets return value to zero) ...
//! a.xor_(x86::eax, x86::eax);
//!
//! // Function epilog and return.
//! a.mov(zsp, zbp);
//! a.pop(zbp);
//! a.ret();
//!
//! // To make the example complete let's call it.
//! Func fn;
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error returned by AsmJit.
//!
//! int result = fn(); // Execute the generated code.
//! printf("%d\n", result); // Print the resulting "0".
//!
//! rt.release(fn); // Remove the function from the runtime.
//! return 0;
//! }
//! ```
//!
//! The example just returns `0`, but the generated function contains a standard prolog and epilog sequence and the
//! function itself reserves 32 bytes of local stack. The advantage is clear - a single code-base can handle multiple
//! targets easily. If you want to create a register of native size dynamically by specifying its id it's also possible:
//!
//! ```
//! void example(x86::Assembler& a) {
//! x86::Gp zax = a.gpz(x86::Gp::kIdAx);
//! x86::Gp zbx = a.gpz(x86::Gp::kIdBx);
//! x86::Gp zcx = a.gpz(x86::Gp::kIdCx);
//! x86::Gp zdx = a.gpz(x86::Gp::kIdDx);
//!
//! // You can also change register's id easily.
//! x86::Gp zsp = zax;
//! zsp.setId(4); // or x86::Gp::kIdSp.
//! }
//! ```
//!
//! ### Data Embedding
//!
//! x86::Assembler extends the standard \ref BaseAssembler with X86/X64 specific conventions that are often used by
//! assemblers to embed data next to the code. The following functions can be used to embed data:
//!
//! - \ref BaseAssembler::embedInt8() - embeds int8_t (portable naming).
//! - \ref BaseAssembler::embedUInt8() - embeds uint8_t (portable naming).
//! - \ref BaseAssembler::embedInt16() - embeds int16_t (portable naming).
//! - \ref BaseAssembler::embedUInt16() - embeds uint16_t (portable naming).
//! - \ref BaseAssembler::embedInt32() - embeds int32_t (portable naming).
//! - \ref BaseAssembler::embedUInt32() - embeds uint32_t (portable naming).
//! - \ref BaseAssembler::embedInt64() - embeds int64_t (portable naming).
//! - \ref BaseAssembler::embedUInt64() - embeds uint64_t (portable naming).
//! - \ref BaseAssembler::embedFloat() - embeds float (portable naming).
//! - \ref BaseAssembler::embedDouble() - embeds double (portable naming).
//!
//! - \ref x86::Assembler::db() - embeds byte (8 bits) (x86 naming).
//! - \ref x86::Assembler::dw() - embeds word (16 bits) (x86 naming).
//! - \ref x86::Assembler::dd() - embeds dword (32 bits) (x86 naming).
//! - \ref x86::Assembler::dq() - embeds qword (64 bits) (x86 naming).
//!
//! The following example illustrates how embed works:
//!
//! ```
//! #include <asmjit/x86.h>
//! using namespace asmjit;
//!
//! void embedData(x86::Assembler& a) {
//! a.db(0xFF); // Embeds 0xFF byte.
//! a.dw(0xFF00); // Embeds 0xFF00 word (little-endian).
//! a.dd(0xFF000000); // Embeds 0xFF000000 dword (little-endian).
//! a.embedFloat(0.4f); // Embeds 0.4f (32-bit float, little-endian).
//! }
//! ```
//!
//! Sometimes it's necessary to read back data that is embedded after the code. This can be done through
//! \ref Label as shown below:
//!
//! ```
//! #include <asmjit/x86.h>
//! using namespace asmjit;
//!
//! void embedData(x86::Assembler& a, const Label& L_Data) {
//! x86::Gp addr = a.zax(); // EAX or RAX.
//! x86::Gp val = x86::edi; // Where to store some value...
//!
//! // Approach 1 - Load the address to register through LEA. This approach
//! // is flexible as the address can be then manipulated, for
//! // example if you have a data array, which would need index.
//! a.lea(addr, x86::ptr(L_Data)); // Loads the address of the label to EAX or RAX.
//! a.mov(val, x86::dword_ptr(addr));
//!
//! // Approach 2 - Load the data directly by using L_Data in address. It's
//! // worth noting that this doesn't work with indexes in X64
//! // mode. It will use absolute address in 32-bit mode and
//! // relative address (RIP) in 64-bit mode.
//! a.mov(val, x86::dword_ptr(L_Data));
//! }
//! ```
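//!
//! The `L_Data` label in the example above would be created, bound, and filled elsewhere. A minimal
//! sketch (not from the original docs) of what that could look like - the embedded constant is just
//! a placeholder:
//!
//! ```
//! void defineData(x86::Assembler& a, Label& L_Data) {
//!   L_Data = a.newLabel();  // Create the label.
//!   // ... emit the function's code ...
//!   a.bind(L_Data);         // Bind it after the code.
//!   a.dd(0x12345678);       // Embed the dword it refers to.
//! }
//! ```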
//!
//! ### Label Embedding
//!
//! It's also possible to embed labels. In general AsmJit provides the following options:
//!
//! - \ref BaseEmitter::embedLabel() - Embeds the absolute address of a label. This is target dependent and
//! emits either 32-bit or 64-bit data holding the absolute label address. This kind of embedding cannot be
//! used in position-independent code.
//!
//! - \ref BaseEmitter::embedLabelDelta() - Embeds a difference between two labels. The size of the difference
//! can be specified so it's possible to embed 8-bit, 16-bit, 32-bit, and 64-bit difference, which is sufficient
//! for most purposes.
//!
//! The following example demonstrates how to embed labels and their differences:
//!
//! ```
//! #include <asmjit/x86.h>
//! using namespace asmjit;
//!
//! void embedLabel(x86::Assembler& a, const Label& L_Data) {
//! // [1] Embed L_Data - the size of the data will be dependent on the target.
//! a.embedLabel(L_Data);
//!
//! // [2] Embed a 32-bit difference of two labels.
//! Label L_Here = a.newLabel();
//! a.bind(L_Here);
//! // Embeds int32_t(L_Data - L_Here).
//! a.embedLabelDelta(L_Data, L_Here, 4);
//! }
//! ```
//!
//! ### Using FuncFrame and FuncDetail with x86::Assembler
//!
//! The example below demonstrates how \ref FuncFrame and \ref FuncDetail can be used together with \ref x86::Assembler
//! to generate a function that will use platform dependent calling conventions automatically depending on the target:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
//!
//! int main(int argc, char* argv[]) {
//! JitRuntime rt; // Create JIT Runtime.
//! CodeHolder code; // Create a CodeHolder.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
//!
//! // Decide which registers will be mapped to function arguments. Try changing
//! // registers of dst, src_a, and src_b and see what happens in the function's
//! // prolog and epilog.
//! x86::Gp dst = a.zax();
//! x86::Gp src_a = a.zcx();
//! x86::Gp src_b = a.zdx();
//!
//! x86::Xmm vec0 = x86::xmm0;
//! x86::Xmm vec1 = x86::xmm1;
//!
//! // Create/initialize FuncDetail and FuncFrame.
//! FuncDetail func;
//! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
//!
//! FuncFrame frame;
//! frame.init(func);
//!
//! // Make XMM0 and XMM1 dirty - RegGroup::kVec describes XMM|YMM|ZMM registers.
//! frame.setDirtyRegs(RegGroup::kVec, Support::bitMask(0, 1));
//!
//! // Alternatively, if you don't want to use register masks you can pass BaseReg
//! // to addDirtyRegs(). The following code would add both xmm0 and xmm1.
//! frame.addDirtyRegs(x86::xmm0, x86::xmm1);
//!
//! FuncArgsAssignment args(&func); // Create arguments assignment context.
//! args.assignAll(dst, src_a, src_b);// Assign our registers to arguments.
//! args.updateFuncFrame(frame); // Reflect our args in FuncFrame.
//! frame.finalize(); // Finalize the FuncFrame (updates it).
//!
//! a.emitProlog(frame); // Emit function prolog.
//! a.emitArgsAssignment(frame, args);// Assign arguments to registers.
//! a.movdqu(vec0, x86::ptr(src_a)); // Load 4 ints from [src_a] to XMM0.
//! a.movdqu(vec1, x86::ptr(src_b)); // Load 4 ints from [src_b] to XMM1.
//! a.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
//! a.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
//! a.emitEpilog(frame); // Emit function epilog and return.
//!
//! SumIntsFunc fn;
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error case.
//!
//! // Execute the generated function.
//! int inA[4] = { 4, 3, 2, 1 };
//! int inB[4] = { 1, 5, 2, 8 };
//! int out[4];
//! fn(out, inA, inB);
//!
//! // Prints {5 8 4 9}
//! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
//!
//! rt.release(fn);
//! return 0;
//! }
//! ```
//!
//! ### Using x86::Assembler as Code-Patcher
//!
//! This is an advanced topic that is sometimes unavoidable. By default AsmJit appends the machine code it
//! generates to a \ref CodeBuffer, however, it also allows setting the offset in \ref CodeBuffer explicitly and
//! overwriting its content. This technique is dangerous as X86 instructions have variable length (see below), so
//! in general you should only patch code to change an instruction's immediate values or other details not known
//! at the time the instruction was emitted. A typical scenario that requires code-patching is when you start
//! emitting a function and don't yet know how much stack to reserve for it.
//!
//! Before we go further it's important to introduce instruction options, because they can help with code-patching
//! (and not only patching, but that will be explained in AVX-512 section):
//!
//! - Many general-purpose instructions (especially arithmetic ones) on X86 have multiple encodings - in AsmJit
//! this is usually called 'short form' and 'long form'.
//!
//! - AsmJit always tries to use 'short form' as it makes the resulting machine-code smaller, which is always
//! good - the same decision is made by the majority of assemblers out there.
//!
//! - AsmJit allows overriding the default decision by using `short_()` and `long_()` instruction options to force
//! short or long form, respectively. The most useful is `long_()` as it basically forces AsmJit to always emit
//! the longest form. The `short_()` is not that useful as it's automatic (except jumps to non-bound labels). Note
//! that the underscore after each function name avoids collision with built-in C++ types.
//!
//! To illustrate what short form and long form means in binary let's assume we want to emit "add esp, 16" instruction,
//! which has two possible binary encodings:
//!
//! - `83C410` - This is a short form aka `short add esp, 16` - You can see the opcode byte (0x83), the MOD/RM
//! byte (0xC4), and an 8-bit immediate value representing `16`.
//!
//! - `81C410000000` - This is a long form aka `long add esp, 16` - You can see a different opcode byte (0x81), the
//! same MOD/RM byte (0xC4), and a 32-bit little-endian immediate representing `16`.
//!
//! It should be obvious that patching an existing instruction into an instruction having a different size may create
//! various problems. So it's recommended to be careful and to only patch instructions into instructions having the
//! same size. The example below demonstrates how instruction options can be used to guarantee the size of an
//! instruction by forcing the assembler to use long-form encoding:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! typedef int (*Func)(void);
//!
//! int main(int argc, char* argv[]) {
//! JitRuntime rt; // Create a runtime specialized for JIT.
//! CodeHolder code; // Create a CodeHolder.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Assembler a(&code); // Create and attach x86::Assembler to code.
//!
//! // Let's get these registers from x86::Assembler.
//! x86::Gp zbp = a.zbp();
//! x86::Gp zsp = a.zsp();
//!
//! // Function prolog.
//! a.push(zbp);
//! a.mov(zbp, zsp);
//!
//! // This is where we are going to patch the code later, so let's get the offset
//! // (the current location) from the beginning of the code-buffer.
//! size_t patchOffset = a.offset();
//! // Let's just emit 'sub zsp, 0' for now, but don't forget to use LONG form.
//! a.long_().sub(zsp, 0);
//!
//! // ... emit some code (this just sets return value to zero) ...
//! a.xor_(x86::eax, x86::eax);
//!
//! // Function epilog and return.
//! a.mov(zsp, zbp);
//! a.pop(zbp);
//! a.ret();
//!
//! // Now we know how much stack we want to reserve. 128 bytes was chosen on
//! // purpose as it's only encodable in the long form that we have used.
//!
//! int stackSize = 128; // Number of bytes to reserve on the stack.
//! a.setOffset(patchOffset); // Move the current cursor to `patchOffset`.
//! a.long_().sub(zsp, stackSize); // Patch the code; don't forget to use LONG form.
//!
//! // Now the code is ready to be called.
//! Func fn;
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error returned by AsmJit.
//!
//! int result = fn(); // Execute the generated code.
//! printf("%d\n", result); // Print the resulting "0".
//!
//! rt.release(fn); // Remove the function from the runtime.
//! return 0;
//! }
//! ```
//!
//! If you run the example it will just work, because both instructions have the same size. As an experiment you can
//! try removing the `long_()` option to see what happens when wrongly-sized code is generated.
//!
//! ### Code Patching and REX Prefix
//!
//! In 64-bit mode there is one more thing to worry about when patching code: REX prefix. It's a single byte prefix
//! designed to address registers with ids from 8 to 15 and to override the default width of operation from 32 to 64
//! bits. AsmJit, like other assemblers, only emits REX prefix when it's necessary. If the patched code only changes
//! the immediate value as shown in the previous example then there is nothing to worry about as it doesn't change
//! the logic behind emitting REX prefix, however, if the patched code changes register id or overrides the operation
//! width then it's important to take care of REX prefix as well.
//!
//! AsmJit contains another instruction option that controls (forces) REX prefix - `rex()`. If you use it the
//! instruction emitted will always use REX prefix even when it's encodable without it. The following list contains
//! some instructions and their binary representations to illustrate when it's emitted:
//!
//! - `__83C410` - `add esp, 16` - 32-bit operation in 64-bit mode doesn't require REX prefix.
//! - `4083C410` - `rex add esp, 16` - 32-bit operation in 64-bit mode with forced REX prefix (0x40).
//! - `4883C410` - `add rsp, 16` - 64-bit operation in 64-bit mode requires REX prefix (0x48).
//! - `4183C410` - `add r12d, 16` - 32-bit operation in 64-bit mode using R12D requires REX prefix (0x41).
//! - `4983C410` - `add r12, 16` - 64-bit operation in 64-bit mode using R12 requires REX prefix (0x49).
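//!
//! As a minimal sketch (not part of the original docs), forcing REX on a 32-bit instruction keeps its
//! encoding the same size as the R8-R15 variants, so a later patch can change the register id without
//! resizing the code:
//!
//! ```
//! void patchableAdd(x86::Assembler& a) {
//!   // Encodes as 4083C010 (4 bytes) instead of 83C010 (3 bytes), which matches
//!   // the size of `add r12d, 16` (4183C410), so the register id can be patched.
//!   a.rex().add(x86::eax, 16);
//! }
//! ```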
//!
//! ### More Prefixes
//!
//! The X86 architecture is known for its prefixes. AsmJit supports all prefixes
//! that can affect how an instruction is encoded:
//!
//! ```
//! #include <asmjit/x86.h>
//!
//! using namespace asmjit;
//!
//! void prefixesExample(x86::Assembler& a, const x86::Gp& dst, const x86::Gp& src) {
//! // Lock prefix for implementing atomics:
//! // lock add dword ptr [dst], 1
//! a.lock().add(x86::dword_ptr(dst), 1);
//!
//! // Similarly, XAcquire/XRelease prefixes are also available:
//! // xacquire add dword ptr [dst], 1
//! a.xacquire().add(x86::dword_ptr(dst), 1);
//!
//! // Rep prefix (see also repe/repz and repne/repnz):
//! // rep movs byte ptr [dst], byte ptr [src]
//! a.rep().movs(x86::byte_ptr(dst), x86::byte_ptr(src));
//!
//! // Forcing REX prefix in 64-bit mode.
//! // rex mov eax, 1
//! a.rex().mov(x86::eax, 1);
//!
//! // AVX instruction without forced prefix uses the shortest encoding:
//! // vaddpd xmm0, xmm1, xmm2 -> [C5|F1|58|C2]
//! a.vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
//!
//! // Forcing VEX3 prefix (AVX):
//! // vex3 vaddpd xmm0, xmm1, xmm2 -> [C4|E1|71|58|C2]
//! a.vex3().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
//!
//! // Forcing EVEX prefix (AVX512):
//! // evex vaddpd xmm0, xmm1, xmm2 -> [62|F1|F5|08|58|C2]
//! a.evex().vaddpd(x86::xmm0, x86::xmm1, x86::xmm2);
//!
//! // Some instructions accept prefixes that were not originally intended for them:
//! // rep ret
//! a.rep().ret();
//! }
//! ```
//!
//! It's important to understand that prefixes are part of instruction options. When a member function that adds
//! a prefix is called, the prefix is combined with the existing instruction options and affects only the next
//! instruction generated.
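//!
//! A short sketch (not from the original docs) illustrating that a prefix is consumed by the next
//! instruction only:
//!
//! ```
//! void optionsAreConsumed(x86::Assembler& a, const x86::Gp& dst) {
//!   a.lock().add(x86::dword_ptr(dst), 1); // LOCK prefix applies to this ADD only.
//!   a.add(x86::dword_ptr(dst), 1);        // Emitted without a LOCK prefix.
//! }
//! ```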
//!
//! ### Generating AVX512 Code
//!
//! x86::Assembler can generate AVX512+ code including the use of opmask registers. An opmask can be specified
//! through the \ref x86::Assembler::k() function, which stores it as an extra register to be used by the next
//! instruction. AsmJit uses the same concept for manipulating instruction options as well.
//!
//! The following AVX512 features are supported:
//!
//! - Opmask selector {k} and zeroing {z}.
//! - Rounding modes {rn|rd|ru|rz} and suppress-all-exceptions {sae} option.
//! - AVX512 broadcasts {1toN}.
//!
//! The following example demonstrates how AVX512 features can be used:
//!
//! ```
//! #include <asmjit/x86.h>
//!
//! using namespace asmjit;
//!
//! void generateAVX512Code(x86::Assembler& a) {
//! using namespace x86;
//!
//! // Opmask Selectors
//! // ----------------
//! //
//! // - Opmask / zeroing is part of the instruction options / extraReg.
//! // - k(reg) is like {kreg} in Intel syntax.
//! // - z() is like {z} in Intel syntax.
//!
//! // vaddpd zmm0 {k1} {z}, zmm1, zmm2
//! a.k(k1).z().vaddpd(zmm0, zmm1, zmm2);
//!
//! // Memory Broadcasts
//! // -----------------
//! //
//! // - Broadcast data is part of memory operand.
//! // - Use x86::Mem::_1toN(), which returns a new x86::Mem operand.
//!
//! // vaddpd zmm0 {k1} {z}, zmm1, [rcx] {1to8}
//! a.k(k1).z().vaddpd(zmm0, zmm1, x86::mem(rcx)._1to8());
//!
//! // Embedded Rounding & Suppress-All-Exceptions
//! // -------------------------------------------
//! //
//! // - Rounding mode and {sae} are part of instruction options.
//! // - Use sae() to enable exception suppression.
//! // - Use rn_sae(), rd_sae(), ru_sae(), and rz_sae() - to enable rounding.
//! // - Embedded rounding implicitly sets {sae} as well - that's why the API
//! // functions also have the sae() suffix, to make it clear.
//!
//! // vcmppd k1, zmm1, zmm2, 0x00 {sae}
//! a.sae().vcmppd(k1, zmm1, zmm2, 0);
//!
//! // vaddpd zmm0, zmm1, zmm2 {rz}
//! a.rz_sae().vaddpd(zmm0, zmm1, zmm2);
//! }
//! ```
class ASMJIT_VIRTAPI Assembler
: public BaseAssembler,
public EmitterImplicitT<Assembler> {
public:
ASMJIT_NONCOPYABLE(Assembler)
typedef BaseAssembler Base;
//! \name Construction & Destruction
//! \{
ASMJIT_API explicit Assembler(CodeHolder* code = nullptr) noexcept;
ASMJIT_API virtual ~Assembler() noexcept;
//! \}
//! \cond INTERNAL
//! \name Internal
//! \{
// NOTE: x86::Assembler uses _privateData to store 'address-override' bit that is used to decide whether to emit
// address-override (67H) prefix based on the memory BASE+INDEX registers. It's either `kX86MemInfo_67H_X86` or
// `kX86MemInfo_67H_X64`.
inline uint32_t _addressOverrideMask() const noexcept { return _privateData; }
inline void _setAddressOverrideMask(uint32_t m) noexcept { _privateData = m; }
//! \}
//! \endcond
//! \name Emit
//! \{
ASMJIT_API Error _emit(InstId instId, const Operand_& o0, const Operand_& o1, const Operand_& o2, const Operand_* opExt) override;
//! \}
//! \name Align
//! \{
ASMJIT_API Error align(AlignMode alignMode, uint32_t alignment) override;
//! \}
//! \name Events
//! \{
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
//! \}
};
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86ASSEMBLER_H_INCLUDED

View File

@ -0,0 +1,52 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_BUILDER)
#include "../x86/x86assembler.h"
#include "../x86/x86builder.h"
#include "../x86/x86emithelper_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
// x86::Builder - Construction & Destruction
// =========================================
Builder::Builder(CodeHolder* code) noexcept : BaseBuilder() {
_archMask = (uint64_t(1) << uint32_t(Arch::kX86)) |
(uint64_t(1) << uint32_t(Arch::kX64)) ;
assignEmitterFuncs(this);
if (code)
code->attach(this);
}
Builder::~Builder() noexcept {}
// x86::Builder - Events
// =====================
Error Builder::onAttach(CodeHolder* code) noexcept {
return Base::onAttach(code);
}
Error Builder::onDetach(CodeHolder* code) noexcept {
return Base::onDetach(code);
}
// x86::Builder - Finalize
// =======================
Error Builder::finalize() {
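// Run all attached passes first, then serialize the node list into a temporary
// Assembler that shares this Builder's CodeHolder and emitter options.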
ASMJIT_PROPAGATE(runPasses());
Assembler a(_code);
a.addEncodingOptions(encodingOptions());
a.addDiagnosticOptions(diagnosticOptions());
return serializeTo(&a);
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_X86 && !ASMJIT_NO_BUILDER

353
src/asmjit/x86/x86builder.h Normal file
View File

@ -0,0 +1,353 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86BUILDER_H_INCLUDED
#define ASMJIT_X86_X86BUILDER_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_BUILDER
#include "../core/builder.h"
#include "../x86/x86emitter.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \addtogroup asmjit_x86
//! \{
//! X86/X64 builder implementation.
//!
//! The code representation used by \ref BaseBuilder is compatible with everything AsmJit provides. Each instruction
//! is stored as \ref InstNode, which contains instruction id, options, and operands. Each instruction emitted will
//! create a new \ref InstNode instance and insert it at the current cursor in the doubly-linked list of nodes. Since
//! the instruction stream used by \ref BaseBuilder can be manipulated, we can rewrite the SumInts example from
//! \ref asmjit_assembler into the following:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! typedef void (*SumIntsFunc)(int* dst, const int* a, const int* b);
//!
//! // Small helper function to print the current content of `builder`.
//! static void dumpCode(BaseBuilder& builder, const char* phase) {
//! String sb;
//! FormatOptions formatOptions {};
//!
//! Formatter::formatNodeList(sb, formatOptions, &builder);
//! printf("%s:\n%s\n", phase, sb.data());
//! }
//!
//! int main() {
//! JitRuntime rt; // Create JIT Runtime.
//! CodeHolder code; // Create a CodeHolder.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Builder cb(&code); // Create and attach x86::Builder to `code`.
//!
//! // Decide which registers will be mapped to function arguments. Try changing registers
//! // of `dst`, `srcA`, and `srcB` and see what happens in the function's prolog and epilog.
//! x86::Gp dst = cb.zax();
//! x86::Gp srcA = cb.zcx();
//! x86::Gp srcB = cb.zdx();
//!
//! x86::Xmm vec0 = x86::xmm0;
//! x86::Xmm vec1 = x86::xmm1;
//!
//! // Create and initialize `FuncDetail`.
//! FuncDetail func;
//! func.init(FuncSignatureT<void, int*, const int*, const int*>(CallConvId::kHost));
//!
//! // Remember prolog insertion point.
//! BaseNode* prologInsertionPoint = cb.cursor();
//!
//! // Emit function body:
//! cb.movdqu(vec0, x86::ptr(srcA)); // Load 4 ints from [srcA] to XMM0.
//! cb.movdqu(vec1, x86::ptr(srcB)); // Load 4 ints from [srcB] to XMM1.
//! cb.paddd(vec0, vec1); // Add 4 ints in XMM1 to XMM0.
//! cb.movdqu(x86::ptr(dst), vec0); // Store the result to [dst].
//!
//! // Remember epilog insertion point.
//! BaseNode* epilogInsertionPoint = cb.cursor();
//!
//! // Let's see what we have now.
//! dumpCode(cb, "Raw Function");
//!
//! // Now, after we emitted the function body, we can insert the prolog, argument
//! // allocation, and epilog. This is not possible when using a pure x86::Assembler.
//! FuncFrame frame;
//! frame.init(func);
//!
//! // Make XMM0 and XMM1 dirty; RegGroup::kVec describes XMM|YMM|ZMM registers.
//! frame.setDirtyRegs(RegGroup::kVec, Support::bitMask(0, 1));
//!
//! FuncArgsAssignment args(&func); // Create arguments assignment context.
//! args.assignAll(dst, srcA, srcB); // Assign our registers to arguments.
//! args.updateFuncFrame(frame); // Reflect our args in FuncFrame.
//! frame.finalize(); // Finalize the FuncFrame (updates it).
//!
//! // Insert function prolog and allocate arguments to registers.
//! cb.setCursor(prologInsertionPoint);
//! cb.emitProlog(frame);
//! cb.emitArgsAssignment(frame, args);
//!
//! // Insert function epilog.
//! cb.setCursor(epilogInsertionPoint);
//! cb.emitEpilog(frame);
//!
//! // Let's see how the function's prolog and epilog looks.
//! dumpCode(cb, "Prolog & Epilog");
//!
//! // IMPORTANT: Builder requires finalize() to be called to serialize its
//! // code to the Assembler (it automatically creates one if not attached).
//! cb.finalize();
//!
//! SumIntsFunc fn;
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error case.
//!
//! // Execute the generated function.
//! int inA[4] = { 4, 3, 2, 1 };
//! int inB[4] = { 1, 5, 2, 8 };
//! int out[4];
//! fn(out, inA, inB);
//!
//! // Prints {5 8 4 9}
//! printf("{%d %d %d %d}\n", out[0], out[1], out[2], out[3]);
//!
//! rt.release(fn); // Explicitly remove the function from the runtime.
//! return 0;
//! }
//! ```
//!
//! When the example is executed it should output the following (this run uses the AMD64 SystemV ABI):
//!
//! ```
//! Raw Function:
//! movdqu xmm0, [rcx]
//! movdqu xmm1, [rdx]
//! paddd xmm0, xmm1
//! movdqu [rax], xmm0
//!
//! Prolog & Epilog:
//! mov rax, rdi
//! mov rcx, rsi
//! movdqu xmm0, [rcx]
//! movdqu xmm1, [rdx]
//! paddd xmm0, xmm1
//! movdqu [rax], xmm0
//! ret
//!
//! {5 8 4 9}
//! ```
//!
//! The use-cases for \ref BaseBuilder are virtually unlimited and depend on your creativity and experience.
//! The previous example can be easily improved to collect all dirty registers inside the function programmatically
//! and to pass them to \ref FuncFrame::setDirtyRegs().
//!
//! ```
//! #include <asmjit/x86.h>
//!
//! using namespace asmjit;
//!
//! // NOTE: This function doesn't cover all possible constructs. It ignores instructions that write
//! // to implicit registers that are not part of the operand list. It also counts read-only registers.
//! // Real implementation would be a bit more complicated, but still relatively easy to implement.
//! static void collectDirtyRegs(const BaseNode* first,
//! const BaseNode* last,
//! Support::Array<RegMask, Globals::kNumVirtGroups>& regMask) {
//! const BaseNode* node = first;
//! while (node) {
//! if (node->actsAsInst()) {
//! const InstNode* inst = node->as<InstNode>();
//! const Operand* opArray = inst->operands();
//!
//! for (uint32_t i = 0, opCount = inst->opCount(); i < opCount; i++) {
//! const Operand& op = opArray[i];
//! if (op.isReg()) {
//! const x86::Reg& reg = op.as<x86::Reg>();
//! if (reg.group() <= RegGroup::kMaxVirt) {
//! regMask[reg.group()] |= 1u << reg.id();
//! }
//! }
//! }
//! }
//!
//! if (node == last)
//! break;
//! node = node->next();
//! }
//! }
//!
//! static void setDirtyRegsOfFuncFrame(const x86::Builder& builder, FuncFrame& frame) {
//! Support::Array<RegMask, Globals::kNumVirtGroups> regMask {};
//! collectDirtyRegs(builder.firstNode(), builder.lastNode(), regMask);
//!
//! // X86/X64 ABIs only require saving GP/XMM registers:
//! frame.setDirtyRegs(RegGroup::kGp, regMask[RegGroup::kGp]);
//! frame.setDirtyRegs(RegGroup::kVec, regMask[RegGroup::kVec]);
//! }
//! ```
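//!
//! With such a helper, the hard-coded `setDirtyRegs()` call from the previous example could be replaced
//! by a single call (a sketch assuming the `cb` and `func` variables from the earlier example):
//!
//! ```
//! FuncFrame frame;
//! frame.init(func);
//! setDirtyRegsOfFuncFrame(cb, frame); // Collect dirty registers programmatically.
//! ```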
//!
//! ### Casting Between Various Emitters
//!
//! Even though \ref BaseAssembler and \ref BaseBuilder provide the same interface as defined by \ref BaseEmitter,
//! their platform dependent variants like \ref x86::Assembler and \ref x86::Builder cannot be interchanged or cast
//! to each other by using a C++ `static_cast<>`. The main reason is that the inheritance graphs of these classes
//! differ and are cast-incompatible, as illustrated below:
//!
//! ```
//! +--------------+ +=========================+
//! +----------------------->| x86::Emitter |<--+--# x86::EmitterImplicitT<> #<--+
//! | +--------------+ | +=========================+ |
//! | (abstract) | (mixin) |
//! | +--------------+ +~~~~~~~~~~~~~~+ | |
//! +-->| BaseAssembler|---->|x86::Assembler|<--+ |
//! | +--------------+ +~~~~~~~~~~~~~~+ | |
//! | (abstract) (final) | |
//! +===============+ | +--------------+ +~~~~~~~~~~~~~~+ | |
//! # BaseEmitter #--+-->| BaseBuilder |--+->| x86::Builder |<--+ |
//! +===============+ +--------------+ | +~~~~~~~~~~~~~~+ |
//! (abstract) (abstract) | (final) |
//! +---------------------+ |
//! | |
//! | +--------------+ +~~~~~~~~~~~~~~+ +=========================+ |
//! +-->| BaseCompiler |---->| x86::Compiler|<-----# x86::EmitterExplicitT<> #---+
//! +--------------+ +~~~~~~~~~~~~~~+ +=========================+
//! (abstract) (final) (mixin)
//! ```
//!
//! The graph basically shows that it's not possible to cast between \ref x86::Assembler and \ref x86::Builder.
//! However, since both share the base interface (\ref BaseEmitter) it's possible to cast them to a class that
//! cannot be instantiated, but defines the same interface - the class is called \ref x86::Emitter and was
//! introduced to make it possible to write a function that can emit to both \ref x86::Assembler and \ref
//! x86::Builder. Note that \ref x86::Emitter cannot be created; it's abstract and has private constructors and
//! destructors. It was only designed to be cast to and used as an interface.
//!
//! Each architecture-specific emitter implements a member function called
//! `as<arch::Emitter>()`, which casts the instance to the architecture
//! specific emitter as illustrated below:
//!
//! ```
//! #include <asmjit/x86.h>
//!
//! using namespace asmjit;
//!
//! static void emitSomething(x86::Emitter* e) {
//! e->mov(x86::eax, x86::ebx);
//! }
//!
//! static void assemble(CodeHolder& code, bool useAsm) {
//! if (useAsm) {
//! x86::Assembler assembler(&code);
//! emitSomething(assembler.as<x86::Emitter>());
//! }
//! else {
//! x86::Builder builder(&code);
//! emitSomething(builder.as<x86::Emitter>());
//!
//! // NOTE: Builder requires `finalize()` to be called to serialize its
//! // content to Assembler (it automatically creates one if not attached).
//! builder.finalize();
//! }
//! }
//! ```
//!
//! The example above shows how to create a function that can emit code to either \ref x86::Assembler or \ref
//! x86::Builder through \ref x86::Emitter, which provides emitter-neutral functionality. \ref x86::Emitter,
//! however, doesn't provide any emitter-specific functionality like `setCursor()`.
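//!
//! If emitter-specific functionality is needed, the emitter type can be checked at runtime first. A
//! minimal sketch (not from the original docs):
//!
//! ```
//! static void maybeSetCursor(BaseEmitter* e, BaseNode* node) {
//!   // BaseCompiler inherits from BaseBuilder, so both support setCursor().
//!   if (e->isBuilder() || e->isCompiler())
//!     static_cast<BaseBuilder*>(e)->setCursor(node);
//! }
//! ```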
//!
//! ### Code Injection and Manipulation
//!
//! \ref BaseBuilder emitter stores its nodes in a double-linked list, which makes it easy to manipulate that
//! list during the code generation or afterwards. Each node is always emitted next to the current cursor and
//! the cursor is advanced to that newly emitted node. The cursor can be retrieved and changed by \ref
//! BaseBuilder::cursor() and \ref BaseBuilder::setCursor(), respectively.
//!
//! The example below demonstrates how to remember a node and inject something
//! next to it.
//!
//! ```
//! static void example(x86::Builder& builder) {
//! // Emit something - after it returns, the cursor points at the last
//! // emitted node.
//! builder.mov(x86::rax, x86::rdx); // [1]
//!
//! // We can retrieve the node.
//! BaseNode* node = builder.cursor();
//!
//! // Change the instruction we just emitted, just for fun...
//! if (node->isInst()) {
//! InstNode* inst = node->as<InstNode>();
//! // Change the operand at index [1] to RCX.
//! inst->setOp(1, x86::rcx);
//! }
//!
//! // ------------------------- Generate Some Code -------------------------
//! builder.add(x86::rax, x86::rdx); // [2]
//! builder.shr(x86::rax, 3); // [3]
//! // ----------------------------------------------------------------------
//!
//! // Now that we know where our node is, we can simply change the cursor
//! // and start emitting something after it. The setCursor() function
//! // returns the previous cursor, and it's always good practice to remember
//! // it, because you never know whether you are already injecting code
//! // somewhere else...
//! BaseNode* oldCursor = builder.setCursor(node);
//!
//! builder.imul(x86::rax, 8); // [4]
//!
//! // Restore the cursor
//! builder.setCursor(oldCursor);
//! }
//! ```
//!
//! The function above would actually emit the following:
//!
//! ```
//! mov rax, rcx ; [1] Patched at the beginning.
//! imul rax, 8 ; [4] Injected.
//! add rax, rdx ; [2] Followed [1] initially.
//! shr rax, 3 ; [3] Follows [2].
//! ```
class ASMJIT_VIRTAPI Builder
: public BaseBuilder,
public EmitterImplicitT<Builder> {
public:
ASMJIT_NONCOPYABLE(Builder)
typedef BaseBuilder Base;
//! \name Construction & Destruction
//! \{
ASMJIT_API explicit Builder(CodeHolder* code = nullptr) noexcept;
ASMJIT_API virtual ~Builder() noexcept;
//! \}
//! \name Events
//! \{
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
//! \}
//! \name Finalize
//! \{
ASMJIT_API Error finalize() override;
//! \}
};
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_BUILDER
#endif // ASMJIT_X86_X86BUILDER_H_INCLUDED

View File

@ -0,0 +1,61 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_X86) && !defined(ASMJIT_NO_COMPILER)
#include "../x86/x86assembler.h"
#include "../x86/x86compiler.h"
#include "../x86/x86instapi_p.h"
#include "../x86/x86rapass_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
// x86::Compiler - Construction & Destruction
// ==========================================
Compiler::Compiler(CodeHolder* code) noexcept : BaseCompiler() {
_archMask = (uint64_t(1) << uint32_t(Arch::kX86)) |
(uint64_t(1) << uint32_t(Arch::kX64)) ;
assignEmitterFuncs(this);
if (code)
code->attach(this);
}
Compiler::~Compiler() noexcept {}
// x86::Compiler - Events
// ======================
Error Compiler::onAttach(CodeHolder* code) noexcept {
ASMJIT_PROPAGATE(Base::onAttach(code));
Error err = addPassT<X86RAPass>();
if (ASMJIT_UNLIKELY(err)) {
onDetach(code);
return err;
}
return kErrorOk;
}
Error Compiler::onDetach(CodeHolder* code) noexcept {
return Base::onDetach(code);
}
// x86::Compiler - Finalize
// ========================
Error Compiler::finalize() {
ASMJIT_PROPAGATE(runPasses());
Assembler a(_code);
a.addEncodingOptions(encodingOptions());
a.addDiagnosticOptions(diagnosticOptions());
return serializeTo(&a);
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_X86 && !ASMJIT_NO_COMPILER

View File

@ -0,0 +1,721 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86COMPILER_H_INCLUDED
#define ASMJIT_X86_X86COMPILER_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_COMPILER
#include "../core/compiler.h"
#include "../core/type.h"
#include "../x86/x86emitter.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \addtogroup asmjit_x86
//! \{
//! X86/X64 compiler implementation.
//!
//! ### Compiler Basics
//!
//! The first \ref x86::Compiler example shows how to generate a function that simply returns an integer value. It's
//! an analogy to the first Assembler example:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! // Signature of the generated function.
//! typedef int (*Func)(void);
//!
//! int main() {
//! JitRuntime rt; // Runtime specialized for JIT code execution.
//! CodeHolder code; // Holds code and relocation information.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
//!
//! cc.addFunc(FuncSignatureT<int>());// Begin a function of `int fn(void)` signature.
//!
//! x86::Gp vReg = cc.newGpd(); // Create a 32-bit general purpose register.
//! cc.mov(vReg, 1); // Move one to our virtual register `vReg`.
//! cc.ret(vReg); // Return `vReg` from the function.
//!
//! cc.endFunc(); // End of the function body.
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
//!
//! Func fn;
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error returned by AsmJit.
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
//!
//! int result = fn(); // Execute the generated code.
//! printf("%d\n", result); // Print the resulting "1".
//!
//! rt.release(fn); // Explicitly remove the function from the runtime.
//! return 0;
//! }
//! ```
//!
//! The \ref BaseCompiler::addFunc() and \ref BaseCompiler::endFunc() functions are used to define a function and
//! mark its end. Both must be called per function, but the body doesn't have to be generated in sequence. An example
//! of generating two functions will be shown later. The next example shows more complicated code that contains a loop
//! and generates a simple memory copy function that uses `uint32_t` items:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! // Signature of the generated function.
//! typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count);
//!
//! int main() {
//! JitRuntime rt; // Runtime specialized for JIT code execution.
//! CodeHolder code; // Holds code and relocation information.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
//!
//! FuncNode* funcNode = cc.addFunc( // Begin the function of the following signature:
//! FuncSignatureT<void, // Return value - void (no return value).
//! uint32_t*, // 1st argument - uint32_t* (machine reg-size).
//! const uint32_t*, // 2nd argument - uint32_t* (machine reg-size).
//! size_t>()); // 3rd argument - size_t (machine reg-size).
//!
//! Label L_Loop = cc.newLabel(); // Start of the loop.
//! Label L_Exit = cc.newLabel(); // Used to exit early.
//!
//! x86::Gp dst = cc.newIntPtr("dst");// Create `dst` register (destination pointer).
//! x86::Gp src = cc.newIntPtr("src");// Create `src` register (source pointer).
//! x86::Gp i = cc.newUIntPtr("i"); // Create `i` register (loop counter).
//!
//! funcNode->setArg(0, dst); // Assign `dst` argument.
//! funcNode->setArg(1, src); // Assign `src` argument.
//! funcNode->setArg(2, i); // Assign `i` argument.
//!
//! cc.test(i, i); // Early exit if length is zero.
//! cc.jz(L_Exit);
//!
//! cc.bind(L_Loop); // Bind the beginning of the loop here.
//!
//! x86::Gp tmp = cc.newInt32("tmp"); // Copy a single dword (4 bytes).
//! cc.mov(tmp, x86::dword_ptr(src)); // Load DWORD from [src] address.
//! cc.mov(x86::dword_ptr(dst), tmp); // Store DWORD to [dst] address.
//!
//! cc.add(src, 4); // Increment `src`.
//! cc.add(dst, 4); // Increment `dst`.
//!
//! cc.dec(i); // Loop until `i` reaches zero.
//! cc.jnz(L_Loop);
//!
//! cc.bind(L_Exit); // Label used by early exit.
//! cc.endFunc(); // End of the function body.
//!
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
//!
//! // Add the generated code to the runtime.
//! MemCpy32 memcpy32;
//! Error err = rt.add(&memcpy32, &code);
//!
//! // Handle a possible error returned by AsmJit.
//! if (err)
//! return 1;
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
//!
//! // Test the generated code.
//! uint32_t input[6] = { 1, 2, 3, 5, 8, 13 };
//! uint32_t output[6];
//! memcpy32(output, input, 6);
//!
//! for (uint32_t i = 0; i < 6; i++)
//! printf("%d\n", output[i]);
//!
//! rt.release(memcpy32);
//! return 0;
//! }
//! ```
//!
//! ### AVX and AVX-512
//!
//! AVX and AVX-512 code generation must be explicitly enabled via \ref FuncFrame to work properly. If it's not set
//! up correctly then the prolog & epilog would use SSE instead of AVX instructions to work with SIMD registers. In
//! addition, the Compiler requires AVX-512 to be explicitly enabled via \ref FuncFrame in order to use all 32 SIMD
//! registers.
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! // Signature of the generated function.
//! typedef void (*Func)(void*);
//!
//! int main() {
//! JitRuntime rt; // Runtime specialized for JIT code execution.
//! CodeHolder code; // Holds code and relocation information.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
//!
//! FuncNode* funcNode = cc.addFunc(FuncSignatureT<void, void*>());
//!
//! // Use the following to enable AVX and/or AVX-512.
//! funcNode->frame().setAvxEnabled();
//! funcNode->frame().setAvx512Enabled();
//!
//! // Do something with the input pointer.
//! x86::Gp addr = cc.newIntPtr("addr");
//! x86::Zmm vreg = cc.newZmm("vreg");
//!
//! funcNode->setArg(0, addr);
//!
//! cc.vmovdqu32(vreg, x86::ptr(addr));
//! cc.vpaddq(vreg, vreg, vreg);
//! cc.vmovdqu32(x86::ptr(addr), vreg);
//!
//! cc.endFunc(); // End of the function body.
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
//!
//! Func fn;
//! Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error returned by AsmJit.
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
//!
//! // Execute the generated code and print some output.
//! uint64_t data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
//! fn(data);
//! printf("%llu\n", (unsigned long long)data[0]);
//!
//! rt.release(fn); // Explicitly remove the function from the runtime.
//! return 0;
//! }
//! ```
//!
//! ### Recursive Functions
//!
//! It's possible to create more functions using the same \ref x86::Compiler instance and to make links between
//! them. In such a case it's important to keep the pointer to each \ref FuncNode.
//!
//! The example below creates a simple Fibonacci function that calls itself recursively:
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! // Signature of the generated function.
//! typedef uint32_t (*Fibonacci)(uint32_t x);
//!
//! int main() {
//! JitRuntime rt; // Runtime specialized for JIT code execution.
//! CodeHolder code; // Holds code and relocation information.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
//!
//! FuncNode* funcNode = cc.addFunc( // Begin the Fibonacci function; addFunc()
//! FuncSignatureT<int, int>()); // returns a pointer to the FuncNode.
//!
//! Label L_Exit = cc.newLabel(); // Exit label.
//! x86::Gp x = cc.newUInt32(); // Function x argument.
//! x86::Gp y = cc.newUInt32(); // Temporary.
//!
//! funcNode->setArg(0, x);
//!
//! cc.cmp(x, 3); // Return x if less than 3.
//! cc.jb(L_Exit);
//!
//! cc.mov(y, x); // Make copy of the original x.
//! cc.dec(x); // Decrease x.
//!
//! InvokeNode* invokeNode; // Function invocation:
//! cc.invoke(&invokeNode, // - InvokeNode (output).
//! funcNode->label(), // - Function address or Label.
//! FuncSignatureT<int, int>()); // - Function signature.
//!
//! invokeNode->setArg(0, x); // Assign x as the first argument.
//! invokeNode->setRet(0, x); // Assign x as a return value as well.
//!
//! cc.add(x, y); // Combine the return value with y.
//!
//! cc.bind(L_Exit);
//! cc.ret(x); // Return x.
//! cc.endFunc(); // End of the function body.
//!
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
//!
//! Fibonacci fib;
//! Error err = rt.add(&fib, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error returned by AsmJit.
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
//!
//! // Test the generated code.
//! printf("Fib(%u) -> %u\n", 8, fib(8));
//!
//! rt.release(fib);
//! return 0;
//! }
//! ```
//!
//! ### Stack Management
//!
//! A function's stack-frame is managed automatically and is used by the register allocator to spill virtual
//! registers. The compiler also provides an interface to allocate a user-defined block of the stack, which can
//! be used as temporary storage by the generated function. In the following example a 256-byte stack area is
//! allocated, filled with bytes from 0 to 255, and then iterated over again to sum all the values.
//!
//! ```
//! #include <asmjit/x86.h>
//! #include <stdio.h>
//!
//! using namespace asmjit;
//!
//! // Signature of the generated function.
//! typedef int (*Func)(void);
//!
//! int main() {
//! JitRuntime rt; // Runtime specialized for JIT code execution.
//! CodeHolder code; // Holds code and relocation information.
//!
//! code.init(rt.environment()); // Initialize code to match the JIT environment.
//! x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
//!
//! cc.addFunc(FuncSignatureT<int>());// Create a function that returns int.
//!
//! x86::Gp p = cc.newIntPtr("p");
//! x86::Gp i = cc.newIntPtr("i");
//!
//! // Allocate 256 bytes on the stack aligned to 4 bytes.
//! x86::Mem stack = cc.newStack(256, 4);
//!
//! x86::Mem stackIdx(stack); // Copy of stack with i added.
//! stackIdx.setIndex(i); // stackIdx <- stack[i].
//! stackIdx.setSize(1); // stackIdx <- byte ptr stack[i].
//!
//! // Load the stack address into `p`. This step is purely optional and shows
//! // that `lea` is useful for loading a memory operand's address (even an
//! // absolute one) into a general purpose register.
//! cc.lea(p, stack);
//!
//! // Clear i (xor is a C++ keyword, hence 'xor_' is used instead).
//! cc.xor_(i, i);
//!
//! Label L1 = cc.newLabel();
//! Label L2 = cc.newLabel();
//!
//! cc.bind(L1); // First loop, fill the stack.
//! cc.mov(stackIdx, i.r8()); // stack[i] = uint8_t(i).
//!
//! cc.inc(i); // i++;
//! cc.cmp(i, 256); // if (i < 256)
//! cc.jb(L1); // goto L1;
//!
//! // Second loop, sum all bytes stored in `stack`.
//! x86::Gp sum = cc.newInt32("sum");
//! x86::Gp val = cc.newInt32("val");
//!
//! cc.xor_(i, i);
//! cc.xor_(sum, sum);
//!
//! cc.bind(L2);
//!
//! cc.movzx(val, stackIdx); // val = uint32_t(stack[i]);
//! cc.add(sum, val); // sum += val;
//!
//! cc.inc(i); // i++;
//! cc.cmp(i, 256); // if (i < 256)
//! cc.jb(L2); // goto L2;
//!
//! cc.ret(sum); // Return the `sum` of all values.
//! cc.endFunc(); // End of the function body.
//!
//! cc.finalize(); // Translate and assemble the whole 'cc' content.
//! // ----> x86::Compiler is no longer needed from here and can be destroyed <----
//!
//! Func func;
//! Error err = rt.add(&func, &code); // Add the generated code to the runtime.
//! if (err) return 1; // Handle a possible error returned by AsmJit.
//! // ----> CodeHolder is no longer needed from here and can be destroyed <----
//!
//! printf("Func() -> %d\n", func()); // Test the generated code.
//!
//! rt.release(func);
//! return 0;
//! }
//! ```
//!
//! ### Constant Pool
//!
//! Compiler provides two constant pools for general purpose code generation:
//!
//! - Local constant pool - Part of \ref FuncNode; can only be used by a single function and is emitted after the
//! function's epilog sequence (after the `ret` instruction).
//!
//! - Global constant pool - Part of \ref BaseCompiler, flushed at the end of the generated code by \ref
//! BaseEmitter::finalize().
//!
//! The example below illustrates how a built-in constant pool can be used:
//!
//! ```
//! #include <asmjit/x86.h>
//!
//! using namespace asmjit;
//!
//! static void exampleUseOfConstPool(x86::Compiler& cc) {
//! cc.addFunc(FuncSignatureT<int>());
//!
//! x86::Gp v0 = cc.newGpd("v0");
//! x86::Gp v1 = cc.newGpd("v1");
//!
//! x86::Mem c0 = cc.newInt32Const(ConstPoolScope::kLocal, 200);
//! x86::Mem c1 = cc.newInt32Const(ConstPoolScope::kLocal, 33);
//!
//! cc.mov(v0, c0);
//! cc.mov(v1, c1);
//! cc.add(v0, v1);
//!
//! cc.ret(v0);
//! cc.endFunc();
//! }
//! ```
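//!
//! Passing \ref ConstPoolScope::kGlobal instead would place the constant into the global pool, which
//! is flushed at the end of the generated code. For example (a sketch):
//!
//! ```
//! x86::Mem c = cc.newDoubleConst(ConstPoolScope::kGlobal, 3.14159);
//! ```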
//!
//! ### Jump Tables
//!
//! x86::Compiler supports the `jmp` instruction with a reg/mem operand, which is a commonly used pattern to
//! implement indirect jumps within a function, for example to implement a `switch()` statement in programming
//! languages. By default AsmJit assumes that every basic block is a possible jump target, as it's unable to deduce
//! targets from an instruction's operands. This is a very pessimistic default that should be avoided if possible,
//! as it's costly and very unfriendly to liveness analysis and register allocation.
//!
//! Instead of relying on such pessimistic default behavior, let's use \ref JumpAnnotation to annotate a jump where
//! all targets are known:
//!
//! ```
//! #include <asmjit/x86.h>
//!
//! using namespace asmjit;
//!
//! static void exampleUseOfIndirectJump(x86::Compiler& cc) {
//! FuncNode* funcNode = cc.addFunc(FuncSignatureT<float, float, float, uint32_t>(CallConvId::kHost));
//!
//! // Function arguments
//! x86::Xmm a = cc.newXmmSs("a");
//! x86::Xmm b = cc.newXmmSs("b");
//! x86::Gp op = cc.newUInt32("op");
//!
//! x86::Gp target = cc.newIntPtr("target");
//! x86::Gp offset = cc.newIntPtr("offset");
//!
//! Label L_Table = cc.newLabel();
//! Label L_Add = cc.newLabel();
//! Label L_Sub = cc.newLabel();
//! Label L_Mul = cc.newLabel();
//! Label L_Div = cc.newLabel();
//! Label L_End = cc.newLabel();
//!
//! funcNode->setArg(0, a);
//! funcNode->setArg(1, b);
//! funcNode->setArg(2, op);
//!
//! // A jump annotation is a building block that allows annotating all possible targets where `jmp()` can
//! // jump. It then drives the CFG construction and liveness analysis, which impacts register allocation.
//! JumpAnnotation* annotation = cc.newJumpAnnotation();
//! annotation->addLabel(L_Add);
//! annotation->addLabel(L_Sub);
//! annotation->addLabel(L_Mul);
//! annotation->addLabel(L_Div);
//!
//! // This is most likely not the most common indirect jump approach, but it
//! // doesn't really matter how the final address is calculated. The most
//! // important part is using JumpAnnotation with `jmp()`.
//! cc.lea(offset, x86::ptr(L_Table));
//! if (cc.is64Bit())
//! cc.movsxd(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
//! else
//! cc.mov(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
//! cc.add(target, offset);
//! cc.jmp(target, annotation);
//!
//! // Acts like a switch() statement in C.
//! cc.bind(L_Add);
//! cc.addss(a, b);
//! cc.jmp(L_End);
//!
//! cc.bind(L_Sub);
//! cc.subss(a, b);
//! cc.jmp(L_End);
//!
//! cc.bind(L_Mul);
//! cc.mulss(a, b);
//! cc.jmp(L_End);
//!
//! cc.bind(L_Div);
//! cc.divss(a, b);
//!
//! cc.bind(L_End);
//! cc.ret(a);
//!
//! cc.endFunc();
//!
//! // Relative int32_t offsets of `L_XXX - L_Table`.
//! cc.bind(L_Table);
//! cc.embedLabelDelta(L_Add, L_Table, 4);
//! cc.embedLabelDelta(L_Sub, L_Table, 4);
//! cc.embedLabelDelta(L_Mul, L_Table, 4);
//! cc.embedLabelDelta(L_Div, L_Table, 4);
//! }
//! ```
class ASMJIT_VIRTAPI Compiler
: public BaseCompiler,
public EmitterExplicitT<Compiler> {
public:
ASMJIT_NONCOPYABLE(Compiler)
typedef BaseCompiler Base;
//! \name Construction & Destruction
//! \{
ASMJIT_API explicit Compiler(CodeHolder* code = nullptr) noexcept;
ASMJIT_API virtual ~Compiler() noexcept;
//! \}
//! \name Virtual Registers
//! \{
#ifndef ASMJIT_NO_LOGGING
# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \
_newRegFmt(&OUT, PARAM, FORMAT, ARGS)
#else
# define ASMJIT_NEW_REG_FMT(OUT, PARAM, FORMAT, ARGS) \
DebugUtils::unused(FORMAT); \
DebugUtils::unused(std::forward<Args>(args)...); \
_newReg(&OUT, PARAM)
#endif
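// The two macros below define `newXxx()` register factories: ASMJIT_NEW_REG_CUSTOM generates
// overloads that take a TypeId explicitly, while ASMJIT_NEW_REG_TYPED binds a fixed TypeId.
// Each comes in a plain variant and a printf-like variant that names the virtual register
// (the name is only used when logging is enabled).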
#define ASMJIT_NEW_REG_CUSTOM(FUNC, REG) \
inline REG FUNC(TypeId typeId) { \
REG reg(Globals::NoInit); \
_newReg(&reg, typeId); \
return reg; \
} \
\
template<typename... Args> \
inline REG FUNC(TypeId typeId, const char* fmt, Args&&... args) { \
REG reg(Globals::NoInit); \
ASMJIT_NEW_REG_FMT(reg, typeId, fmt, std::forward<Args>(args)...); \
return reg; \
}
#define ASMJIT_NEW_REG_TYPED(FUNC, REG, TYPE_ID) \
inline REG FUNC() { \
REG reg(Globals::NoInit); \
_newReg(&reg, TYPE_ID); \
return reg; \
} \
\
template<typename... Args> \
inline REG FUNC(const char* fmt, Args&&... args) { \
REG reg(Globals::NoInit); \
ASMJIT_NEW_REG_FMT(reg, TYPE_ID, fmt, std::forward<Args>(args)...); \
return reg; \
}
template<typename RegT>
inline RegT newSimilarReg(const RegT& ref) {
RegT reg(Globals::NoInit);
_newReg(&reg, ref);
return reg;
}
template<typename RegT, typename... Args>
inline RegT newSimilarReg(const RegT& ref, const char* fmt, Args&&... args) {
RegT reg(Globals::NoInit);
ASMJIT_NEW_REG_FMT(reg, ref, fmt, std::forward<Args>(args)...);
return reg;
}
ASMJIT_NEW_REG_CUSTOM(newReg , Reg )
ASMJIT_NEW_REG_CUSTOM(newGp , Gp )
ASMJIT_NEW_REG_CUSTOM(newVec , Vec )
ASMJIT_NEW_REG_CUSTOM(newK , KReg)
ASMJIT_NEW_REG_TYPED(newInt8 , Gp , TypeId::kInt8)
ASMJIT_NEW_REG_TYPED(newUInt8 , Gp , TypeId::kUInt8)
ASMJIT_NEW_REG_TYPED(newInt16 , Gp , TypeId::kInt16)
ASMJIT_NEW_REG_TYPED(newUInt16 , Gp , TypeId::kUInt16)
ASMJIT_NEW_REG_TYPED(newInt32 , Gp , TypeId::kInt32)
ASMJIT_NEW_REG_TYPED(newUInt32 , Gp , TypeId::kUInt32)
ASMJIT_NEW_REG_TYPED(newInt64 , Gp , TypeId::kInt64)
ASMJIT_NEW_REG_TYPED(newUInt64 , Gp , TypeId::kUInt64)
ASMJIT_NEW_REG_TYPED(newIntPtr , Gp , TypeId::kIntPtr)
ASMJIT_NEW_REG_TYPED(newUIntPtr, Gp , TypeId::kUIntPtr)
ASMJIT_NEW_REG_TYPED(newGpb , Gp , TypeId::kUInt8)
ASMJIT_NEW_REG_TYPED(newGpw , Gp , TypeId::kUInt16)
ASMJIT_NEW_REG_TYPED(newGpd , Gp , TypeId::kUInt32)
ASMJIT_NEW_REG_TYPED(newGpq , Gp , TypeId::kUInt64)
ASMJIT_NEW_REG_TYPED(newGpz , Gp , TypeId::kUIntPtr)
ASMJIT_NEW_REG_TYPED(newXmm , Xmm , TypeId::kInt32x4)
ASMJIT_NEW_REG_TYPED(newXmmSs , Xmm , TypeId::kFloat32x1)
ASMJIT_NEW_REG_TYPED(newXmmSd , Xmm , TypeId::kFloat64x1)
ASMJIT_NEW_REG_TYPED(newXmmPs , Xmm , TypeId::kFloat32x4)
ASMJIT_NEW_REG_TYPED(newXmmPd , Xmm , TypeId::kFloat64x2)
ASMJIT_NEW_REG_TYPED(newYmm , Ymm , TypeId::kInt32x8)
ASMJIT_NEW_REG_TYPED(newYmmPs , Ymm , TypeId::kFloat32x8)
ASMJIT_NEW_REG_TYPED(newYmmPd , Ymm , TypeId::kFloat64x4)
ASMJIT_NEW_REG_TYPED(newZmm , Zmm , TypeId::kInt32x16)
ASMJIT_NEW_REG_TYPED(newZmmPs , Zmm , TypeId::kFloat32x16)
ASMJIT_NEW_REG_TYPED(newZmmPd , Zmm , TypeId::kFloat64x8)
ASMJIT_NEW_REG_TYPED(newMm , Mm , TypeId::kMmx64)
ASMJIT_NEW_REG_TYPED(newKb , KReg, TypeId::kMask8)
ASMJIT_NEW_REG_TYPED(newKw , KReg, TypeId::kMask16)
ASMJIT_NEW_REG_TYPED(newKd , KReg, TypeId::kMask32)
ASMJIT_NEW_REG_TYPED(newKq , KReg, TypeId::kMask64)
#undef ASMJIT_NEW_REG_TYPED
#undef ASMJIT_NEW_REG_CUSTOM
#undef ASMJIT_NEW_REG_FMT
//! \}
//! \name Stack
//! \{
//! Creates a new memory chunk allocated on the current function's stack.
inline Mem newStack(uint32_t size, uint32_t alignment, const char* name = nullptr) {
Mem m(Globals::NoInit);
_newStack(&m, size, alignment, name);
return m;
}
//! \}
//! \name Constants
//! \{
//! Put data to a constant-pool and get a memory reference to it.
inline Mem newConst(ConstPoolScope scope, const void* data, size_t size) {
Mem m(Globals::NoInit);
_newConst(&m, scope, data, size);
return m;
}
//! Put a BYTE `val` to a constant-pool.
inline Mem newByteConst(ConstPoolScope scope, uint8_t val) noexcept { return newConst(scope, &val, 1); }
//! Put a WORD `val` to a constant-pool.
inline Mem newWordConst(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
//! Put a DWORD `val` to a constant-pool.
inline Mem newDWordConst(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
//! Put a QWORD `val` to a constant-pool.
inline Mem newQWordConst(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
//! Put a WORD `val` to a constant-pool.
inline Mem newInt16Const(ConstPoolScope scope, int16_t val) noexcept { return newConst(scope, &val, 2); }
//! Put a WORD `val` to a constant-pool.
inline Mem newUInt16Const(ConstPoolScope scope, uint16_t val) noexcept { return newConst(scope, &val, 2); }
//! Put a DWORD `val` to a constant-pool.
inline Mem newInt32Const(ConstPoolScope scope, int32_t val) noexcept { return newConst(scope, &val, 4); }
//! Put a DWORD `val` to a constant-pool.
inline Mem newUInt32Const(ConstPoolScope scope, uint32_t val) noexcept { return newConst(scope, &val, 4); }
//! Put a QWORD `val` to a constant-pool.
inline Mem newInt64Const(ConstPoolScope scope, int64_t val) noexcept { return newConst(scope, &val, 8); }
//! Put a QWORD `val` to a constant-pool.
inline Mem newUInt64Const(ConstPoolScope scope, uint64_t val) noexcept { return newConst(scope, &val, 8); }
//! Put a SP-FP `val` to a constant-pool.
inline Mem newFloatConst(ConstPoolScope scope, float val) noexcept { return newConst(scope, &val, 4); }
//! Put a DP-FP `val` to a constant-pool.
inline Mem newDoubleConst(ConstPoolScope scope, double val) noexcept { return newConst(scope, &val, 8); }
//! \}
//! \name Instruction Options
//! \{
//! Force the compiler to not follow the conditional or unconditional jump.
inline Compiler& unfollow() noexcept { addInstOptions(InstOptions::kUnfollow); return *this; }
//! Tell the compiler that the destination variable will be overwritten.
inline Compiler& overwrite() noexcept { addInstOptions(InstOptions::kOverwrite); return *this; }
//! \}
//! \name Function Call & Ret Intrinsics
//! \{
//! Invoke a function call without `target` type enforcement.
inline Error invoke_(InvokeNode** out, const Operand_& target, const FuncSignature& signature) {
return addInvokeNode(out, Inst::kIdCall, target, signature);
}
//! Invoke a function call of the given `target` and `signature` and store the added node to `out`.
//!
//! Creates a new \ref InvokeNode, initializes all the necessary members to match the given function `signature`,
//! adds the node to the compiler, and stores its pointer to `out`. The operation is atomic: if anything fails,
//! `nullptr` is stored in `out` and an error code is returned.
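//!
//! A minimal usage sketch (illustrative; `square` is a hypothetical C function
//! `int square(int)` and `x` a previously created virtual register):
//!
//!   InvokeNode* node;
//!   cc.invoke(&node, imm((void*)&square), FuncSignatureT<int, int>());
//!   node->setArg(0, x); // Pass `x` as the first argument.
//!   node->setRet(0, x); // Receive the return value back into `x`.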
inline Error invoke(InvokeNode** out, const Gp& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, const Mem& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, const Label& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, const Imm& target, const FuncSignature& signature) { return invoke_(out, target, signature); }
//! \overload
inline Error invoke(InvokeNode** out, uint64_t target, const FuncSignature& signature) { return invoke_(out, Imm(int64_t(target)), signature); }
//! Return from function.
inline Error ret() { return addRet(Operand(), Operand()); }
//! \overload
inline Error ret(const BaseReg& o0) { return addRet(o0, Operand()); }
//! \overload
inline Error ret(const BaseReg& o0, const BaseReg& o1) { return addRet(o0, o1); }
//! \}
//! \name Jump Tables Support
//! \{
using EmitterExplicitT<Compiler>::jmp;
//! Adds a jump to the given `target` with the provided jump `annotation`.
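//!
//! A minimal sketch (illustrative; labels `L0`/`L1` and the `target` register
//! are hypothetical):
//!
//!   JumpAnnotation* annotation = cc.newJumpAnnotation();
//!   annotation->addLabel(L0);
//!   annotation->addLabel(L1);
//!   cc.jmp(target, annotation); // The register allocator now knows all possible targets.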
inline Error jmp(const BaseReg& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); }
//! \overload
inline Error jmp(const BaseMem& target, JumpAnnotation* annotation) { return emitAnnotatedJump(Inst::kIdJmp, target, annotation); }
//! \}
//! \name Events
//! \{
ASMJIT_API Error onAttach(CodeHolder* code) noexcept override;
ASMJIT_API Error onDetach(CodeHolder* code) noexcept override;
//! \}
//! \name Finalize
//! \{
ASMJIT_API Error finalize() override;
//! \}
};
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_COMPILER
#endif // ASMJIT_X86_X86COMPILER_H_INCLUDED

View File

@ -0,0 +1,619 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_X86)
#include "../core/formatter.h"
#include "../core/funcargscontext_p.h"
#include "../core/string.h"
#include "../core/support.h"
#include "../core/type.h"
#include "../core/radefs_p.h"
#include "../x86/x86emithelper_p.h"
#include "../x86/x86emitter.h"
#include "../x86/x86formatter_p.h"
#include "../x86/x86instapi_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
// x86::EmitHelper - Utilities
// ===========================
static inline uint32_t getXmmMovInst(const FuncFrame& frame) {
bool avx = frame.isAvxEnabled();
bool aligned = frame.hasAlignedVecSR();
return aligned ? (avx ? Inst::kIdVmovaps : Inst::kIdMovaps)
: (avx ? Inst::kIdVmovups : Inst::kIdMovups);
}
//! Converts `size` to a 'kmov?' instruction.
static inline uint32_t kmovInstFromSize(uint32_t size) noexcept {
switch (size) {
case 1: return Inst::kIdKmovb;
case 2: return Inst::kIdKmovw;
case 4: return Inst::kIdKmovd;
case 8: return Inst::kIdKmovq;
default: return Inst::kIdNone;
}
}
static inline uint32_t makeCastOp(TypeId dst, TypeId src) noexcept {
return (uint32_t(dst) << 8) | uint32_t(src);
}
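// For example, makeCastOp(TypeId::kInt32, TypeId::kInt8) yields a unique 16-bit
// key for the (dst, src) pair, so a cast can be matched with a single comparison
// in the switches below.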
// x86::EmitHelper - Emit Reg Move
// ===============================
ASMJIT_FAVOR_SIZE Error EmitHelper::emitRegMove(
const Operand_& dst_,
const Operand_& src_, TypeId typeId, const char* comment) {
// Invalid or abstract TypeIds are not allowed.
ASMJIT_ASSERT(TypeUtils::isValid(typeId) && !TypeUtils::isAbstract(typeId));
Operand dst(dst_);
Operand src(src_);
InstId instId = Inst::kIdNone;
uint32_t memFlags = 0;
uint32_t overrideMemSize = 0;
enum MemFlags : uint32_t {
kDstMem = 0x1,
kSrcMem = 0x2
};
// Detect memory operands and patch them to have the same size as the register. BaseCompiler always sets memory size
// of allocs and spills, so it shouldn't really be necessary; however, since this function was separated from Compiler,
// it's better to make sure the size is always specified, as we can use 'movzx' and 'movsx', which rely on it.
if (dst.isMem()) { memFlags |= kDstMem; dst.as<Mem>().setSize(src.size()); }
if (src.isMem()) { memFlags |= kSrcMem; src.as<Mem>().setSize(dst.size()); }
switch (typeId) {
case TypeId::kInt8:
case TypeId::kUInt8:
case TypeId::kInt16:
case TypeId::kUInt16:
// Special case - 'movzx' load.
if (memFlags & kSrcMem) {
instId = Inst::kIdMovzx;
dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
break;
}
if (!memFlags) {
// Change both destination and source registers to GPD (safer, no dependencies).
dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
}
ASMJIT_FALLTHROUGH;
case TypeId::kInt32:
case TypeId::kUInt32:
case TypeId::kInt64:
case TypeId::kUInt64:
instId = Inst::kIdMov;
break;
case TypeId::kMmx32:
instId = Inst::kIdMovd;
if (memFlags) break;
ASMJIT_FALLTHROUGH;
case TypeId::kMmx64 : instId = Inst::kIdMovq ; break;
case TypeId::kMask8 : instId = Inst::kIdKmovb; break;
case TypeId::kMask16: instId = Inst::kIdKmovw; break;
case TypeId::kMask32: instId = Inst::kIdKmovd; break;
case TypeId::kMask64: instId = Inst::kIdKmovq; break;
default: {
TypeId scalarTypeId = TypeUtils::scalarOf(typeId);
if (TypeUtils::isVec32(typeId) && memFlags) {
overrideMemSize = 4;
if (scalarTypeId == TypeId::kFloat32)
instId = _avxEnabled ? Inst::kIdVmovss : Inst::kIdMovss;
else
instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
break;
}
if (TypeUtils::isVec64(typeId) && memFlags) {
overrideMemSize = 8;
if (scalarTypeId == TypeId::kFloat64)
instId = _avxEnabled ? Inst::kIdVmovsd : Inst::kIdMovsd;
else
instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
break;
}
if (scalarTypeId == TypeId::kFloat32)
instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
else if (scalarTypeId == TypeId::kFloat64)
instId = _avxEnabled ? Inst::kIdVmovapd : Inst::kIdMovapd;
else if (!_avx512Enabled)
instId = _avxEnabled ? Inst::kIdVmovdqa : Inst::kIdMovdqa;
else
instId = Inst::kIdVmovdqa32;
break;
}
}
if (!instId)
return DebugUtils::errored(kErrorInvalidState);
if (overrideMemSize) {
if (dst.isMem()) dst.as<Mem>().setSize(overrideMemSize);
if (src.isMem()) src.as<Mem>().setSize(overrideMemSize);
}
_emitter->setInlineComment(comment);
return _emitter->emit(instId, dst, src);
}
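// Illustrative outcomes of the selection above (not exhaustive):
// - TypeId::kUInt8, reg <- mem  : movzx r32, byte ptr [mem]
// - TypeId::kUInt64, reg <- reg : mov r64, r64
// - TypeId::kFloat32x4, reg <- reg: movaps (SSE) or vmovaps (AVX)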
// x86::EmitHelper - Emit Arg Move
// ===============================
ASMJIT_FAVOR_SIZE Error EmitHelper::emitArgMove(
const BaseReg& dst_, TypeId dstTypeId,
const Operand_& src_, TypeId srcTypeId, const char* comment) {
// Deduce optional `dstTypeId`, which may be `TypeId::kVoid` in some cases.
if (dstTypeId == TypeId::kVoid) {
const ArchTraits& archTraits = ArchTraits::byArch(_emitter->arch());
dstTypeId = archTraits.regTypeToTypeId(dst_.type());
}
// Invalid or abstract TypeIds are not allowed.
ASMJIT_ASSERT(TypeUtils::isValid(dstTypeId) && !TypeUtils::isAbstract(dstTypeId));
ASMJIT_ASSERT(TypeUtils::isValid(srcTypeId) && !TypeUtils::isAbstract(srcTypeId));
Reg dst(dst_.as<Reg>());
Operand src(src_);
uint32_t dstSize = TypeUtils::sizeOf(dstTypeId);
uint32_t srcSize = TypeUtils::sizeOf(srcTypeId);
InstId instId = Inst::kIdNone;
// Not a real loop; 'break' is just nicer than 'goto'.
for (;;) {
if (TypeUtils::isInt(dstTypeId)) {
if (TypeUtils::isInt(srcTypeId)) {
instId = Inst::kIdMovsx;
uint32_t castOp = makeCastOp(dstTypeId, srcTypeId);
// Sign extend by using 'movsx'.
if (castOp == makeCastOp(TypeId::kInt16, TypeId::kInt8 ) ||
castOp == makeCastOp(TypeId::kInt32, TypeId::kInt8 ) ||
castOp == makeCastOp(TypeId::kInt32, TypeId::kInt16) ||
castOp == makeCastOp(TypeId::kInt64, TypeId::kInt8 ) ||
castOp == makeCastOp(TypeId::kInt64, TypeId::kInt16))
break;
// Sign extend by using 'movsxd'.
instId = Inst::kIdMovsxd;
if (castOp == makeCastOp(TypeId::kInt64, TypeId::kInt32))
break;
}
if (TypeUtils::isInt(srcTypeId) || src_.isMem()) {
// Zero extend by using 'movzx' or 'mov'.
if (dstSize <= 4 && srcSize < 4) {
instId = Inst::kIdMovzx;
dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
}
else {
// We should have caught all possibilities where `srcSize` is less than 4, so we don't have to worry
// about 'movzx' anymore. The minimum size is enough to determine whether we want a 32-bit or 64-bit move.
instId = Inst::kIdMov;
srcSize = Support::min(srcSize, dstSize);
dst.setSignature(srcSize == 4 ? Reg::signatureOfT<RegType::kX86_Gpd>()
: Reg::signatureOfT<RegType::kX86_Gpq>());
if (src.isReg())
src.setSignature(dst.signature());
}
break;
}
// NOTE: The previous branch caught all memory sources; from here it's always a register-to-register conversion,
// so catch the remaining cases.
srcSize = Support::min(srcSize, dstSize);
if (TypeUtils::isMmx(srcTypeId)) {
// 64-bit move.
instId = Inst::kIdMovq;
if (srcSize == 8)
break;
// 32-bit move.
instId = Inst::kIdMovd;
dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
break;
}
if (TypeUtils::isMask(srcTypeId)) {
instId = kmovInstFromSize(srcSize);
dst.setSignature(srcSize <= 4 ? Reg::signatureOfT<RegType::kX86_Gpd>()
: Reg::signatureOfT<RegType::kX86_Gpq>());
break;
}
if (TypeUtils::isVec(srcTypeId)) {
// 64-bit move.
instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
if (srcSize == 8)
break;
// 32-bit move.
instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
dst.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
break;
}
}
if (TypeUtils::isMmx(dstTypeId)) {
instId = Inst::kIdMovq;
srcSize = Support::min(srcSize, dstSize);
if (TypeUtils::isInt(srcTypeId) || src.isMem()) {
// 64-bit move.
if (srcSize == 8)
break;
// 32-bit move.
instId = Inst::kIdMovd;
if (src.isReg())
src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
break;
}
if (TypeUtils::isMmx(srcTypeId))
break;
// This will hurt if AVX is enabled.
instId = Inst::kIdMovdq2q;
if (TypeUtils::isVec(srcTypeId))
break;
}
if (TypeUtils::isMask(dstTypeId)) {
srcSize = Support::min(srcSize, dstSize);
if (TypeUtils::isInt(srcTypeId) || TypeUtils::isMask(srcTypeId) || src.isMem()) {
instId = kmovInstFromSize(srcSize);
if (Reg::isGp(src) && srcSize <= 4)
src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
break;
}
}
if (TypeUtils::isVec(dstTypeId)) {
// By default set destination to XMM, will be set to YMM|ZMM if needed.
dst.setSignature(Reg::signatureOfT<RegType::kX86_Xmm>());
// This will hurt if AVX is enabled.
if (Reg::isMm(src)) {
// 64-bit move.
instId = Inst::kIdMovq2dq;
break;
}
// Argument conversion.
TypeId dstScalarId = TypeUtils::scalarOf(dstTypeId);
TypeId srcScalarId = TypeUtils::scalarOf(srcTypeId);
if (dstScalarId == TypeId::kFloat32 && srcScalarId == TypeId::kFloat64) {
srcSize = Support::min(dstSize * 2, srcSize);
dstSize = srcSize / 2;
if (srcSize <= 8)
instId = _avxEnabled ? Inst::kIdVcvtss2sd : Inst::kIdCvtss2sd;
else
instId = _avxEnabled ? Inst::kIdVcvtps2pd : Inst::kIdCvtps2pd;
if (dstSize == 32)
dst.setSignature(Reg::signatureOfT<RegType::kX86_Ymm>());
if (src.isReg())
src.setSignature(Reg::signatureOfVecBySize(srcSize));
break;
}
if (dstScalarId == TypeId::kFloat64 && srcScalarId == TypeId::kFloat32) {
srcSize = Support::min(dstSize, srcSize * 2) / 2;
dstSize = srcSize * 2;
if (srcSize <= 4)
instId = _avxEnabled ? Inst::kIdVcvtsd2ss : Inst::kIdCvtsd2ss;
else
instId = _avxEnabled ? Inst::kIdVcvtpd2ps : Inst::kIdCvtpd2ps;
dst.setSignature(Reg::signatureOfVecBySize(dstSize));
if (src.isReg() && srcSize >= 32)
src.setSignature(Reg::signatureOfT<RegType::kX86_Ymm>());
break;
}
srcSize = Support::min(srcSize, dstSize);
if (Reg::isGp(src) || src.isMem()) {
// 32-bit move.
if (srcSize <= 4) {
instId = _avxEnabled ? Inst::kIdVmovd : Inst::kIdMovd;
if (src.isReg())
src.setSignature(Reg::signatureOfT<RegType::kX86_Gpd>());
break;
}
// 64-bit move.
if (srcSize == 8) {
instId = _avxEnabled ? Inst::kIdVmovq : Inst::kIdMovq;
break;
}
}
if (Reg::isVec(src) || src.isMem()) {
instId = _avxEnabled ? Inst::kIdVmovaps : Inst::kIdMovaps;
if (src.isMem() && srcSize < _emitter->environment().stackAlignment())
instId = _avxEnabled ? Inst::kIdVmovups : Inst::kIdMovups;
OperandSignature signature = Reg::signatureOfVecBySize(srcSize);
dst.setSignature(signature);
if (src.isReg())
src.setSignature(signature);
break;
}
}
return DebugUtils::errored(kErrorInvalidState);
}
if (src.isMem())
src.as<Mem>().setSize(srcSize);
_emitter->setInlineComment(comment);
return _emitter->emit(instId, dst, src);
}
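// Illustrative conversions performed above (not exhaustive):
// - int32 arg -> int64 reg   : movsxd r64, r32 (sign extension)
// - uint16 arg -> uint32 reg : movzx r32, r16 (zero extension)
// - mask16 arg -> gp reg     : kmovw r32, k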
Error EmitHelper::emitRegSwap(
const BaseReg& a,
const BaseReg& b, const char* comment) {
if (a.isGp() && b.isGp()) {
_emitter->setInlineComment(comment);
return _emitter->emit(Inst::kIdXchg, a, b);
}
else
return DebugUtils::errored(kErrorInvalidState);
}
// x86::EmitHelper - Emit Prolog & Epilog
// ======================================
static inline void X86Internal_setupSaveRestoreInfo(RegGroup group, const FuncFrame& frame, Reg& xReg, uint32_t& xInst, uint32_t& xSize) noexcept {
switch (group) {
case RegGroup::kVec:
xReg = xmm(0);
xInst = getXmmMovInst(frame);
xSize = xReg.size();
break;
case RegGroup::kX86_K:
xReg = k(0);
xInst = Inst::kIdKmovq;
xSize = xReg.size();
break;
case RegGroup::kX86_MM:
xReg = mm(0);
xInst = Inst::kIdMovq;
xSize = xReg.size();
break;
default:
break;
}
}
ASMJIT_FAVOR_SIZE Error EmitHelper::emitProlog(const FuncFrame& frame) {
Emitter* emitter = _emitter->as<Emitter>();
uint32_t gpSaved = frame.savedRegs(RegGroup::kGp);
Gp zsp = emitter->zsp(); // ESP|RSP register.
Gp zbp = emitter->zbp(); // EBP|RBP register.
Gp gpReg = zsp; // General purpose register (temporary).
Gp saReg = zsp; // Stack-arguments base pointer.
// Emit: 'push zbp'
// 'mov zbp, zsp'.
if (frame.hasPreservedFP()) {
gpSaved &= ~Support::bitMask(Gp::kIdBp);
ASMJIT_PROPAGATE(emitter->push(zbp));
ASMJIT_PROPAGATE(emitter->mov(zbp, zsp));
}
// Emit: 'push gp' sequence.
{
Support::BitWordIterator<RegMask> it(gpSaved);
while (it.hasNext()) {
gpReg.setId(it.next());
ASMJIT_PROPAGATE(emitter->push(gpReg));
}
}
// Emit: 'mov saReg, zsp'.
uint32_t saRegId = frame.saRegId();
if (saRegId != BaseReg::kIdBad && saRegId != Gp::kIdSp) {
saReg.setId(saRegId);
if (frame.hasPreservedFP()) {
if (saRegId != Gp::kIdBp)
ASMJIT_PROPAGATE(emitter->mov(saReg, zbp));
}
else {
ASMJIT_PROPAGATE(emitter->mov(saReg, zsp));
}
}
// Emit: 'and zsp, StackAlignment'.
if (frame.hasDynamicAlignment()) {
ASMJIT_PROPAGATE(emitter->and_(zsp, -int32_t(frame.finalStackAlignment())));
}
// Emit: 'sub zsp, StackAdjustment'.
if (frame.hasStackAdjustment()) {
ASMJIT_PROPAGATE(emitter->sub(zsp, frame.stackAdjustment()));
}
// Emit: 'mov [zsp + DAOffset], saReg'.
if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
ASMJIT_PROPAGATE(emitter->mov(saMem, saReg));
}
// Emit 'movxxx [zsp + X], {[x|y|z]mm, k}'.
{
Reg xReg;
Mem xBase = ptr(zsp, int32_t(frame.extraRegSaveOffset()));
uint32_t xInst;
uint32_t xSize;
for (RegGroup group : Support::EnumValues<RegGroup, RegGroup(1), RegGroup::kMaxVirt>{}) {
Support::BitWordIterator<RegMask> it(frame.savedRegs(group));
if (it.hasNext()) {
X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
do {
xReg.setId(it.next());
ASMJIT_PROPAGATE(emitter->emit(xInst, xBase, xReg));
xBase.addOffsetLo32(int32_t(xSize));
} while (it.hasNext());
}
}
}
return kErrorOk;
}
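// A sketch of the sequence emitted above for a frame that preserves the frame
// pointer, saves RBX and XMM6, and adjusts the stack (offsets illustrative):
//
//   push rbp
//   mov rbp, rsp
//   push rbx
//   sub rsp, 40
//   movaps [rsp + 16], xmm6   ; movups when the save area is not aligned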
ASMJIT_FAVOR_SIZE Error EmitHelper::emitEpilog(const FuncFrame& frame) {
Emitter* emitter = _emitter->as<Emitter>();
uint32_t i;
uint32_t regId;
uint32_t registerSize = emitter->registerSize();
uint32_t gpSaved = frame.savedRegs(RegGroup::kGp);
Gp zsp = emitter->zsp(); // ESP|RSP register.
Gp zbp = emitter->zbp(); // EBP|RBP register.
Gp gpReg = emitter->zsp(); // General purpose register (temporary).
// Don't emit 'pop zbp' in the pop sequence, this case is handled separately.
if (frame.hasPreservedFP())
gpSaved &= ~Support::bitMask(Gp::kIdBp);
// Emit 'movxxx {[x|y|z]mm, k}, [zsp + X]'.
{
Reg xReg;
Mem xBase = ptr(zsp, int32_t(frame.extraRegSaveOffset()));
uint32_t xInst;
uint32_t xSize;
for (RegGroup group : Support::EnumValues<RegGroup, RegGroup(1), RegGroup::kMaxVirt>{}) {
Support::BitWordIterator<RegMask> it(frame.savedRegs(group));
if (it.hasNext()) {
X86Internal_setupSaveRestoreInfo(group, frame, xReg, xInst, xSize);
do {
xReg.setId(it.next());
ASMJIT_PROPAGATE(emitter->emit(xInst, xReg, xBase));
xBase.addOffsetLo32(int32_t(xSize));
} while (it.hasNext());
}
}
}
// Emit 'emms' and/or 'vzeroupper'.
if (frame.hasMmxCleanup()) ASMJIT_PROPAGATE(emitter->emms());
if (frame.hasAvxCleanup()) ASMJIT_PROPAGATE(emitter->vzeroupper());
if (frame.hasPreservedFP()) {
// Emit 'mov zsp, zbp' or 'lea zsp, [zbp - x]'
int32_t count = int32_t(frame.pushPopSaveSize() - registerSize);
if (!count)
ASMJIT_PROPAGATE(emitter->mov(zsp, zbp));
else
ASMJIT_PROPAGATE(emitter->lea(zsp, ptr(zbp, -count)));
}
else {
if (frame.hasDynamicAlignment() && frame.hasDAOffset()) {
// Emit 'mov zsp, [zsp + DsaSlot]'.
Mem saMem = ptr(zsp, int32_t(frame.daOffset()));
ASMJIT_PROPAGATE(emitter->mov(zsp, saMem));
}
else if (frame.hasStackAdjustment()) {
// Emit 'add zsp, StackAdjustment'.
ASMJIT_PROPAGATE(emitter->add(zsp, int32_t(frame.stackAdjustment())));
}
}
// Emit 'pop gp' sequence.
if (gpSaved) {
i = gpSaved;
regId = 16;
do {
regId--;
if (i & 0x8000) {
gpReg.setId(regId);
ASMJIT_PROPAGATE(emitter->pop(gpReg));
}
i <<= 1;
} while (regId != 0);
}
// Emit 'pop zbp'.
if (frame.hasPreservedFP())
ASMJIT_PROPAGATE(emitter->pop(zbp));
// Emit 'ret' or 'ret x'.
if (frame.hasCalleeStackCleanup())
ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet, int(frame.calleeStackCleanup())));
else
ASMJIT_PROPAGATE(emitter->emit(Inst::kIdRet));
return kErrorOk;
}
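// And the matching epilog for the same frame (illustrative):
//
//   movaps xmm6, [rsp + 16]
//   lea rsp, [rbp - 8]        ; restores SP to the 'push rbx' position
//   pop rbx
//   pop rbp
//   ret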
static Error ASMJIT_CDECL Emitter_emitProlog(BaseEmitter* emitter, const FuncFrame& frame) {
EmitHelper emitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled());
return emitHelper.emitProlog(frame);
}
static Error ASMJIT_CDECL Emitter_emitEpilog(BaseEmitter* emitter, const FuncFrame& frame) {
EmitHelper emitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled());
return emitHelper.emitEpilog(frame);
}
static Error ASMJIT_CDECL Emitter_emitArgsAssignment(BaseEmitter* emitter, const FuncFrame& frame, const FuncArgsAssignment& args) {
EmitHelper emitHelper(emitter, frame.isAvxEnabled(), frame.isAvx512Enabled());
return emitHelper.emitArgsAssignment(frame, args);
}
void assignEmitterFuncs(BaseEmitter* emitter) {
emitter->_funcs.emitProlog = Emitter_emitProlog;
emitter->_funcs.emitEpilog = Emitter_emitEpilog;
emitter->_funcs.emitArgsAssignment = Emitter_emitArgsAssignment;
#ifndef ASMJIT_NO_LOGGING
emitter->_funcs.formatInstruction = FormatterInternal::formatInstruction;
#endif
#ifndef ASMJIT_NO_VALIDATION
emitter->_funcs.validate = InstInternal::validate;
#endif
}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_X86

View File

@ -0,0 +1,60 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
#define ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED
#include "../core/api-config.h"
#include "../core/emithelper_p.h"
#include "../core/func.h"
#include "../x86/x86emitter.h"
#include "../x86/x86operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
static inline RegType vecTypeIdToRegType(TypeId typeId) noexcept {
return uint32_t(typeId) <= uint32_t(TypeId::_kVec128End) ? RegType::kX86_Xmm :
uint32_t(typeId) <= uint32_t(TypeId::_kVec256End) ? RegType::kX86_Ymm : RegType::kX86_Zmm;
}
class EmitHelper : public BaseEmitHelper {
public:
bool _avxEnabled;
bool _avx512Enabled;
inline explicit EmitHelper(BaseEmitter* emitter = nullptr, bool avxEnabled = false, bool avx512Enabled = false) noexcept
: BaseEmitHelper(emitter),
_avxEnabled(avxEnabled || avx512Enabled),
_avx512Enabled(avx512Enabled) {}
Error emitRegMove(
const Operand_& dst_,
const Operand_& src_, TypeId typeId, const char* comment = nullptr) override;
Error emitArgMove(
const BaseReg& dst_, TypeId dstTypeId,
const Operand_& src_, TypeId srcTypeId, const char* comment = nullptr) override;
Error emitRegSwap(
const BaseReg& a,
const BaseReg& b, const char* comment = nullptr) override;
Error emitProlog(const FuncFrame& frame);
Error emitEpilog(const FuncFrame& frame);
};
void assignEmitterFuncs(BaseEmitter* emitter);
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86EMITHELPER_P_H_INCLUDED

4315
src/asmjit/x86/x86emitter.h Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,948 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#ifndef ASMJIT_NO_LOGGING
#include "../core/cpuinfo.h"
#include "../core/misc_p.h"
#include "../core/support.h"
#include "../x86/x86formatter_p.h"
#include "../x86/x86instapi_p.h"
#include "../x86/x86instdb_p.h"
#include "../x86/x86operand.h"
#ifndef ASMJIT_NO_COMPILER
#include "../core/compiler.h"
#endif
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
// x86::FormatterInternal - Constants
// ==================================
struct RegFormatInfo {
struct TypeEntry {
uint8_t index;
};
struct NameEntry {
uint8_t count;
uint8_t formatIndex;
uint8_t specialIndex;
uint8_t specialCount;
};
TypeEntry typeEntries[uint32_t(RegType::kMaxValue) + 1];
char typeStrings[128 - 32];
NameEntry nameEntries[uint32_t(RegType::kMaxValue) + 1];
char nameStrings[280];
};
template<uint32_t X>
struct RegFormatInfo_T {
enum {
kTypeIndex = X == uint32_t(RegType::kX86_GpbLo) ? 1 :
X == uint32_t(RegType::kX86_GpbHi) ? 8 :
X == uint32_t(RegType::kX86_Gpw ) ? 15 :
X == uint32_t(RegType::kX86_Gpd ) ? 19 :
X == uint32_t(RegType::kX86_Gpq ) ? 23 :
X == uint32_t(RegType::kX86_Xmm ) ? 27 :
X == uint32_t(RegType::kX86_Ymm ) ? 31 :
X == uint32_t(RegType::kX86_Zmm ) ? 35 :
X == uint32_t(RegType::kX86_Mm ) ? 50 :
X == uint32_t(RegType::kX86_KReg ) ? 53 :
X == uint32_t(RegType::kX86_SReg ) ? 43 :
X == uint32_t(RegType::kX86_CReg ) ? 59 :
X == uint32_t(RegType::kX86_DReg ) ? 62 :
X == uint32_t(RegType::kX86_St ) ? 47 :
X == uint32_t(RegType::kX86_Bnd ) ? 55 :
X == uint32_t(RegType::kX86_Tmm ) ? 65 :
X == uint32_t(RegType::kX86_Rip ) ? 39 : 0,
kFormatIndex = X == uint32_t(RegType::kX86_GpbLo) ? 1 :
X == uint32_t(RegType::kX86_GpbHi) ? 6 :
X == uint32_t(RegType::kX86_Gpw ) ? 11 :
X == uint32_t(RegType::kX86_Gpd ) ? 16 :
X == uint32_t(RegType::kX86_Gpq ) ? 21 :
X == uint32_t(RegType::kX86_Xmm ) ? 25 :
X == uint32_t(RegType::kX86_Ymm ) ? 31 :
X == uint32_t(RegType::kX86_Zmm ) ? 37 :
X == uint32_t(RegType::kX86_Mm ) ? 60 :
X == uint32_t(RegType::kX86_KReg ) ? 65 :
X == uint32_t(RegType::kX86_SReg ) ? 49 :
X == uint32_t(RegType::kX86_CReg ) ? 75 :
X == uint32_t(RegType::kX86_DReg ) ? 80 :
X == uint32_t(RegType::kX86_St ) ? 55 :
X == uint32_t(RegType::kX86_Bnd ) ? 69 :
X == uint32_t(RegType::kX86_Tmm ) ? 89 :
X == uint32_t(RegType::kX86_Rip ) ? 43 : 0,
kSpecialIndex = X == uint32_t(RegType::kX86_GpbLo) ? 96 :
X == uint32_t(RegType::kX86_GpbHi) ? 128 :
X == uint32_t(RegType::kX86_Gpw ) ? 161 :
X == uint32_t(RegType::kX86_Gpd ) ? 160 :
X == uint32_t(RegType::kX86_Gpq ) ? 192 :
X == uint32_t(RegType::kX86_SReg ) ? 224 :
X == uint32_t(RegType::kX86_Rip ) ? 85 : 0,
kSpecialCount = X == uint32_t(RegType::kX86_GpbLo) ? 8 :
X == uint32_t(RegType::kX86_GpbHi) ? 4 :
X == uint32_t(RegType::kX86_Gpw ) ? 8 :
X == uint32_t(RegType::kX86_Gpd ) ? 8 :
X == uint32_t(RegType::kX86_Gpq ) ? 8 :
X == uint32_t(RegType::kX86_SReg ) ? 7 :
X == uint32_t(RegType::kX86_Rip ) ? 1 : 0
};
};
#define ASMJIT_REG_TYPE_ENTRY(TYPE) { \
RegFormatInfo_T<TYPE>::kTypeIndex \
}
#define ASMJIT_REG_NAME_ENTRY(TYPE) { \
RegTraits<RegType(TYPE)>::kCount, \
RegFormatInfo_T<TYPE>::kFormatIndex, \
RegFormatInfo_T<TYPE>::kSpecialIndex, \
RegFormatInfo_T<TYPE>::kSpecialCount \
}
static const RegFormatInfo x86RegFormatInfo = {
// Register type entries and strings.
{ ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_TYPE_ENTRY, 0) },
"\0" // #0
"gpb\0\0\0\0" // #1
"gpb.hi\0" // #8
"gpw\0" // #15
"gpd\0" // #19
"gpq\0" // #23
"xmm\0" // #27
"ymm\0" // #31
"zmm\0" // #35
"rip\0" // #39
"seg\0" // #43
"st\0" // #47
"mm\0" // #50
"k\0" // #53
"bnd\0" // #55
"cr\0" // #59
"dr\0" // #62
"tmm\0" // #65
,
// Register name entries and strings.
{ ASMJIT_LOOKUP_TABLE_32(ASMJIT_REG_NAME_ENTRY, 0) },
"\0"
"r%ub\0" // #1
"r%uh\0" // #6
"r%uw\0" // #11
"r%ud\0" // #16
"r%u\0" // #21
"xmm%u\0" // #25
"ymm%u\0" // #31
"zmm%u\0" // #37
"rip%u\0" // #43
"seg%u\0" // #49
"st%u\0" // #55
"mm%u\0" // #60
"k%u\0" // #65
"bnd%u\0" // #69
"cr%u\0" // #75
"dr%u\0" // #80
"rip\0" // #85
"tmm%u\0" // #89
"\0" // #95
"al\0\0" "cl\0\0" "dl\0\0" "bl\0\0" "spl\0" "bpl\0" "sil\0" "dil\0" // #96
"ah\0\0" "ch\0\0" "dh\0\0" "bh\0\0" "n/a\0" "n/a\0" "n/a\0" "n/a\0" // #128
"eax\0" "ecx\0" "edx\0" "ebx\0" "esp\0" "ebp\0" "esi\0" "edi\0" // #160
"rax\0" "rcx\0" "rdx\0" "rbx\0" "rsp\0" "rbp\0" "rsi\0" "rdi\0" // #192
"n/a\0" "es\0\0" "cs\0\0" "ss\0\0" "ds\0\0" "fs\0\0" "gs\0\0" "n/a\0" // #224
};
#undef ASMJIT_REG_NAME_ENTRY
#undef ASMJIT_REG_TYPE_ENTRY
static const char* x86GetAddressSizeString(uint32_t size) noexcept {
switch (size) {
case 1 : return "byte ptr ";
case 2 : return "word ptr ";
case 4 : return "dword ptr ";
case 6 : return "fword ptr ";
case 8 : return "qword ptr ";
case 10: return "tbyte ptr ";
case 16: return "xmmword ptr ";
case 32: return "ymmword ptr ";
case 64: return "zmmword ptr ";
default: return "";
}
}
// x86::FormatterInternal - Format FeatureId
// =========================================
Error FormatterInternal::formatFeature(String& sb, uint32_t featureId) noexcept {
// @EnumStringBegin{"enum": "CpuFeatures::X86", "output": "sFeature", "strip": "k"}@
static const char sFeatureString[] =
"None\0"
"MT\0"
"NX\0"
"3DNOW\0"
"3DNOW2\0"
"ADX\0"
"AESNI\0"
"ALTMOVCR8\0"
"AMX_BF16\0"
"AMX_INT8\0"
"AMX_TILE\0"
"AVX\0"
"AVX2\0"
"AVX512_4FMAPS\0"
"AVX512_4VNNIW\0"
"AVX512_BF16\0"
"AVX512_BITALG\0"
"AVX512_BW\0"
"AVX512_CDI\0"
"AVX512_DQ\0"
"AVX512_ERI\0"
"AVX512_F\0"
"AVX512_FP16\0"
"AVX512_IFMA\0"
"AVX512_PFI\0"
"AVX512_VBMI\0"
"AVX512_VBMI2\0"
"AVX512_VL\0"
"AVX512_VNNI\0"
"AVX512_VP2INTERSECT\0"
"AVX512_VPOPCNTDQ\0"
"AVX_VNNI\0"
"BMI\0"
"BMI2\0"
"CET_IBT\0"
"CET_SS\0"
"CLDEMOTE\0"
"CLFLUSH\0"
"CLFLUSHOPT\0"
"CLWB\0"
"CLZERO\0"
"CMOV\0"
"CMPXCHG16B\0"
"CMPXCHG8B\0"
"ENCLV\0"
"ENQCMD\0"
"ERMS\0"
"F16C\0"
"FMA\0"
"FMA4\0"
"FPU\0"
"FSGSBASE\0"
"FXSR\0"
"FXSROPT\0"
"GEODE\0"
"GFNI\0"
"HLE\0"
"HRESET\0"
"I486\0"
"LAHFSAHF\0"
"LWP\0"
"LZCNT\0"
"MCOMMIT\0"
"MMX\0"
"MMX2\0"
"MONITOR\0"
"MONITORX\0"
"MOVBE\0"
"MOVDIR64B\0"
"MOVDIRI\0"
"MPX\0"
"MSR\0"
"MSSE\0"
"OSXSAVE\0"
"OSPKE\0"
"PCLMULQDQ\0"
"PCONFIG\0"
"POPCNT\0"
"PREFETCHW\0"
"PREFETCHWT1\0"
"PTWRITE\0"
"RDPID\0"
"RDPRU\0"
"RDRAND\0"
"RDSEED\0"
"RDTSC\0"
"RDTSCP\0"
"RTM\0"
"SERIALIZE\0"
"SHA\0"
"SKINIT\0"
"SMAP\0"
"SMEP\0"
"SMX\0"
"SNP\0"
"SSE\0"
"SSE2\0"
"SSE3\0"
"SSE4_1\0"
"SSE4_2\0"
"SSE4A\0"
"SSSE3\0"
"SVM\0"
"TBM\0"
"TSX\0"
"TSXLDTRK\0"
"UINTR\0"
"VAES\0"
"VMX\0"
"VPCLMULQDQ\0"
"WAITPKG\0"
"WBNOINVD\0"
"XOP\0"
"XSAVE\0"
"XSAVEC\0"
"XSAVEOPT\0"
"XSAVES\0"
"<Unknown>\0";
static const uint16_t sFeatureIndex[] = {
0, 5, 8, 11, 17, 24, 28, 34, 44, 53, 62, 71, 75, 80, 94, 108, 120, 134, 144,
155, 165, 176, 185, 197, 209, 220, 232, 245, 255, 267, 287, 304, 313, 317,
322, 330, 337, 346, 354, 365, 370, 377, 382, 393, 403, 409, 416, 421, 426,
430, 435, 439, 448, 453, 461, 467, 472, 476, 483, 488, 497, 501, 507, 515,
519, 524, 532, 541, 547, 557, 565, 569, 573, 578, 586, 592, 602, 610, 617,
627, 639, 647, 653, 659, 666, 673, 679, 686, 690, 700, 704, 711, 716, 721,
725, 729, 733, 738, 743, 750, 757, 763, 769, 773, 777, 781, 790, 796, 801,
805, 816, 824, 833, 837, 843, 850, 859, 866
};
// @EnumStringEnd@
return sb.append(sFeatureString + sFeatureIndex[Support::min<uint32_t>(featureId, uint32_t(CpuFeatures::X86::kMaxValue) + 1)]);
}
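// For example, formatFeature(sb, uint32_t(CpuFeatures::X86::kAVX2)) appends
// "AVX2", and any out-of-range id maps to the trailing "<Unknown>" entry.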
// x86::FormatterInternal - Format Register
// ========================================
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatRegister(String& sb, FormatFlags formatFlags, const BaseEmitter* emitter, Arch arch, RegType type, uint32_t id) noexcept {
DebugUtils::unused(arch);
const RegFormatInfo& info = x86RegFormatInfo;
#ifndef ASMJIT_NO_COMPILER
if (Operand::isVirtId(id)) {
if (emitter && emitter->emitterType() == EmitterType::kCompiler) {
const BaseCompiler* cc = static_cast<const BaseCompiler*>(emitter);
if (cc->isVirtIdValid(id)) {
VirtReg* vReg = cc->virtRegById(id);
ASMJIT_ASSERT(vReg != nullptr);
const char* name = vReg->name();
if (name && name[0] != '\0')
ASMJIT_PROPAGATE(sb.append(name));
else
ASMJIT_PROPAGATE(sb.appendFormat("%%%u", unsigned(Operand::virtIdToIndex(id))));
bool formatType = (Support::test(formatFlags, FormatFlags::kRegType)) ||
(Support::test(formatFlags, FormatFlags::kRegCasts) && vReg->type() != type);
if (formatType && uint32_t(type) <= uint32_t(RegType::kMaxValue)) {
const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[size_t(type)];
if (typeEntry.index)
ASMJIT_PROPAGATE(sb.appendFormat("@%s", info.typeStrings + typeEntry.index));
}
return kErrorOk;
}
}
}
#else
DebugUtils::unused(emitter, formatFlags);
#endif
if (uint32_t(type) <= uint32_t(RegType::kMaxValue)) {
const RegFormatInfo::NameEntry& nameEntry = info.nameEntries[size_t(type)];
if (id < nameEntry.specialCount)
return sb.append(info.nameStrings + nameEntry.specialIndex + id * 4);
if (id < nameEntry.count)
return sb.appendFormat(info.nameStrings + nameEntry.formatIndex, unsigned(id));
const RegFormatInfo::TypeEntry& typeEntry = info.typeEntries[size_t(type)];
if (typeEntry.index)
return sb.appendFormat("%s@%u", info.typeStrings + typeEntry.index, id);
}
return sb.appendFormat("<Reg-%u>?%u", uint32_t(type), id);
}
// x86::FormatterInternal - Format Operand
// =======================================
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatOperand(
String& sb,
FormatFlags formatFlags,
const BaseEmitter* emitter,
Arch arch,
const Operand_& op) noexcept {
if (op.isReg())
return formatRegister(sb, formatFlags, emitter, arch, op.as<BaseReg>().type(), op.as<BaseReg>().id());
if (op.isMem()) {
const Mem& m = op.as<Mem>();
ASMJIT_PROPAGATE(sb.append(x86GetAddressSizeString(m.size())));
// Segment override prefix.
uint32_t seg = m.segmentId();
if (seg != SReg::kIdNone && seg < SReg::kIdCount)
ASMJIT_PROPAGATE(sb.appendFormat("%s:", x86RegFormatInfo.nameStrings + 224 + size_t(seg) * 4));
ASMJIT_PROPAGATE(sb.append('['));
switch (m.addrType()) {
case Mem::AddrType::kDefault:
break;
case Mem::AddrType::kAbs:
ASMJIT_PROPAGATE(sb.append("abs "));
break;
case Mem::AddrType::kRel:
ASMJIT_PROPAGATE(sb.append("rel "));
break;
}
char opSign = '\0';
if (m.hasBase()) {
opSign = '+';
if (m.hasBaseLabel()) {
ASMJIT_PROPAGATE(Formatter::formatLabel(sb, formatFlags, emitter, m.baseId()));
}
else {
FormatFlags modifiedFlags = formatFlags;
if (m.isRegHome()) {
ASMJIT_PROPAGATE(sb.append("&"));
modifiedFlags &= ~FormatFlags::kRegCasts;
}
ASMJIT_PROPAGATE(formatRegister(sb, modifiedFlags, emitter, arch, m.baseType(), m.baseId()));
}
}
if (m.hasIndex()) {
if (opSign)
ASMJIT_PROPAGATE(sb.append(opSign));
opSign = '+';
ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, arch, m.indexType(), m.indexId()));
if (m.hasShift())
ASMJIT_PROPAGATE(sb.appendFormat("*%u", 1 << m.shift()));
}
uint64_t off = uint64_t(m.offset());
if (off || !m.hasBaseOrIndex()) {
if (int64_t(off) < 0) {
opSign = '-';
off = ~off + 1;
}
if (opSign)
ASMJIT_PROPAGATE(sb.append(opSign));
uint32_t base = 10;
if (Support::test(formatFlags, FormatFlags::kHexOffsets) && off > 9) {
ASMJIT_PROPAGATE(sb.append("0x", 2));
base = 16;
}
ASMJIT_PROPAGATE(sb.appendUInt(off, base));
}
return sb.append(']');
}
if (op.isImm()) {
const Imm& i = op.as<Imm>();
int64_t val = i.value();
if (Support::test(formatFlags, FormatFlags::kHexImms) && uint64_t(val) > 9) {
ASMJIT_PROPAGATE(sb.append("0x", 2));
return sb.appendUInt(uint64_t(val), 16);
}
else {
return sb.appendInt(val, 10);
}
}
if (op.isLabel()) {
return Formatter::formatLabel(sb, formatFlags, emitter, op.id());
}
return sb.append("<None>");
}
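// For example, a memory operand created as dword_ptr(rax, rbx, 2, 16) formats
// as "dword ptr [rax+rbx*4+16]" (or "...+0x10]" with FormatFlags::kHexOffsets).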
// x86::FormatterInternal - Format Immediate (Extension)
// =====================================================
static constexpr char kImmCharStart = '{';
static constexpr char kImmCharEnd = '}';
static constexpr char kImmCharOr = '|';
struct ImmBits {
enum Mode : uint32_t {
kModeLookup = 0,
kModeFormat = 1
};
uint8_t mask;
uint8_t shift;
uint8_t mode;
char text[48 - 3];
};
ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmShuf(String& sb, uint32_t imm8, uint32_t bits, uint32_t count) noexcept {
uint32_t mask = (1 << bits) - 1;
uint32_t lastPredicateShift = bits * (count - 1u);
for (uint32_t i = 0; i < count; i++, imm8 <<= bits) {
uint32_t index = (imm8 >> lastPredicateShift) & mask;
ASMJIT_PROPAGATE(sb.append(i == 0 ? kImmCharStart : kImmCharOr));
ASMJIT_PROPAGATE(sb.appendUInt(index));
}
if (kImmCharEnd)
ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
return kErrorOk;
}
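// For example, a 'pshufd' predicate 0x1B (binary 00|01|10|11) formatted with
// bits=2 and count=4 appends "{0|1|2|3}" - element selectors listed from the
// most significant field to the least significant one.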
ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmBits(String& sb, uint32_t imm8, const ImmBits* bits, uint32_t count) noexcept {
uint32_t n = 0;
char buf[64];
for (uint32_t i = 0; i < count; i++) {
const ImmBits& spec = bits[i];
uint32_t value = (imm8 & uint32_t(spec.mask)) >> spec.shift;
const char* str = nullptr;
switch (spec.mode) {
case ImmBits::kModeLookup:
str = Support::findPackedString(spec.text, value);
break;
case ImmBits::kModeFormat:
snprintf(buf, sizeof(buf), spec.text, unsigned(value));
str = buf;
break;
default:
return DebugUtils::errored(kErrorInvalidState);
}
if (!str[0])
continue;
ASMJIT_PROPAGATE(sb.append(++n == 1 ? kImmCharStart : kImmCharOr));
ASMJIT_PROPAGATE(sb.append(str));
}
if (n && kImmCharEnd)
ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
return kErrorOk;
}
ASMJIT_FAVOR_SIZE static Error FormatterInternal_formatImmText(String& sb, uint32_t imm8, uint32_t bits, uint32_t advance, const char* text, uint32_t count = 1) noexcept {
uint32_t mask = (1u << bits) - 1;
uint32_t pos = 0;
for (uint32_t i = 0; i < count; i++, imm8 >>= bits, pos += advance) {
uint32_t value = (imm8 & mask) + pos;
ASMJIT_PROPAGATE(sb.append(i == 0 ? kImmCharStart : kImmCharOr));
ASMJIT_PROPAGATE(sb.append(Support::findPackedString(text, value)));
}
if (kImmCharEnd)
ASMJIT_PROPAGATE(sb.append(kImmCharEnd));
return kErrorOk;
}
ASMJIT_FAVOR_SIZE static Error FormatterInternal_explainConst(
String& sb,
FormatFlags formatFlags,
InstId instId,
uint32_t vecSize,
const Imm& imm) noexcept {
DebugUtils::unused(formatFlags);
static const char vcmpx[] =
"EQ_OQ\0" "LT_OS\0" "LE_OS\0" "UNORD_Q\0" "NEQ_UQ\0" "NLT_US\0" "NLE_US\0" "ORD_Q\0"
"EQ_UQ\0" "NGE_US\0" "NGT_US\0" "FALSE_OQ\0" "NEQ_OQ\0" "GE_OS\0" "GT_OS\0" "TRUE_UQ\0"
"EQ_OS\0" "LT_OQ\0" "LE_OQ\0" "UNORD_S\0" "NEQ_US\0" "NLT_UQ\0" "NLE_UQ\0" "ORD_S\0"
"EQ_US\0" "NGE_UQ\0" "NGT_UQ\0" "FALSE_OS\0" "NEQ_OS\0" "GE_OQ\0" "GT_OQ\0" "TRUE_US\0";
// Why to make it compatible...
static const char vpcmpx[] = "EQ\0" "LT\0" "LE\0" "FALSE\0" "NEQ\0" "GE\0" "GT\0" "TRUE\0";
static const char vpcomx[] = "LT\0" "LE\0" "GT\0" "GE\0" "EQ\0" "NEQ\0" "FALSE\0" "TRUE\0";
static const char vshufpd[] = "A0\0A1\0B0\0B1\0A2\0A3\0B2\0B3\0A4\0A5\0B4\0B5\0A6\0A7\0B6\0B7\0";
static const char vshufps[] = "A0\0A1\0A2\0A3\0A0\0A1\0A2\0A3\0B0\0B1\0B2\0B3\0B0\0B1\0B2\0B3\0";
static const ImmBits vfpclassxx[] = {
{ 0x07u, 0, ImmBits::kModeLookup, "QNAN\0" "+0\0" "-0\0" "+INF\0" "-INF\0" "DENORMAL\0" "-FINITE\0" "SNAN\0" }
};
static const ImmBits vfixupimmxx[] = {
{ 0x01u, 0, ImmBits::kModeLookup, "\0" "+INF_IE\0" },
{ 0x02u, 1, ImmBits::kModeLookup, "\0" "-VE_IE\0" },
{ 0x04u, 2, ImmBits::kModeLookup, "\0" "-INF_IE\0" },
{ 0x08u, 3, ImmBits::kModeLookup, "\0" "SNAN_IE\0" },
{ 0x10u, 4, ImmBits::kModeLookup, "\0" "ONE_IE\0" },
{ 0x20u, 5, ImmBits::kModeLookup, "\0" "ONE_ZE\0" },
{ 0x40u, 6, ImmBits::kModeLookup, "\0" "ZERO_IE\0" },
{ 0x80u, 7, ImmBits::kModeLookup, "\0" "ZERO_ZE\0" }
};
static const ImmBits vgetmantxx[] = {
{ 0x03u, 0, ImmBits::kModeLookup, "[1, 2)\0" "[.5, 2)\0" "[.5, 1)\0" "[.75, 1.5)\0" },
{ 0x04u, 2, ImmBits::kModeLookup, "\0" "NO_SIGN\0" },
{ 0x08u, 3, ImmBits::kModeLookup, "\0" "QNAN_IF_SIGN\0" }
};
static const ImmBits vmpsadbw[] = {
{ 0x40u, 6, ImmBits::kModeLookup, "BLK1[4]\0" "BLK1[5]\0" },
{ 0x30u, 4, ImmBits::kModeLookup, "BLK2[4]\0" "BLK2[5]\0" "BLK2[6]\0" "BLK2[7]\0" },
{ 0x04u, 2, ImmBits::kModeLookup, "BLK1[0]\0" "BLK1[1]\0" },
{ 0x03u, 0, ImmBits::kModeLookup, "BLK2[0]\0" "BLK2[1]\0" "BLK2[2]\0" "BLK2[3]\0" }
};
static const ImmBits vpclmulqdq[] = {
{ 0x10u, 4, ImmBits::kModeLookup, "LQ\0" "HQ\0" },
{ 0x01u, 0, ImmBits::kModeLookup, "LQ\0" "HQ\0" }
};
static const ImmBits vperm2x128[] = {
{ 0xB0u, 4, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" },
{ 0x0Bu, 0, ImmBits::kModeLookup, "A0\0" "A1\0" "B0\0" "B1\0" "\0" "\0" "\0" "\0" "0\0" "0\0" "0\0" "0\0" }
};
static const ImmBits vrangexx[] = {
{ 0x0Cu, 2, ImmBits::kModeLookup, "SIGN_A\0" "SIGN_B\0" "SIGN_0\0" "SIGN_1\0" },
{ 0x03u, 0, ImmBits::kModeLookup, "MIN\0" "MAX\0" "MIN_ABS\0" "MAX_ABS\0" }
};
static const ImmBits vreducexx_vrndscalexx[] = {
{ 0x07u, 0, ImmBits::kModeLookup, "\0" "\0" "\0" "\0" "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" },
{ 0x08u, 3, ImmBits::kModeLookup, "\0" "SAE\0" },
{ 0xF0u, 4, ImmBits::kModeFormat, "LEN=%d" }
};
static const ImmBits vroundxx[] = {
{ 0x07u, 0, ImmBits::kModeLookup, "ROUND\0" "FLOOR\0" "CEIL\0" "TRUNC\0" "\0" "\0" "\0" "\0" },
{ 0x08u, 3, ImmBits::kModeLookup, "\0" "INEXACT\0" }
};
uint32_t u8 = imm.valueAs<uint8_t>();
switch (instId) {
case Inst::kIdVblendpd:
case Inst::kIdBlendpd:
return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 8);
case Inst::kIdVblendps:
case Inst::kIdBlendps:
return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 4);
case Inst::kIdVcmppd:
case Inst::kIdVcmpps:
case Inst::kIdVcmpsd:
case Inst::kIdVcmpss:
return FormatterInternal_formatImmText(sb, u8, 5, 0, vcmpx);
case Inst::kIdCmppd:
case Inst::kIdCmpps:
case Inst::kIdCmpsd:
case Inst::kIdCmpss:
return FormatterInternal_formatImmText(sb, u8, 3, 0, vcmpx);
case Inst::kIdVdbpsadbw:
return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
case Inst::kIdVdppd:
case Inst::kIdVdpps:
case Inst::kIdDppd:
case Inst::kIdDpps:
return FormatterInternal_formatImmShuf(sb, u8, 1, 8);
case Inst::kIdVmpsadbw:
case Inst::kIdMpsadbw:
return FormatterInternal_formatImmBits(sb, u8, vmpsadbw, Support::min<uint32_t>(vecSize / 8, 4));
case Inst::kIdVpblendw:
case Inst::kIdPblendw:
return FormatterInternal_formatImmShuf(sb, u8, 1, 8);
case Inst::kIdVpblendd:
return FormatterInternal_formatImmShuf(sb, u8, 1, Support::min<uint32_t>(vecSize / 4, 8));
case Inst::kIdVpclmulqdq:
case Inst::kIdPclmulqdq:
return FormatterInternal_formatImmBits(sb, u8, vpclmulqdq, ASMJIT_ARRAY_SIZE(vpclmulqdq));
case Inst::kIdVroundpd:
case Inst::kIdVroundps:
case Inst::kIdVroundsd:
case Inst::kIdVroundss:
case Inst::kIdRoundpd:
case Inst::kIdRoundps:
case Inst::kIdRoundsd:
case Inst::kIdRoundss:
return FormatterInternal_formatImmBits(sb, u8, vroundxx, ASMJIT_ARRAY_SIZE(vroundxx));
case Inst::kIdVshufpd:
case Inst::kIdShufpd:
return FormatterInternal_formatImmText(sb, u8, 1, 2, vshufpd, Support::min<uint32_t>(vecSize / 8, 8));
case Inst::kIdVshufps:
case Inst::kIdShufps:
return FormatterInternal_formatImmText(sb, u8, 2, 4, vshufps, 4);
case Inst::kIdVcvtps2ph:
return FormatterInternal_formatImmBits(sb, u8, vroundxx, 1);
case Inst::kIdVperm2f128:
case Inst::kIdVperm2i128:
return FormatterInternal_formatImmBits(sb, u8, vperm2x128, ASMJIT_ARRAY_SIZE(vperm2x128));
case Inst::kIdVpermilpd:
return FormatterInternal_formatImmShuf(sb, u8, 1, vecSize / 8);
case Inst::kIdVpermilps:
return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
case Inst::kIdVpshufd:
case Inst::kIdPshufd:
return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
case Inst::kIdVpshufhw:
case Inst::kIdVpshuflw:
case Inst::kIdPshufhw:
case Inst::kIdPshuflw:
case Inst::kIdPshufw:
return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
case Inst::kIdVfixupimmpd:
case Inst::kIdVfixupimmps:
case Inst::kIdVfixupimmsd:
case Inst::kIdVfixupimmss:
return FormatterInternal_formatImmBits(sb, u8, vfixupimmxx, ASMJIT_ARRAY_SIZE(vfixupimmxx));
case Inst::kIdVfpclasspd:
case Inst::kIdVfpclassps:
case Inst::kIdVfpclasssd:
case Inst::kIdVfpclassss:
return FormatterInternal_formatImmBits(sb, u8, vfpclassxx, ASMJIT_ARRAY_SIZE(vfpclassxx));
case Inst::kIdVgetmantpd:
case Inst::kIdVgetmantps:
case Inst::kIdVgetmantsd:
case Inst::kIdVgetmantss:
return FormatterInternal_formatImmBits(sb, u8, vgetmantxx, ASMJIT_ARRAY_SIZE(vgetmantxx));
case Inst::kIdVpcmpb:
case Inst::kIdVpcmpd:
case Inst::kIdVpcmpq:
case Inst::kIdVpcmpw:
case Inst::kIdVpcmpub:
case Inst::kIdVpcmpud:
case Inst::kIdVpcmpuq:
case Inst::kIdVpcmpuw:
return FormatterInternal_formatImmText(sb, u8, 3, 0, vpcmpx);
case Inst::kIdVpcomb:
case Inst::kIdVpcomd:
case Inst::kIdVpcomq:
case Inst::kIdVpcomw:
case Inst::kIdVpcomub:
case Inst::kIdVpcomud:
case Inst::kIdVpcomuq:
case Inst::kIdVpcomuw:
return FormatterInternal_formatImmText(sb, u8, 3, 0, vpcomx);
case Inst::kIdVpermq:
case Inst::kIdVpermpd:
return FormatterInternal_formatImmShuf(sb, u8, 2, 4);
case Inst::kIdVpternlogd:
case Inst::kIdVpternlogq:
return FormatterInternal_formatImmShuf(sb, u8, 1, 8);
case Inst::kIdVrangepd:
case Inst::kIdVrangeps:
case Inst::kIdVrangesd:
case Inst::kIdVrangess:
return FormatterInternal_formatImmBits(sb, u8, vrangexx, ASMJIT_ARRAY_SIZE(vrangexx));
case Inst::kIdVreducepd:
case Inst::kIdVreduceps:
case Inst::kIdVreducesd:
case Inst::kIdVreducess:
case Inst::kIdVrndscalepd:
case Inst::kIdVrndscaleps:
case Inst::kIdVrndscalesd:
case Inst::kIdVrndscaless:
return FormatterInternal_formatImmBits(sb, u8, vreducexx_vrndscalexx, ASMJIT_ARRAY_SIZE(vreducexx_vrndscalexx));
case Inst::kIdVshuff32x4:
case Inst::kIdVshuff64x2:
case Inst::kIdVshufi32x4:
case Inst::kIdVshufi64x2: {
uint32_t count = Support::max<uint32_t>(vecSize / 16, 2u);
uint32_t bits = count <= 2 ? 1u : 2u;
return FormatterInternal_formatImmShuf(sb, u8, bits, count);
}
default:
return kErrorOk;
}
}
// x86::FormatterInternal - Format Instruction
// ===========================================
ASMJIT_FAVOR_SIZE Error FormatterInternal::formatInstruction(
String& sb,
FormatFlags formatFlags,
const BaseEmitter* emitter,
Arch arch,
const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept {
InstId instId = inst.id();
InstOptions options = inst.options();
// Format instruction options and instruction mnemonic.
if (instId < Inst::_kIdCount) {
// VEX|EVEX options.
if (Support::test(options, InstOptions::kX86_Vex))
ASMJIT_PROPAGATE(sb.append("{vex} "));
if (Support::test(options, InstOptions::kX86_Vex3))
ASMJIT_PROPAGATE(sb.append("{vex3} "));
if (Support::test(options, InstOptions::kX86_Evex))
ASMJIT_PROPAGATE(sb.append("{evex} "));
// MOD/RM and MOD/MR options
if (Support::test(options, InstOptions::kX86_ModRM))
ASMJIT_PROPAGATE(sb.append("{modrm} "));
else if (Support::test(options, InstOptions::kX86_ModMR))
ASMJIT_PROPAGATE(sb.append("{modmr} "));
// SHORT|LONG options.
if (Support::test(options, InstOptions::kShortForm))
ASMJIT_PROPAGATE(sb.append("short "));
if (Support::test(options, InstOptions::kLongForm))
ASMJIT_PROPAGATE(sb.append("long "));
// LOCK|XACQUIRE|XRELEASE options.
if (Support::test(options, InstOptions::kX86_XAcquire))
ASMJIT_PROPAGATE(sb.append("xacquire "));
if (Support::test(options, InstOptions::kX86_XRelease))
ASMJIT_PROPAGATE(sb.append("xrelease "));
if (Support::test(options, InstOptions::kX86_Lock))
ASMJIT_PROPAGATE(sb.append("lock "));
// REP|REPNE options.
if (Support::test(options, InstOptions::kX86_Rep | InstOptions::kX86_Repne)) {
sb.append(Support::test(options, InstOptions::kX86_Rep) ? "rep " : "repnz ");
if (inst.hasExtraReg()) {
ASMJIT_PROPAGATE(sb.append("{"));
ASMJIT_PROPAGATE(formatOperand(sb, formatFlags, emitter, arch, inst.extraReg().toReg<BaseReg>()));
ASMJIT_PROPAGATE(sb.append("} "));
}
}
// REX options.
if (Support::test(options, InstOptions::kX86_Rex)) {
const InstOptions kRXBWMask = InstOptions::kX86_OpCodeR |
InstOptions::kX86_OpCodeX |
InstOptions::kX86_OpCodeB |
InstOptions::kX86_OpCodeW ;
if (Support::test(options, kRXBWMask)) {
ASMJIT_PROPAGATE(sb.append("rex."));
if (Support::test(options, InstOptions::kX86_OpCodeR)) sb.append('r');
if (Support::test(options, InstOptions::kX86_OpCodeX)) sb.append('x');
if (Support::test(options, InstOptions::kX86_OpCodeB)) sb.append('b');
if (Support::test(options, InstOptions::kX86_OpCodeW)) sb.append('w');
sb.append(' ');
}
else {
ASMJIT_PROPAGATE(sb.append("rex "));
}
}
ASMJIT_PROPAGATE(InstInternal::instIdToString(arch, instId, sb));
}
else {
ASMJIT_PROPAGATE(sb.appendFormat("[InstId=#%u]", unsigned(instId)));
}
for (uint32_t i = 0; i < opCount; i++) {
const Operand_& op = operands[i];
if (op.isNone()) break;
ASMJIT_PROPAGATE(sb.append(i == 0 ? " " : ", "));
ASMJIT_PROPAGATE(formatOperand(sb, formatFlags, emitter, arch, op));
if (op.isImm() && uint32_t(formatFlags & FormatFlags::kExplainImms)) {
uint32_t vecSize = 16;
for (uint32_t j = 0; j < opCount; j++)
if (operands[j].isReg())
vecSize = Support::max<uint32_t>(vecSize, operands[j].size());
ASMJIT_PROPAGATE(FormatterInternal_explainConst(sb, formatFlags, instId, vecSize, op.as<Imm>()));
}
// Support AVX-512 masking - {k}{z}.
if (i == 0) {
if (inst.extraReg().group() == RegGroup::kX86_K) {
ASMJIT_PROPAGATE(sb.append(" {"));
ASMJIT_PROPAGATE(formatRegister(sb, formatFlags, emitter, arch, inst.extraReg().type(), inst.extraReg().id()));
ASMJIT_PROPAGATE(sb.append('}'));
if (Support::test(options, InstOptions::kX86_ZMask))
ASMJIT_PROPAGATE(sb.append("{z}"));
}
else if (Support::test(options, InstOptions::kX86_ZMask)) {
ASMJIT_PROPAGATE(sb.append(" {z}"));
}
}
// Support AVX-512 broadcast - {1tox}.
if (op.isMem() && op.as<Mem>().hasBroadcast()) {
ASMJIT_PROPAGATE(sb.appendFormat(" {1to%u}", Support::bitMask(uint32_t(op.as<Mem>().getBroadcast()))));
}
}
// Support AVX-512 embedded rounding and suppress-all-exceptions {sae}.
if (inst.hasOption(InstOptions::kX86_ER | InstOptions::kX86_SAE)) {
if (inst.hasOption(InstOptions::kX86_ER)) {
uint32_t bits = uint32_t(inst.options() & InstOptions::kX86_ERMask) >> Support::ConstCTZ<uint32_t(InstOptions::kX86_ERMask)>::value;
const char roundingModes[] = "rn\0rd\0ru\0rz";
ASMJIT_PROPAGATE(sb.appendFormat(", {%s-sae}", roundingModes + bits * 3));
}
else {
ASMJIT_PROPAGATE(sb.append(", {sae}"));
}
}
return kErrorOk;
}
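// Putting it all together, an AVX-512 instruction with write-masking,
// broadcast, and embedded rounding could format as (illustrative):
//
//   vaddpd zmm0 {k1}{z}, zmm1, qword ptr [rax] {1to8}
//   vaddpd zmm0, zmm1, zmm2, {rd-sae}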
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_LOGGING

View File

@ -0,0 +1,58 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
#define ASMJIT_X86_X86FORMATTER_P_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_LOGGING
#include "../core/formatter.h"
#include "../core/string.h"
#include "../x86/x86globals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
namespace FormatterInternal {
Error ASMJIT_CDECL formatFeature(
String& sb,
uint32_t featureId) noexcept;
Error ASMJIT_CDECL formatRegister(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
RegType regType,
uint32_t regId) noexcept;
Error ASMJIT_CDECL formatOperand(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
const Operand_& op) noexcept;
Error ASMJIT_CDECL formatInstruction(
String& sb,
FormatFlags flags,
const BaseEmitter* emitter,
Arch arch,
const BaseInst& inst, const Operand_* operands, size_t opCount) noexcept;
} // {FormatterInternal}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_LOGGING
#endif // ASMJIT_X86_X86FORMATTER_P_H_INCLUDED

503
src/asmjit/x86/x86func.cpp Normal file
View File

@ -0,0 +1,503 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_X86)
#include "../x86/x86func_p.h"
#include "../x86/x86emithelper_p.h"
#include "../x86/x86operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
namespace FuncInternal {
static inline bool shouldThreatAsCDeclIn64BitMode(CallConvId ccId) noexcept {
return ccId == CallConvId::kCDecl ||
ccId == CallConvId::kStdCall ||
ccId == CallConvId::kThisCall ||
ccId == CallConvId::kFastCall ||
ccId == CallConvId::kRegParm1 ||
ccId == CallConvId::kRegParm2 ||
ccId == CallConvId::kRegParm3;
}
ASMJIT_FAVOR_SIZE Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept {
constexpr uint32_t kZax = Gp::kIdAx;
constexpr uint32_t kZbx = Gp::kIdBx;
constexpr uint32_t kZcx = Gp::kIdCx;
constexpr uint32_t kZdx = Gp::kIdDx;
constexpr uint32_t kZsp = Gp::kIdSp;
constexpr uint32_t kZbp = Gp::kIdBp;
constexpr uint32_t kZsi = Gp::kIdSi;
constexpr uint32_t kZdi = Gp::kIdDi;
bool winABI = environment.isPlatformWindows() || environment.isMSVC();
cc.setArch(environment.arch());
cc.setSaveRestoreRegSize(RegGroup::kVec, 16);
cc.setSaveRestoreRegSize(RegGroup::kX86_MM, 8);
cc.setSaveRestoreRegSize(RegGroup::kX86_K, 8);
cc.setSaveRestoreAlignment(RegGroup::kVec, 16);
cc.setSaveRestoreAlignment(RegGroup::kX86_MM, 8);
cc.setSaveRestoreAlignment(RegGroup::kX86_K, 8);
if (environment.is32Bit()) {
bool isStandardCallConv = true;
cc.setSaveRestoreRegSize(RegGroup::kGp, 4);
cc.setSaveRestoreAlignment(RegGroup::kGp, 4);
cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(Gp::kIdBx, Gp::kIdSp, Gp::kIdBp, Gp::kIdSi, Gp::kIdDi));
cc.setNaturalStackAlignment(4);
switch (ccId) {
case CallConvId::kCDecl:
break;
case CallConvId::kStdCall:
cc.setFlags(CallConvFlags::kCalleePopsStack);
break;
case CallConvId::kFastCall:
cc.setFlags(CallConvFlags::kCalleePopsStack);
cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx);
break;
case CallConvId::kVectorCall:
cc.setFlags(CallConvFlags::kCalleePopsStack);
cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx);
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5);
break;
case CallConvId::kThisCall:
// NOTE: Even MINGW (starting with GCC 4.7.0) now uses __thiscall on MS Windows, so we won't bail to any
// other calling convention if __thiscall was specified.
if (winABI) {
cc.setFlags(CallConvFlags::kCalleePopsStack);
cc.setPassedOrder(RegGroup::kGp, kZcx);
}
else {
ccId = CallConvId::kCDecl;
}
break;
case CallConvId::kRegParm1:
cc.setPassedOrder(RegGroup::kGp, kZax);
break;
case CallConvId::kRegParm2:
cc.setPassedOrder(RegGroup::kGp, kZax, kZdx);
break;
case CallConvId::kRegParm3:
cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx);
break;
case CallConvId::kLightCall2:
case CallConvId::kLightCall3:
case CallConvId::kLightCall4: {
uint32_t n = uint32_t(ccId) - uint32_t(CallConvId::kLightCall2) + 2;
cc.setFlags(CallConvFlags::kPassFloatsByVec);
cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx, kZsi, kZdi);
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPassedOrder(RegGroup::kX86_K, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPreservedRegs(RegGroup::kGp, Support::lsbMask<uint32_t>(8));
cc.setPreservedRegs(RegGroup::kVec, Support::lsbMask<uint32_t>(8) & ~Support::lsbMask<uint32_t>(n));
cc.setNaturalStackAlignment(16);
isStandardCallConv = false;
break;
}
default:
return DebugUtils::errored(kErrorInvalidArgument);
}
if (isStandardCallConv) {
// MMX arguments are something compiler vendors disagree on. For example, GCC and MSVC pass the first three
// via registers and the rest via stack, whereas Clang passes all of them via stack. Returning MMX registers is
// even more fun: GCC uses MM0, but Clang uses the EAX:EDX pair. I'm not sure it's something we should be
// worried about as MMX is deprecated anyway.
cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2);
// Vector arguments (XMM|YMM|ZMM) are passed via registers. However, if the function is variadic then they have
// to be passed via stack.
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2);
// Functions with variable arguments always use stack for MM and vector arguments.
cc.addFlags(CallConvFlags::kPassVecByStackIfVA);
}
if (ccId == CallConvId::kCDecl) {
cc.addFlags(CallConvFlags::kVarArgCompatible);
}
}
else {
cc.setSaveRestoreRegSize(RegGroup::kGp, 8);
cc.setSaveRestoreAlignment(RegGroup::kGp, 8);
// Preprocess the calling convention into a common id as many conventions are normally ignored even by C/C++
// compilers and treated as `__cdecl`.
if (shouldThreatAsCDeclIn64BitMode(ccId))
ccId = winABI ? CallConvId::kX64Windows : CallConvId::kX64SystemV;
switch (ccId) {
case CallConvId::kX64SystemV: {
cc.setFlags(CallConvFlags::kPassFloatsByVec |
CallConvFlags::kPassMmxByXmm |
CallConvFlags::kVarArgCompatible);
cc.setNaturalStackAlignment(16);
cc.setRedZoneSize(128);
cc.setPassedOrder(RegGroup::kGp, kZdi, kZsi, kZdx, kZcx, 8, 9);
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, 12, 13, 14, 15));
break;
}
case CallConvId::kX64Windows: {
cc.setStrategy(CallConvStrategy::kX64Windows);
cc.setFlags(CallConvFlags::kPassFloatsByVec |
CallConvFlags::kIndirectVecArgs |
CallConvFlags::kPassMmxByGp |
CallConvFlags::kVarArgCompatible);
cc.setNaturalStackAlignment(16);
// Maximum 4 arguments in registers, each adds 8 bytes to the spill zone.
cc.setSpillZoneSize(4 * 8);
cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx, 8, 9);
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3);
cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
break;
}
case CallConvId::kVectorCall: {
cc.setStrategy(CallConvStrategy::kX64VectorCall);
cc.setFlags(CallConvFlags::kPassFloatsByVec |
CallConvFlags::kPassMmxByGp );
cc.setNaturalStackAlignment(16);
// Maximum 6 arguments in registers, each adds 8 bytes to the spill zone.
cc.setSpillZoneSize(6 * 8);
cc.setPassedOrder(RegGroup::kGp, kZcx, kZdx, 8, 9);
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5);
cc.setPreservedRegs(RegGroup::kGp, Support::bitMask(kZbx, kZsp, kZbp, kZsi, kZdi, 12, 13, 14, 15));
cc.setPreservedRegs(RegGroup::kVec, Support::bitMask(6, 7, 8, 9, 10, 11, 12, 13, 14, 15));
break;
}
case CallConvId::kLightCall2:
case CallConvId::kLightCall3:
case CallConvId::kLightCall4: {
uint32_t n = uint32_t(ccId) - uint32_t(CallConvId::kLightCall2) + 2;
cc.setFlags(CallConvFlags::kPassFloatsByVec);
cc.setNaturalStackAlignment(16);
cc.setPassedOrder(RegGroup::kGp, kZax, kZdx, kZcx, kZsi, kZdi);
cc.setPassedOrder(RegGroup::kVec, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPassedOrder(RegGroup::kX86_K, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPassedOrder(RegGroup::kX86_MM, 0, 1, 2, 3, 4, 5, 6, 7);
cc.setPreservedRegs(RegGroup::kGp, Support::lsbMask<uint32_t>(16));
cc.setPreservedRegs(RegGroup::kVec, ~Support::lsbMask<uint32_t>(n));
break;
}
default:
return DebugUtils::errored(kErrorInvalidArgument);
}
}
cc.setId(ccId);
return kErrorOk;
}
ASMJIT_FAVOR_SIZE void unpackValues(FuncDetail& func, FuncValuePack& pack) noexcept {
TypeId typeId = pack[0].typeId();
switch (typeId) {
case TypeId::kInt64:
case TypeId::kUInt64: {
if (Environment::is32Bit(func.callConv().arch())) {
// Convert a 64-bit return value to two 32-bit return values.
pack[0].initTypeId(TypeId::kUInt32);
pack[1].initTypeId(TypeId(uint32_t(typeId) - 2));
break;
}
break;
}
default: {
break;
}
}
}
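// For example, a 64-bit return value on 32-bit x86 is unpacked into the
// conventional EDX:EAX pair - pack[0] becomes the low 32 bits (assigned to EAX
// by initFuncDetail below) and pack[1] the high 32 bits (EDX).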
ASMJIT_FAVOR_SIZE Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept {
const CallConv& cc = func.callConv();
Arch arch = cc.arch();
uint32_t stackOffset = cc._spillZoneSize;
uint32_t argCount = func.argCount();
// Up to two return values can be returned in GP registers.
static const uint8_t gpReturnIndexes[4] = {
uint8_t(Gp::kIdAx),
uint8_t(Gp::kIdDx),
uint8_t(BaseReg::kIdBad),
uint8_t(BaseReg::kIdBad)
};
if (func.hasRet()) {
unpackValues(func, func._rets);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
TypeId typeId = func._rets[valueIndex].typeId();
// Terminate at the first void type (end of the pack).
if (typeId == TypeId::kVoid)
break;
switch (typeId) {
case TypeId::kInt64:
case TypeId::kUInt64: {
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
func._rets[valueIndex].initReg(RegType::kX86_Gpq, gpReturnIndexes[valueIndex], typeId);
else
return DebugUtils::errored(kErrorInvalidState);
break;
}
case TypeId::kInt8:
case TypeId::kInt16:
case TypeId::kInt32: {
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
func._rets[valueIndex].initReg(RegType::kX86_Gpd, gpReturnIndexes[valueIndex], TypeId::kInt32);
else
return DebugUtils::errored(kErrorInvalidState);
break;
}
case TypeId::kUInt8:
case TypeId::kUInt16:
case TypeId::kUInt32: {
if (gpReturnIndexes[valueIndex] != BaseReg::kIdBad)
func._rets[valueIndex].initReg(RegType::kX86_Gpd, gpReturnIndexes[valueIndex], TypeId::kUInt32);
else
return DebugUtils::errored(kErrorInvalidState);
break;
}
case TypeId::kFloat32:
case TypeId::kFloat64: {
RegType regType = Environment::is32Bit(arch) ? RegType::kX86_St : RegType::kX86_Xmm;
func._rets[valueIndex].initReg(regType, valueIndex, typeId);
break;
}
case TypeId::kFloat80: {
// 80-bit floats are always returned by FP0.
func._rets[valueIndex].initReg(RegType::kX86_St, valueIndex, typeId);
break;
}
case TypeId::kMmx32:
case TypeId::kMmx64: {
// MM registers are returned through XMM (SystemV) or GPQ (Win64).
RegType regType = RegType::kX86_Mm;
uint32_t regIndex = valueIndex;
if (Environment::is64Bit(arch)) {
regType = cc.strategy() == CallConvStrategy::kDefault ? RegType::kX86_Xmm : RegType::kX86_Gpq;
regIndex = cc.strategy() == CallConvStrategy::kDefault ? valueIndex : gpReturnIndexes[valueIndex];
if (regIndex == BaseReg::kIdBad)
return DebugUtils::errored(kErrorInvalidState);
}
func._rets[valueIndex].initReg(regType, regIndex, typeId);
break;
}
default: {
func._rets[valueIndex].initReg(vecTypeIdToRegType(typeId), valueIndex, typeId);
break;
}
}
}
}
switch (cc.strategy()) {
case CallConvStrategy::kDefault: {
uint32_t gpzPos = 0;
uint32_t vecPos = 0;
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
unpackValues(func, func._args[argIndex]);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
FuncValue& arg = func._args[argIndex][valueIndex];
// Terminate if there are no more arguments in the pack.
if (!arg)
break;
TypeId typeId = arg.typeId();
if (TypeUtils::isInt(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (gpzPos < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[RegGroup::kGp].id[gpzPos];
if (regId != BaseReg::kIdBad) {
RegType regType = typeId <= TypeId::kUInt32 ? RegType::kX86_Gpd : RegType::kX86_Gpq;
arg.assignRegData(regType, regId);
func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
gpzPos++;
}
else {
uint32_t size = Support::max<uint32_t>(TypeUtils::sizeOf(typeId), registerSize);
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += size;
}
continue;
}
if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (vecPos < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[RegGroup::kVec].id[vecPos];
if (TypeUtils::isFloat(typeId)) {
// If this is a float, but `kFlagPassFloatsByVec` is false, we have to use stack instead. This should
// only be used by 32-bit calling conventions.
if (!cc.hasFlag(CallConvFlags::kPassFloatsByVec))
regId = BaseReg::kIdBad;
}
else {
// Pass vector registers via stack if this is a variable arguments function. This should only be used
// by 32-bit calling conventions.
if (signature.hasVarArgs() && cc.hasFlag(CallConvFlags::kPassVecByStackIfVA))
regId = BaseReg::kIdBad;
}
if (regId != BaseReg::kIdBad) {
arg.initTypeId(typeId);
arg.assignRegData(vecTypeIdToRegType(typeId), regId);
func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
vecPos++;
}
else {
uint32_t size = TypeUtils::sizeOf(typeId);
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += size;
}
continue;
}
}
}
break;
}
case CallConvStrategy::kX64Windows:
case CallConvStrategy::kX64VectorCall: {
// Both X64 and VectorCall behave similarly - arguments are indexed from left to right. The position of the
// argument determines in which register the argument is allocated, so it's either GP or one of XMM/YMM/ZMM
// registers.
//
// [ X64 ]    [VecCall]
// Index:     #0   #1   #2   #3   #4   #5
//
// GP :       RCX  RDX  R8   R9
// VEC :      XMM0 XMM1 XMM2 XMM3 XMM4 XMM5
//
// For example function `f(int a, double b, int c, double d)` will be:
//
//            (a)  (b)  (c)  (d)
//            RCX  XMM1 R8   XMM3
//
// Unused vector registers are used by HVA.
bool isVectorCall = (cc.strategy() == CallConvStrategy::kX64VectorCall);
for (uint32_t argIndex = 0; argIndex < argCount; argIndex++) {
unpackValues(func, func._args[argIndex]);
for (uint32_t valueIndex = 0; valueIndex < Globals::kMaxValuePack; valueIndex++) {
FuncValue& arg = func._args[argIndex][valueIndex];
// Terminate if there are no more arguments in the pack.
if (!arg)
break;
TypeId typeId = arg.typeId();
uint32_t size = TypeUtils::sizeOf(typeId);
if (TypeUtils::isInt(typeId) || TypeUtils::isMmx(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (argIndex < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[RegGroup::kGp].id[argIndex];
if (regId != BaseReg::kIdBad) {
RegType regType = size <= 4 && !TypeUtils::isMmx(typeId) ? RegType::kX86_Gpd : RegType::kX86_Gpq;
arg.assignRegData(regType, regId);
func.addUsedRegs(RegGroup::kGp, Support::bitMask(regId));
}
else {
arg.assignStackOffset(int32_t(stackOffset));
stackOffset += 8;
}
continue;
}
if (TypeUtils::isFloat(typeId) || TypeUtils::isVec(typeId)) {
uint32_t regId = BaseReg::kIdBad;
if (argIndex < CallConv::kMaxRegArgsPerGroup)
regId = cc._passedOrder[RegGroup::kVec].id[argIndex];
if (regId != BaseReg::kIdBad) {
// X64-ABI doesn't allow vector types (XMM|YMM|ZMM) to be passed via registers, however, VectorCall
// was designed for that purpose.
if (TypeUtils::isFloat(typeId) || isVectorCall) {
RegType regType = vecTypeIdToRegType(typeId);
arg.assignRegData(regType, regId);
func.addUsedRegs(RegGroup::kVec, Support::bitMask(regId));
continue;
}
}
// Passed via stack if the argument is a float/double or is passed indirectly. The trap is: if the
// argument is passed indirectly, its address can still be passed via a register if a GP register is
// available at the argument's index.
if (TypeUtils::isFloat(typeId)) {
arg.assignStackOffset(int32_t(stackOffset));
}
else {
uint32_t gpRegId = cc._passedOrder[RegGroup::kGp].id[argIndex];
if (gpRegId != BaseReg::kIdBad)
arg.assignRegData(RegType::kX86_Gpq, gpRegId);
else
arg.assignStackOffset(int32_t(stackOffset));
arg.addFlags(FuncValue::kFlagIsIndirect);
}
// Always 8 bytes (float/double/pointer).
stackOffset += 8;
continue;
}
}
}
break;
}
}
func._argStackSize = stackOffset;
return kErrorOk;
}
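// Sketch (assumptions labeled, not from the original sources): verifying the
// Win64 mapping documented above for `f(int a, double b, int c, double d)`:
//
//   FuncDetail d;
//   d.init(FuncSignatureT<void, int, double, int, double>(CallConvId::kX64Windows),
//          Environment(Arch::kX64));
//   // d.arg(0) -> ECX, d.arg(1) -> XMM1, d.arg(2) -> R8D, d.arg(3) -> XMM3.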
} // {FuncInternal}
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_X86


@ -0,0 +1,33 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86FUNC_P_H_INCLUDED
#define ASMJIT_X86_X86FUNC_P_H_INCLUDED
#include "../core/func.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
//! X86-specific function API (calling conventions and other utilities).
namespace FuncInternal {
//! Initialize `CallConv` structure (X86 specific).
Error initCallConv(CallConv& cc, CallConvId ccId, const Environment& environment) noexcept;
//! Initialize `FuncDetail` (X86 specific).
Error initFuncDetail(FuncDetail& func, const FuncSignature& signature, uint32_t registerSize) noexcept;
} // {FuncInternal}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86FUNC_P_H_INCLUDED

2169
src/asmjit/x86/x86globals.h Normal file

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,41 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
#define ASMJIT_X86_X86INSTAPI_P_H_INCLUDED
#include "../core/inst.h"
#include "../core/operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
namespace InstInternal {
#ifndef ASMJIT_NO_TEXT
Error ASMJIT_CDECL instIdToString(Arch arch, InstId instId, String& output) noexcept;
InstId ASMJIT_CDECL stringToInstId(Arch arch, const char* s, size_t len) noexcept;
#endif // !ASMJIT_NO_TEXT
#ifndef ASMJIT_NO_VALIDATION
Error ASMJIT_CDECL validate(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, ValidationFlags validationFlags) noexcept;
#endif // !ASMJIT_NO_VALIDATION
#ifndef ASMJIT_NO_INTROSPECTION
Error ASMJIT_CDECL queryRWInfo(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, InstRWInfo* out) noexcept;
Error ASMJIT_CDECL queryFeatures(Arch arch, const BaseInst& inst, const Operand_* operands, size_t opCount, CpuFeatures* out) noexcept;
#endif // !ASMJIT_NO_INTROSPECTION
} // {InstInternal}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86INSTAPI_P_H_INCLUDED

4427
src/asmjit/x86/x86instdb.cpp Normal file

File diff suppressed because it is too large

563
src/asmjit/x86/x86instdb.h Normal file

@ -0,0 +1,563 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86INSTDB_H_INCLUDED
#define ASMJIT_X86_X86INSTDB_H_INCLUDED
#include "../x86/x86globals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \addtogroup asmjit_x86
//! \{
//! Instruction database (X86).
namespace InstDB {
//! Describes which operation mode is supported by an instruction.
enum class Mode : uint8_t {
//! Invalid mode.
kNone = 0x00u,
//! X86 mode supported.
kX86 = 0x01u,
//! X64 mode supported.
kX64 = 0x02u,
//! Both X86 and X64 modes supported.
kAny = 0x03u
};
ASMJIT_DEFINE_ENUM_FLAGS(Mode)
//! Converts architecture to operation mode, see \ref Mode.
static constexpr Mode modeFromArch(Arch arch) noexcept {
return arch == Arch::kX86 ? Mode::kX86 :
arch == Arch::kX64 ? Mode::kX64 : Mode::kNone;
}
//! Operand signature flags used by \ref OpSignature.
enum class OpFlags : uint64_t {
//! No operand flags.
kNone = 0u,
kRegGpbLo = 0x0000000000000001u, //!< Operand can be low 8-bit GPB register.
kRegGpbHi = 0x0000000000000002u, //!< Operand can be high 8-bit GPB register.
kRegGpw = 0x0000000000000004u, //!< Operand can be 16-bit GPW register.
kRegGpd = 0x0000000000000008u, //!< Operand can be 32-bit GPD register.
kRegGpq = 0x0000000000000010u, //!< Operand can be 64-bit GPQ register.
kRegXmm = 0x0000000000000020u, //!< Operand can be 128-bit XMM register.
kRegYmm = 0x0000000000000040u, //!< Operand can be 256-bit YMM register.
kRegZmm = 0x0000000000000080u, //!< Operand can be 512-bit ZMM register.
kRegMm = 0x0000000000000100u, //!< Operand can be 64-bit MM register.
kRegKReg = 0x0000000000000200u, //!< Operand can be 64-bit K register.
kRegSReg = 0x0000000000000400u, //!< Operand can be SReg (segment register).
kRegCReg = 0x0000000000000800u, //!< Operand can be CReg (control register).
kRegDReg = 0x0000000000001000u, //!< Operand can be DReg (debug register).
kRegSt = 0x0000000000002000u, //!< Operand can be 80-bit ST register (X87).
kRegBnd = 0x0000000000004000u, //!< Operand can be 128-bit BND register.
kRegTmm = 0x0000000000008000u, //!< Operand can be 0..8192-bit TMM register.
kRegMask = 0x000000000000FFFFu, //!< Mask of all possible register types.
kMemUnspecified = 0x0000000000040000u, //!< Operand can be a scalar memory pointer without size.
kMem8 = 0x0000000000080000u, //!< Operand can be an 8-bit memory pointer.
kMem16 = 0x0000000000100000u, //!< Operand can be a 16-bit memory pointer.
kMem32 = 0x0000000000200000u, //!< Operand can be a 32-bit memory pointer.
kMem48 = 0x0000000000400000u, //!< Operand can be a 48-bit memory pointer (FAR pointers only).
kMem64 = 0x0000000000800000u, //!< Operand can be a 64-bit memory pointer.
kMem80 = 0x0000000001000000u, //!< Operand can be an 80-bit memory pointer.
kMem128 = 0x0000000002000000u, //!< Operand can be a 128-bit memory pointer.
kMem256 = 0x0000000004000000u, //!< Operand can be a 256-bit memory pointer.
kMem512 = 0x0000000008000000u, //!< Operand can be a 512-bit memory pointer.
kMem1024 = 0x0000000010000000u, //!< Operand can be a 1024-bit memory pointer.
kMemMask = 0x000000001FFC0000u, //!< Mask of all possible scalar memory types.
kVm32x = 0x0000000040000000u, //!< Operand can be a vm32x (vector) pointer.
kVm32y = 0x0000000080000000u, //!< Operand can be a vm32y (vector) pointer.
kVm32z = 0x0000000100000000u, //!< Operand can be a vm32z (vector) pointer.
kVm64x = 0x0000000200000000u, //!< Operand can be a vm64x (vector) pointer.
kVm64y = 0x0000000400000000u, //!< Operand can be a vm64y (vector) pointer.
kVm64z = 0x0000000800000000u, //!< Operand can be a vm64z (vector) pointer.
kVmMask = 0x0000000FC0000000u, //!< Mask of all possible vector memory types.
kImmI4 = 0x0000001000000000u, //!< Operand can be signed 4-bit immediate.
kImmU4 = 0x0000002000000000u, //!< Operand can be unsigned 4-bit immediate.
kImmI8 = 0x0000004000000000u, //!< Operand can be signed 8-bit immediate.
kImmU8 = 0x0000008000000000u, //!< Operand can be unsigned 8-bit immediate.
kImmI16 = 0x0000010000000000u, //!< Operand can be signed 16-bit immediate.
kImmU16 = 0x0000020000000000u, //!< Operand can be unsigned 16-bit immediate.
kImmI32 = 0x0000040000000000u, //!< Operand can be signed 32-bit immediate.
kImmU32 = 0x0000080000000000u, //!< Operand can be unsigned 32-bit immediate.
kImmI64 = 0x0000100000000000u, //!< Operand can be signed 64-bit immediate.
kImmU64 = 0x0000200000000000u, //!< Operand can be unsigned 64-bit immediate.
kImmMask = 0x00003FF000000000u, //!< Mask of all immediate types.
kRel8 = 0x0000400000000000u, //!< Operand can be relative 8-bit displacement.
kRel32 = 0x0000800000000000u, //!< Operand can be relative 32-bit displacement.
kRelMask = 0x0000C00000000000u, //!< Mask of all relative displacement types.
kFlagMemBase = 0x0001000000000000u, //!< Flag: Only memory base is allowed (no index, no offset).
kFlagMemDs = 0x0002000000000000u, //!< Flag: Implicit memory operand's DS segment.
kFlagMemEs = 0x0004000000000000u, //!< Flag: Implicit memory operand's ES segment.
kFlagMib = 0x0008000000000000u, //!< Flag: Operand is MIB (base+index) pointer.
kFlagTMem = 0x0010000000000000u, //!< Flag: Operand is TMEM (sib_mem), AMX memory pointer.
kFlagImplicit = 0x0080000000000000u, //!< Flag: Operand is implicit.
kFlagMask = 0x009F000000000000u, //!< Mask of all flags.
//! Contains mask of all registers, memory operands, immediate operands, and displacement operands.
kOpMask = kRegMask | kMemMask | kVmMask | kImmMask | kRelMask
};
ASMJIT_DEFINE_ENUM_FLAGS(OpFlags)
//! Operand signature.
//!
//! Contains all possible operand combinations, memory size information, and a fixed register id (or `BaseReg::kIdBad`
//! if fixed id isn't required).
struct OpSignature {
//! \name Members
//! \{
uint64_t _flags : 56;
uint64_t _regMask : 8;
//! \}
//! \name Accessors
//! \{
//! Returns operand signature flags.
inline OpFlags flags() const noexcept { return (OpFlags)_flags; }
//! Tests whether the given `flag` is set.
inline bool hasFlag(OpFlags flag) const noexcept { return (_flags & uint64_t(flag)) != 0; }
//! Tests whether this signature contains at least one register operand of any type.
inline bool hasReg() const noexcept { return hasFlag(OpFlags::kRegMask); }
//! Tests whether this signature contains at least one scalar memory operand of any type.
inline bool hasMem() const noexcept { return hasFlag(OpFlags::kMemMask); }
//! Tests whether this signature contains at least one vector memory operand of any type.
inline bool hasVm() const noexcept { return hasFlag(OpFlags::kVmMask); }
//! Tests whether this signature contains at least one immediate operand of any type.
inline bool hasImm() const noexcept { return hasFlag(OpFlags::kImmMask); }
//! Tests whether this signature contains at least one relative displacement operand of any type.
inline bool hasRel() const noexcept { return hasFlag(OpFlags::kRelMask); }
//! Tests whether the operand is implicit.
inline bool isImplicit() const noexcept { return hasFlag(OpFlags::kFlagImplicit); }
//! Returns a physical register mask.
inline RegMask regMask() const noexcept { return _regMask; }
//! \}
};
ASMJIT_VARAPI const OpSignature _opSignatureTable[];
//! Instruction signature.
//!
//! Contains a sequence of operands' combinations and other metadata that defines a single instruction. This data is
//! used by instruction validator.
struct InstSignature {
//! \name Members
//! \{
//! Count of operands in `opIndex` (0..6).
uint8_t _opCount : 3;
//! Architecture modes supported (X86 / X64).
uint8_t _mode : 2;
//! Number of implicit operands.
uint8_t _implicitOpCount : 3;
//! Reserved for future use.
uint8_t _reserved;
//! Indexes to `OpSignature` table.
uint8_t _opSignatureIndexes[Globals::kMaxOpCount];
//! \}
//! \name Accessors
//! \{
//! Returns instruction operation mode.
inline Mode mode() const noexcept { return (Mode)_mode; }
//! Tests whether the instruction supports the given operating mode.
inline bool supportsMode(Mode mode) const noexcept { return (uint8_t(_mode) & uint8_t(mode)) != 0; }
//! Returns the number of operands of this signature.
inline uint32_t opCount() const noexcept { return _opCount; }
//! Returns the number of implicit operands this signature has.
inline uint32_t implicitOpCount() const noexcept { return _implicitOpCount; }
//! Tests whether this instruction signature has at least one implicit operand.
inline bool hasImplicitOperands() const noexcept { return _implicitOpCount != 0; }
//! Returns indexes to \ref _opSignatureTable for each operand of the instruction.
//!
//! \note The returned array always provides indexes for all operands (see \ref Globals::kMaxOpCount) even if the
//! instruction provides fewer operands. Undefined operands always have an index of zero.
inline const uint8_t* opSignatureIndexes() const noexcept { return _opSignatureIndexes; }
//! Returns index to \ref _opSignatureTable, corresponding to the requested operand `index` of the instruction.
inline uint8_t opSignatureIndex(size_t index) const noexcept {
ASMJIT_ASSERT(index < Globals::kMaxOpCount);
return _opSignatureIndexes[index];
}
//! Returns \ref OpSignature corresponding to the requested operand `index` of the instruction.
inline const OpSignature& opSignature(size_t index) const noexcept {
ASMJIT_ASSERT(index < Globals::kMaxOpCount);
return _opSignatureTable[_opSignatureIndexes[index]];
}
//! \}
};
ASMJIT_VARAPI const InstSignature _instSignatureTable[];
//! Instruction flags.
//!
//! Details about instruction encoding, operation, features, and some limitations.
enum class InstFlags : uint32_t {
//! No flags.
kNone = 0x00000000u,
// Instruction Family
// ------------------
//
// Instruction family information.
//! Instruction that accesses FPU registers.
kFpu = 0x00000100u,
//! Instruction that accesses MMX registers (including 3DNOW and GEODE) and EMMS.
kMmx = 0x00000200u,
//! Instruction that accesses XMM registers (SSE, AVX, AVX512).
kVec = 0x00000400u,
// FPU Flags
// ---------
//
// Used to tell the encoder which memory operand sizes are encodable.
//! FPU instruction can address `word_ptr` (shared with M80).
kFpuM16 = 0x00000800u,
//! FPU instruction can address `dword_ptr`.
kFpuM32 = 0x00001000u,
//! FPU instruction can address `qword_ptr`.
kFpuM64 = 0x00002000u,
//! FPU instruction can address `tword_ptr` (shared with M16).
kFpuM80 = 0x00000800u,
// Prefixes and Encoding Flags
// ---------------------------
//
// These describe optional X86 prefixes that can be used to change the instruction's operation.
//! Instruction can be prefixed with the REP (REPE) or REPNE prefix.
kRep = 0x00004000u,
//! Rep prefix is accepted, but it has no effect other than being emitted with the instruction (as an extra byte).
kRepIgnored = 0x00008000u,
//! Instruction can be prefixed with the LOCK prefix.
kLock = 0x00010000u,
//! Instruction can be prefixed with the XACQUIRE prefix.
kXAcquire = 0x00020000u,
//! Instruction can be prefixed with the XRELEASE prefix.
kXRelease = 0x00040000u,
//! Instruction uses MIB (BNDLDX|BNDSTX) to encode two registers.
kMib = 0x00080000u,
//! Instruction uses VSIB instead of legacy SIB.
kVsib = 0x00100000u,
//! Instruction uses TSIB (or SIB_MEM) encoding (MODRM followed by SIB).
kTsib = 0x00200000u,
// If both `kPrefixVex` and `kPrefixEvex` flags are specified it means that the instructions can be encoded
// by either VEX or EVEX prefix. In that case AsmJit checks global options and also instruction options to decide
// whether to emit VEX or EVEX prefix.
//! Instruction can be encoded by VEX|XOP (AVX|AVX2|BMI|XOP|...).
kVex = 0x00400000u,
//! Instruction can be encoded by EVEX (AVX512).
kEvex = 0x00800000u,
//! EVEX encoding is preferred over VEX encoding (AVX512_VNNI vs AVX_VNNI).
kPreferEvex = 0x01000000u,
//! EVEX and VEX signatures are compatible.
kEvexCompat = 0x02000000u,
//! EVEX instruction requires K register in the first operand (compare instructions).
kEvexKReg = 0x04000000u,
//! EVEX instruction requires two operands and K register as a selector (gather instructions).
kEvexTwoOp = 0x08000000u,
//! VEX instruction that can be transformed to a compatible EVEX instruction.
kEvexTransformable = 0x10000000u,
// Other Flags
// -----------
//! Instruction uses consecutive registers.
//!
//! Used by V4FMADDPS, V4FMADDSS, V4FNMADDPS, V4FNMADDSS, VP4DPWSSD, VP4DPWSSDS, VP2INTERSECTD, and VP2INTERSECTQ
//! instructions.
kConsecutiveRegs = 0x20000000u
};
ASMJIT_DEFINE_ENUM_FLAGS(InstFlags)
//! AVX-512 flags.
enum class Avx512Flags : uint32_t {
//! No AVX-512 flags.
kNone = 0,
//! Internally used in tables, has no meaning.
k_ = 0x00000000u,
//! Supports masking {k1..k7}.
kK = 0x00000001u,
//! Supports zeroing {z}, must be used together with masking (\ref kK).
kZ = 0x00000002u,
//! Supports 'embedded-rounding' {er} with implicit {sae}.
kER = 0x00000004u,
//! Supports 'suppress-all-exceptions' {sae}.
kSAE = 0x00000008u,
//! Supports 16-bit broadcast 'b16'.
kB16 = 0x00000010u,
//! Supports 32-bit broadcast 'b32'.
kB32 = 0x00000020u,
//! Supports 64-bit broadcast 'b64'.
kB64 = 0x00000040u,
//! Operates on a vector of consecutive registers (AVX512_4FMAPS and AVX512_4VNNIW).
kT4X = 0x00000080u,
//! Implicit zeroing if {k} masking is used. Using {z} is not valid in this case as it's implicit.
kImplicitZ = 0x00000100u,
};
ASMJIT_DEFINE_ENUM_FLAGS(Avx512Flags)
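// Illustration (assumed assembler syntax, not from the original sources): a
// database entry carrying kK | kZ | kB32 | kER permits AVX-512 decorators such as:
//
//   vaddps zmm0 {k1}{z}, zmm1, dword ptr [rax] {1to16}   ; {k}, {z}, b32
//   vaddps zmm0, zmm1, zmm2, {rz-sae}                    ; {er} with implicit {sae}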
//! Instruction common information.
//!
//! Aggregated information shared across one or more instructions.
struct CommonInfo {
//! Instruction flags.
uint32_t _flags;
//! AVX-512 flags.
uint32_t _avx512Flags : 11;
//! First `InstSignature` entry in the database.
uint32_t _iSignatureIndex : 11;
//! Number of relevant `ISignature` entries.
uint32_t _iSignatureCount : 5;
//! Instruction control flow category, see \ref InstControlFlow.
uint32_t _controlFlow : 3;
//! Specifies what happens if all source operands share the same register.
uint32_t _sameRegHint : 2;
//! \name Accessors
//! \{
//! Returns instruction flags.
inline InstFlags flags() const noexcept { return (InstFlags)_flags; }
//! Tests whether the instruction has a `flag`.
inline bool hasFlag(InstFlags flag) const noexcept { return Support::test(_flags, flag); }
//! Returns instruction AVX-512 flags.
inline Avx512Flags avx512Flags() const noexcept { return (Avx512Flags)_avx512Flags; }
//! Tests whether the instruction has an AVX-512 `flag`.
inline bool hasAvx512Flag(Avx512Flags flag) const noexcept { return Support::test(_avx512Flags, flag); }
//! Tests whether the instruction is FPU instruction.
inline bool isFpu() const noexcept { return hasFlag(InstFlags::kFpu); }
//! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
inline bool isMmx() const noexcept { return hasFlag(InstFlags::kMmx); }
//! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
inline bool isVec() const noexcept { return hasFlag(InstFlags::kVec); }
//! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers.
inline bool isSse() const noexcept { return (flags() & (InstFlags::kVec | InstFlags::kVex | InstFlags::kEvex)) == InstFlags::kVec; }
//! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
inline bool isAvx() const noexcept { return isVec() && isVexOrEvex(); }
//! Tests whether the instruction can be prefixed with LOCK prefix.
inline bool hasLockPrefix() const noexcept { return hasFlag(InstFlags::kLock); }
//! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
inline bool hasRepPrefix() const noexcept { return hasFlag(InstFlags::kRep); }
//! Tests whether the instruction can be prefixed with XACQUIRE prefix.
inline bool hasXAcquirePrefix() const noexcept { return hasFlag(InstFlags::kXAcquire); }
//! Tests whether the instruction can be prefixed with XRELEASE prefix.
inline bool hasXReleasePrefix() const noexcept { return hasFlag(InstFlags::kXRelease); }
//! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
inline bool isRepIgnored() const noexcept { return hasFlag(InstFlags::kRepIgnored); }
//! Tests whether the instruction uses MIB.
inline bool isMibOp() const noexcept { return hasFlag(InstFlags::kMib); }
//! Tests whether the instruction uses VSIB.
inline bool isVsibOp() const noexcept { return hasFlag(InstFlags::kVsib); }
//! Tests whether the instruction uses TSIB (AMX, instruction requires MOD+SIB).
inline bool isTsibOp() const noexcept { return hasFlag(InstFlags::kTsib); }
//! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
inline bool isVex() const noexcept { return hasFlag(InstFlags::kVex); }
//! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
inline bool isEvex() const noexcept { return hasFlag(InstFlags::kEvex); }
//! Tests whether the instruction uses VEX or EVEX prefix.
inline bool isVexOrEvex() const noexcept { return hasFlag(InstFlags::kVex | InstFlags::kEvex); }
//! Tests whether the instruction should prefer EVEX prefix instead of VEX prefix.
inline bool preferEvex() const noexcept { return hasFlag(InstFlags::kPreferEvex); }
inline bool isEvexCompatible() const noexcept { return hasFlag(InstFlags::kEvexCompat); }
inline bool isEvexKRegOnly() const noexcept { return hasFlag(InstFlags::kEvexKReg); }
inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(InstFlags::kEvexTwoOp); }
inline bool isEvexTransformable() const noexcept { return hasFlag(InstFlags::kEvexTransformable); }
//! Tests whether the instruction supports AVX512 masking {k}.
inline bool hasAvx512K() const noexcept { return hasAvx512Flag(Avx512Flags::kK); }
//! Tests whether the instruction supports AVX512 zeroing {k}{z}.
inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(Avx512Flags::kZ); }
//! Tests whether the instruction supports AVX512 embedded-rounding {er}.
inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(Avx512Flags::kER); }
//! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(Avx512Flags::kSAE); }
//! Tests whether the instruction supports AVX512 broadcast (either 32-bit or 64-bit).
inline bool hasAvx512B() const noexcept { return hasAvx512Flag(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64); }
//! Tests whether the instruction supports AVX512 broadcast (16-bit).
inline bool hasAvx512B16() const noexcept { return hasAvx512Flag(Avx512Flags::kB16); }
//! Tests whether the instruction supports AVX512 broadcast (32-bit).
inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(Avx512Flags::kB32); }
//! Tests whether the instruction supports AVX512 broadcast (64-bit).
inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(Avx512Flags::kB64); }
//! Returns the size of the broadcast - either 2, 4, or 8, or 0 if broadcast is not supported.
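//! Worked example: kShift = ctz(kB16) = 4, so the masked flags are shifted right by 3, mapping
//! kB16 (0x10) -> 2, kB32 (0x20) -> 4, kB64 (0x40) -> 8, and no broadcast flags -> 0.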
inline uint32_t broadcastSize() const noexcept {
constexpr uint32_t kShift = Support::ConstCTZ<uint32_t(Avx512Flags::kB16)>::value;
return (uint32_t(_avx512Flags) & uint32_t(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64)) >> (kShift - 1);
}
inline uint32_t signatureIndex() const noexcept { return _iSignatureIndex; }
inline uint32_t signatureCount() const noexcept { return _iSignatureCount; }
inline const InstSignature* signatureData() const noexcept { return _instSignatureTable + _iSignatureIndex; }
inline const InstSignature* signatureEnd() const noexcept { return _instSignatureTable + _iSignatureIndex + _iSignatureCount; }
//! Returns a control flow category of the instruction.
inline InstControlFlow controlFlow() const noexcept { return (InstControlFlow)_controlFlow; }
//! Returns a hint that can be used when both inputs are the same register.
inline InstSameRegHint sameRegHint() const noexcept { return (InstSameRegHint)_sameRegHint; }
//! \}
};
ASMJIT_VARAPI const CommonInfo _commonInfoTable[];
//! Instruction information.
struct InstInfo {
//! Index to \ref _nameData.
uint32_t _nameDataIndex : 14;
//! Index to \ref _commonInfoTable.
uint32_t _commonInfoIndex : 10;
//! Index to \ref _additionalInfoTable.
uint32_t _additionalInfoIndex : 8;
//! Instruction encoding (internal encoding identifier used by \ref Assembler).
uint8_t _encoding;
//! Main opcode value (0..255).
uint8_t _mainOpcodeValue;
//! Index to \ref _mainOpcodeTable that is combined with \ref _mainOpcodeValue to form the final opcode.
uint8_t _mainOpcodeIndex;
//! Index to \ref _altOpcodeTable that contains a full alternative opcode.
uint8_t _altOpcodeIndex;
//! \name Accessors
//! \{
//! Returns common information, see \ref CommonInfo.
inline const CommonInfo& commonInfo() const noexcept { return _commonInfoTable[_commonInfoIndex]; }
//! Returns instruction flags, see \ref InstFlags.
inline InstFlags flags() const noexcept { return commonInfo().flags(); }
//! Tests whether the instruction has flag `flag`, see \ref InstFlags.
inline bool hasFlag(InstFlags flag) const noexcept { return commonInfo().hasFlag(flag); }
//! Returns instruction AVX-512 flags, see \ref Avx512Flags.
inline Avx512Flags avx512Flags() const noexcept { return commonInfo().avx512Flags(); }
//! Tests whether the instruction has an AVX-512 `flag`, see \ref Avx512Flags.
inline bool hasAvx512Flag(Avx512Flags flag) const noexcept { return commonInfo().hasAvx512Flag(flag); }
//! Tests whether the instruction is FPU instruction.
inline bool isFpu() const noexcept { return commonInfo().isFpu(); }
//! Tests whether the instruction is MMX/3DNOW instruction that accesses MMX registers (includes EMMS and FEMMS).
inline bool isMmx() const noexcept { return commonInfo().isMmx(); }
//! Tests whether the instruction is SSE|AVX|AVX512 instruction that accesses XMM|YMM|ZMM registers.
inline bool isVec() const noexcept { return commonInfo().isVec(); }
//! Tests whether the instruction is SSE+ (SSE4.2, AES, SHA included) instruction that accesses XMM registers.
inline bool isSse() const noexcept { return commonInfo().isSse(); }
//! Tests whether the instruction is AVX+ (FMA included) instruction that accesses XMM|YMM|ZMM registers.
inline bool isAvx() const noexcept { return commonInfo().isAvx(); }
//! Tests whether the instruction can be prefixed with LOCK prefix.
inline bool hasLockPrefix() const noexcept { return commonInfo().hasLockPrefix(); }
//! Tests whether the instruction can be prefixed with REP (REPE|REPZ) prefix.
inline bool hasRepPrefix() const noexcept { return commonInfo().hasRepPrefix(); }
//! Tests whether the instruction can be prefixed with XACQUIRE prefix.
inline bool hasXAcquirePrefix() const noexcept { return commonInfo().hasXAcquirePrefix(); }
//! Tests whether the instruction can be prefixed with XRELEASE prefix.
inline bool hasXReleasePrefix() const noexcept { return commonInfo().hasXReleasePrefix(); }
//! Tests whether the rep prefix is supported by the instruction, but ignored (has no effect).
inline bool isRepIgnored() const noexcept { return commonInfo().isRepIgnored(); }
//! Tests whether the instruction uses MIB.
inline bool isMibOp() const noexcept { return hasFlag(InstFlags::kMib); }
//! Tests whether the instruction uses VSIB.
inline bool isVsibOp() const noexcept { return hasFlag(InstFlags::kVsib); }
//! Tests whether the instruction uses VEX (can be set together with EVEX if both are encodable).
inline bool isVex() const noexcept { return hasFlag(InstFlags::kVex); }
//! Tests whether the instruction uses EVEX (can be set together with VEX if both are encodable).
inline bool isEvex() const noexcept { return hasFlag(InstFlags::kEvex); }
//! Tests whether the instruction uses VEX or EVEX prefix.
inline bool isVexOrEvex() const noexcept { return hasFlag(InstFlags::kVex | InstFlags::kEvex); }
inline bool isEvexCompatible() const noexcept { return hasFlag(InstFlags::kEvexCompat); }
inline bool isEvexKRegOnly() const noexcept { return hasFlag(InstFlags::kEvexKReg); }
inline bool isEvexTwoOpOnly() const noexcept { return hasFlag(InstFlags::kEvexTwoOp); }
inline bool isEvexTransformable() const noexcept { return hasFlag(InstFlags::kEvexTransformable); }
//! Tests whether the instruction supports AVX512 masking {k}.
inline bool hasAvx512K() const noexcept { return hasAvx512Flag(Avx512Flags::kK); }
//! Tests whether the instruction supports AVX512 zeroing {k}{z}.
inline bool hasAvx512Z() const noexcept { return hasAvx512Flag(Avx512Flags::kZ); }
//! Tests whether the instruction supports AVX512 embedded-rounding {er}.
inline bool hasAvx512ER() const noexcept { return hasAvx512Flag(Avx512Flags::kER); }
//! Tests whether the instruction supports AVX512 suppress-all-exceptions {sae}.
inline bool hasAvx512SAE() const noexcept { return hasAvx512Flag(Avx512Flags::kSAE); }
//! Tests whether the instruction supports AVX512 broadcast (either 32-bit or 64-bit).
inline bool hasAvx512B() const noexcept { return hasAvx512Flag(Avx512Flags::kB16 | Avx512Flags::kB32 | Avx512Flags::kB64); }
//! Tests whether the instruction supports AVX512 broadcast (16-bit).
inline bool hasAvx512B16() const noexcept { return hasAvx512Flag(Avx512Flags::kB16); }
//! Tests whether the instruction supports AVX512 broadcast (32-bit).
inline bool hasAvx512B32() const noexcept { return hasAvx512Flag(Avx512Flags::kB32); }
//! Tests whether the instruction supports AVX512 broadcast (64-bit).
inline bool hasAvx512B64() const noexcept { return hasAvx512Flag(Avx512Flags::kB64); }
//! Returns a control flow category of the instruction.
inline InstControlFlow controlFlow() const noexcept { return commonInfo().controlFlow(); }
//! Returns a hint that can be used when both inputs are the same register.
inline InstSameRegHint sameRegHint() const noexcept { return commonInfo().sameRegHint(); }
inline uint32_t signatureIndex() const noexcept { return commonInfo().signatureIndex(); }
inline uint32_t signatureCount() const noexcept { return commonInfo().signatureCount(); }
inline const InstSignature* signatureData() const noexcept { return commonInfo().signatureData(); }
inline const InstSignature* signatureEnd() const noexcept { return commonInfo().signatureEnd(); }
//! \}
};
ASMJIT_VARAPI const InstInfo _instInfoTable[];
static inline const InstInfo& infoById(InstId instId) noexcept {
ASMJIT_ASSERT(Inst::isDefinedId(instId));
return _instInfoTable[instId];
}
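// Usage sketch (illustrative; relies only on the public members declared above):
// walking all operand signatures of an instruction, e.g. for custom validation:
//
//   const InstInfo& info = infoById(Inst::kIdAdd);
//   for (const InstSignature* s = info.signatureData(); s != info.signatureEnd(); s++) {
//     // s->opCount() and s->opSignature(i) describe one allowed operand combination.
//   }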
//! \cond INTERNAL
static_assert(sizeof(OpSignature) == 8, "InstDB::OpSignature must be 8 bytes long");
//! \endcond
} // {InstDB}
//! \}
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86INSTDB_H_INCLUDED


@ -0,0 +1,311 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86INSTDB_P_H_INCLUDED
#define ASMJIT_X86_X86INSTDB_P_H_INCLUDED
#include "../x86/x86instdb.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
namespace InstDB {
//! Instruction encoding (X86).
//!
//! This is a specific identifier that is used by AsmJit to describe the way each instruction is encoded. Some
//! encodings are specific to a single instruction, as the X86 instruction set contains a lot of legacy encodings,
//! and some encodings describe a group of instructions that share common traits, like MMX, SSE, AVX, AVX512
//! instructions, etc...
enum EncodingId : uint32_t {
kEncodingNone = 0, //!< Never used.
kEncodingX86Op, //!< X86 [OP].
kEncodingX86Op_Mod11RM, //!< X86 [OP] (opcode with ModRM byte where MOD must be 11b).
kEncodingX86Op_Mod11RM_I8, //!< X86 [OP] (opcode with ModRM byte + 8-bit immediate).
kEncodingX86Op_xAddr, //!< X86 [OP] (implicit address in the first register operand).
kEncodingX86Op_xAX, //!< X86 [OP] (implicit or explicit '?AX' form).
kEncodingX86Op_xDX_xAX, //!< X86 [OP] (implicit or explicit '?DX, ?AX' form).
kEncodingX86Op_MemZAX, //!< X86 [OP] (implicit or explicit '[EAX|RAX]' form).
kEncodingX86I_xAX, //!< X86 [I] (implicit or explicit '?AX' form).
kEncodingX86M, //!< X86 [M] (handles 2|4|8-bytes size).
kEncodingX86M_NoMemSize, //!< X86 [M] (handles 2|4|8-bytes size, but doesn't consider memory size).
kEncodingX86M_NoSize, //!< X86 [M] (doesn't handle any size).
kEncodingX86M_GPB, //!< X86 [M] (handles single-byte size).
kEncodingX86M_GPB_MulDiv, //!< X86 [M] (like GPB, handles implicit|explicit MUL|DIV|IDIV).
kEncodingX86M_Only, //!< X86 [M] (restricted to memory operand of any size).
kEncodingX86M_Only_EDX_EAX, //!< X86 [M] (memory operand only, followed by implicit <edx> and <eax>).
kEncodingX86M_Nop, //!< X86 [M] (special case of NOP instruction).
kEncodingX86R_Native, //!< X86 [R] (register must be either 32-bit or 64-bit depending on arch).
kEncodingX86R_FromM, //!< X86 [R] (the register specifies a memory address).
kEncodingX86R32_EDX_EAX, //!< X86 [R32] followed by implicit EDX and EAX.
kEncodingX86Rm, //!< X86 [RM] (doesn't handle single-byte size).
kEncodingX86Rm_Raw66H, //!< X86 [RM] (used by LZCNT, POPCNT, and TZCNT).
kEncodingX86Rm_NoSize, //!< X86 [RM] (doesn't add REX.W prefix if 64-bit reg is used).
kEncodingX86Mr, //!< X86 [MR] (doesn't handle single-byte size).
kEncodingX86Mr_NoSize, //!< X86 [MR] (doesn't handle any size).
kEncodingX86Arith, //!< X86 adc, add, and, cmp, or, sbb, sub, xor.
kEncodingX86Bswap, //!< X86 bswap.
kEncodingX86Bt, //!< X86 bt, btc, btr, bts.
kEncodingX86Call, //!< X86 call.
kEncodingX86Cmpxchg, //!< X86 [MR] cmpxchg.
kEncodingX86Cmpxchg8b_16b, //!< X86 [MR] cmpxchg8b, cmpxchg16b.
kEncodingX86Crc, //!< X86 crc32.
kEncodingX86Enter, //!< X86 enter.
kEncodingX86Imul, //!< X86 imul.
kEncodingX86In, //!< X86 in.
kEncodingX86Ins, //!< X86 ins[b|w|d].
kEncodingX86IncDec, //!< X86 inc, dec.
kEncodingX86Int, //!< X86 int (interrupt).
kEncodingX86Jcc, //!< X86 jcc.
kEncodingX86JecxzLoop, //!< X86 jcxz, jecxz, jrcxz, loop, loope, loopne.
kEncodingX86Jmp, //!< X86 jmp.
kEncodingX86JmpRel, //!< X86 xbegin.
kEncodingX86LcallLjmp, //!< X86 lcall/ljmp.
kEncodingX86Lea, //!< X86 lea.
kEncodingX86Mov, //!< X86 mov (all possible cases).
kEncodingX86Movabs, //!< X86 movabs.
kEncodingX86MovsxMovzx, //!< X86 movsx, movzx.
kEncodingX86MovntiMovdiri, //!< X86 movnti/movdiri.
kEncodingX86EnqcmdMovdir64b, //!< X86 enqcmd/enqcmds/movdir64b.
kEncodingX86Out, //!< X86 out.
kEncodingX86Outs, //!< X86 outs[b|w|d].
kEncodingX86Push, //!< X86 push.
kEncodingX86Pop, //!< X86 pop.
kEncodingX86Ret, //!< X86 ret.
kEncodingX86Rot, //!< X86 rcl, rcr, rol, ror, sal, sar, shl, shr.
kEncodingX86Set, //!< X86 setcc.
kEncodingX86ShldShrd, //!< X86 shld, shrd.
kEncodingX86StrRm, //!< X86 lods.
kEncodingX86StrMr, //!< X86 scas, stos.
kEncodingX86StrMm, //!< X86 cmps, movs.
kEncodingX86Test, //!< X86 test.
kEncodingX86Xadd, //!< X86 xadd.
kEncodingX86Xchg, //!< X86 xchg.
kEncodingX86Fence, //!< X86 lfence, mfence, sfence.
kEncodingX86Bndmov, //!< X86 [RM|MR] (used by BNDMOV).
kEncodingFpuOp, //!< FPU [OP].
kEncodingFpuArith, //!< FPU fadd, fdiv, fdivr, fmul, fsub, fsubr.
kEncodingFpuCom, //!< FPU fcom, fcomp.
kEncodingFpuFldFst, //!< FPU fld, fst, fstp.
kEncodingFpuM, //!< FPU fiadd, ficom, ficomp, fidiv, fidivr, fild, fimul, fist, fistp, fisttp, fisub, fisubr.
kEncodingFpuR, //!< FPU fcmov, fcomi, fcomip, ffree, fucom, fucomi, fucomip, fucomp, fxch.
kEncodingFpuRDef, //!< FPU faddp, fdivp, fdivrp, fmulp, fsubp, fsubrp.
kEncodingFpuStsw, //!< FPU fnstsw, fstsw.
kEncodingExtRm, //!< EXT [RM].
kEncodingExtRm_XMM0, //!< EXT [RM<XMM0>].
kEncodingExtRm_ZDI, //!< EXT [RM<ZDI>].
kEncodingExtRm_P, //!< EXT [RM] (propagates 66H if the instruction uses XMM register).
kEncodingExtRm_Wx, //!< EXT [RM] (propagates REX.W if GPQ is used or the second operand is GPQ/QWORD_PTR).
kEncodingExtRm_Wx_GpqOnly, //!< EXT [RM] (propagates REX.W if the first operand is GPQ register).
kEncodingExtRmRi, //!< EXT [RM|RI].
kEncodingExtRmRi_P, //!< EXT [RM|RI] (propagates 66H if the instruction uses XMM register).
kEncodingExtRmi, //!< EXT [RMI].
kEncodingExtRmi_P, //!< EXT [RMI] (propagates 66H if the instruction uses XMM register).
kEncodingExtPextrw, //!< EXT pextrw.
kEncodingExtExtract, //!< EXT pextrb, pextrd, pextrq, extractps.
kEncodingExtMov, //!< EXT mov?? - #1:[MM|XMM, MM|XMM|Mem] #2:[MM|XMM|Mem, MM|XMM].
kEncodingExtMovbe, //!< EXT movbe.
kEncodingExtMovd, //!< EXT movd.
kEncodingExtMovq, //!< EXT movq.
kEncodingExtExtrq, //!< EXT extrq (SSE4A).
kEncodingExtInsertq, //!< EXT insertq (SSE4A).
kEncodingExt3dNow, //!< EXT [RMI] (3DNOW specific).
kEncodingVexOp, //!< VEX [OP].
kEncodingVexOpMod, //!< VEX [OP] with MODR/M.
kEncodingVexKmov, //!< VEX [RM|MR] (used by kmov[b|w|d|q]).
kEncodingVexR_Wx, //!< VEX|EVEX [R] (propagates VEX.W if GPQ used).
kEncodingVexM, //!< VEX|EVEX [M].
kEncodingVexM_VM, //!< VEX|EVEX [M] (propagates VEX|EVEX.L, VSIB support).
kEncodingVexMr_Lx, //!< VEX|EVEX [MR] (propagates VEX|EVEX.L if YMM used).
kEncodingVexMr_VM, //!< VEX|EVEX [MR] (VSIB support).
kEncodingVexMri, //!< VEX|EVEX [MRI].
kEncodingVexMri_Lx, //!< VEX|EVEX [MRI] (propagates VEX|EVEX.L if YMM used).
kEncodingVexMri_Vpextrw, //!< VEX|EVEX [MRI] (special case required by VPEXTRW instruction).
kEncodingVexRm, //!< VEX|EVEX [RM].
kEncodingVexRm_ZDI, //!< VEX|EVEX [RM<ZDI>].
kEncodingVexRm_Wx, //!< VEX|EVEX [RM] (propagates VEX|EVEX.W if GPQ used).
kEncodingVexRm_Lx, //!< VEX|EVEX [RM] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRm_Lx_Narrow, //!< VEX|EVEX [RM] (the destination vector size is narrowed).
kEncodingVexRm_Lx_Bcst, //!< VEX|EVEX [RM] (can handle broadcast r32/r64).
kEncodingVexRm_VM, //!< VEX|EVEX [RM] (propagates VEX|EVEX.L, VSIB support).
kEncodingVexRm_T1_4X, //!< EVEX [RM] (used by NN instructions that use RM-T1_4X encoding).
kEncodingVexRmi, //!< VEX|EVEX [RMI].
kEncodingVexRmi_Wx, //!< VEX|EVEX [RMI] (propagates VEX|EVEX.W if GPQ used).
kEncodingVexRmi_Lx, //!< VEX|EVEX [RMI] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvm, //!< VEX|EVEX [RVM].
kEncodingVexRvm_Wx, //!< VEX|EVEX [RVM] (propagates VEX|EVEX.W if GPQ used).
kEncodingVexRvm_ZDX_Wx, //!< VEX|EVEX [RVM<ZDX>] (propagates VEX|EVEX.W if GPQ used).
kEncodingVexRvm_Lx, //!< VEX|EVEX [RVM] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvm_Lx_KEvex, //!< VEX|EVEX [RVM] (forces EVEX prefix if K register is used on destination).
kEncodingVexRvm_Lx_2xK, //!< VEX|EVEX [RVM] (vp2intersectd/vp2intersectq).
kEncodingVexRvmr, //!< VEX|EVEX [RVMR].
kEncodingVexRvmr_Lx, //!< VEX|EVEX [RVMR] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvmi, //!< VEX|EVEX [RVMI].
kEncodingVexRvmi_KEvex, //!< VEX|EVEX [RVMI] (forces EVEX prefix if K register is used on destination).
kEncodingVexRvmi_Lx, //!< VEX|EVEX [RVMI] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvmi_Lx_KEvex, //!< VEX|EVEX [RVMI] (forces EVEX prefix if K register is used on destination).
kEncodingVexRmv, //!< VEX|EVEX [RMV].
kEncodingVexRmv_Wx, //!< VEX|EVEX [RMV] (propagates VEX|EVEX.W if GPQ used).
kEncodingVexRmv_VM, //!< VEX|EVEX [RMV] (propagates VEX|EVEX.L, VSIB support).
kEncodingVexRmvRm_VM, //!< VEX|EVEX [RMV|RM] (propagates VEX|EVEX.L, VSIB support).
kEncodingVexRmvi, //!< VEX|EVEX [RMVI].
kEncodingVexRmMr, //!< VEX|EVEX [RM|MR].
kEncodingVexRmMr_Lx, //!< VEX|EVEX [RM|MR] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvmRmv, //!< VEX|EVEX [RVM|RMV].
kEncodingVexRvmRmi, //!< VEX|EVEX [RVM|RMI].
kEncodingVexRvmRmi_Lx, //!< VEX|EVEX [RVM|RMI] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvmRmvRmi, //!< VEX|EVEX [RVM|RMV|RMI].
kEncodingVexRvmMr, //!< VEX|EVEX [RVM|MR].
kEncodingVexRvmMvr, //!< VEX|EVEX [RVM|MVR].
kEncodingVexRvmMvr_Lx, //!< VEX|EVEX [RVM|MVR] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvmVmi, //!< VEX|EVEX [RVM|VMI].
kEncodingVexRvmVmi_Lx, //!< VEX|EVEX [RVM|VMI] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvmVmi_Lx_MEvex, //!< VEX|EVEX [RVM|VMI] (propagates EVEX if the second operand is memory).
kEncodingVexVm, //!< VEX|EVEX [VM].
kEncodingVexVm_Wx, //!< VEX|EVEX [VM] (propagates VEX|EVEX.W if GPQ used).
kEncodingVexVmi, //!< VEX|EVEX [VMI].
kEncodingVexVmi_Lx, //!< VEX|EVEX [VMI] (propagates VEX|EVEX.L if YMM used).
kEncodingVexVmi4_Wx, //!< VEX|EVEX [VMI] (propagates VEX|EVEX.W if GPQ used, DWORD Immediate).
kEncodingVexVmi_Lx_MEvex, //!< VEX|EVEX [VMI] (forces EVEX prefix when the second operand is memory).
kEncodingVexRvrmRvmr, //!< VEX|EVEX [RVRM|RVMR].
kEncodingVexRvrmRvmr_Lx, //!< VEX|EVEX [RVRM|RVMR] (propagates VEX|EVEX.L if YMM used).
kEncodingVexRvrmiRvmri_Lx, //!< VEX|EVEX [RVRMI|RVMRI] (propagates VEX|EVEX.L if YMM used).
kEncodingVexMovdMovq, //!< VEX|EVEX vmovd, vmovq.
kEncodingVexMovssMovsd, //!< VEX|EVEX vmovss, vmovsd.
kEncodingFma4, //!< FMA4 [R, R, R/M, R/M].
kEncodingFma4_Lx, //!< FMA4 [R, R, R/M, R/M] (propagates AVX.L if YMM used).
kEncodingAmxCfg, //!< AMX ldtilecfg/sttilecfg.
kEncodingAmxR, //!< AMX [R] - tilezero.
kEncodingAmxRm, //!< AMX tileloadd/tileloaddt1.
kEncodingAmxMr, //!< AMX tilestored.
kEncodingAmxRmv, //!< AMX instructions that use TMM registers.
kEncodingCount //!< Count of instruction encodings.
};
//! Additional information table, provides CPU extensions required to execute an instruction and RW flags.
struct AdditionalInfo {
//! Index to `_instFlagsTable`.
uint8_t _instFlagsIndex;
//! Index to `_rwFlagsTable`.
uint8_t _rwFlagsIndex;
//! Features vector.
uint8_t _features[6];
inline const uint8_t* featuresBegin() const noexcept { return _features; }
inline const uint8_t* featuresEnd() const noexcept { return _features + ASMJIT_ARRAY_SIZE(_features); }
};
// ${NameLimits:Begin}
// ------------------- Automatically generated, do not edit -------------------
enum : uint32_t { kMaxNameSize = 17 };
// ----------------------------------------------------------------------------
// ${NameLimits:End}
struct InstNameIndex {
uint16_t start;
uint16_t end;
};
struct RWInfo {
enum Category : uint8_t {
kCategoryGeneric,
kCategoryMov,
kCategoryMovabs,
kCategoryImul,
kCategoryMovh64,
kCategoryPunpcklxx,
kCategoryVmaskmov,
kCategoryVmovddup,
kCategoryVmovmskpd,
kCategoryVmovmskps,
kCategoryVmov1_2,
kCategoryVmov1_4,
kCategoryVmov1_8,
kCategoryVmov2_1,
kCategoryVmov4_1,
kCategoryVmov8_1
};
uint8_t category;
uint8_t rmInfo;
uint8_t opInfoIndex[6];
};
struct RWInfoOp {
uint64_t rByteMask;
uint64_t wByteMask;
uint8_t physId;
uint8_t consecutiveLeadCount;
uint8_t reserved[2];
OpRWFlags flags;
};
//! R/M information.
//!
//! This data is used to replace register operand by a memory operand reliably.
struct RWInfoRm {
enum Category : uint8_t {
kCategoryNone = 0,
kCategoryFixed,
kCategoryConsistent,
kCategoryHalf,
kCategoryQuarter,
kCategoryEighth
};
enum Flags : uint8_t {
kFlagAmbiguous = 0x01,
//! Special semantics for PEXTRW - memory operand can only be used with SSE4.1 instruction and it's forbidden in MMX.
kFlagPextrw = 0x02,
//! Special semantics for MOVSS and MOVSD - doesn't zero extend the destination if the operation is a reg to reg move.
kFlagMovssMovsd = 0x04,
//! Special semantics for AVX shift instructions that do not provide reg/mem in AVX/AVX2 mode (AVX-512 is required).
kFlagFeatureIfRMI = 0x08
};
uint8_t category;
uint8_t rmOpsMask;
uint8_t fixedSize;
uint8_t flags;
uint8_t rmFeature;
};
struct RWFlagsInfoTable {
//! CPU/FPU flags read.
uint32_t readFlags;
//! CPU/FPU flags written or undefined.
uint32_t writeFlags;
};
extern const uint8_t rwInfoIndexA[Inst::_kIdCount];
extern const uint8_t rwInfoIndexB[Inst::_kIdCount];
extern const RWInfo rwInfoA[];
extern const RWInfo rwInfoB[];
extern const RWInfoOp rwInfoOp[];
extern const RWInfoRm rwInfoRm[];
extern const RWFlagsInfoTable _rwFlagsInfoTable[];
extern const InstRWFlags _instFlagsTable[];
extern const uint32_t _mainOpcodeTable[];
extern const uint32_t _altOpcodeTable[];
#ifndef ASMJIT_NO_TEXT
extern const char _nameData[];
extern const InstNameIndex instNameIndex[26];
#endif // !ASMJIT_NO_TEXT
extern const AdditionalInfo _additionalInfoTable[];
} // {InstDB}
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86INSTDB_P_H_INCLUDED


@ -0,0 +1,436 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86OPCODE_P_H_INCLUDED
#define ASMJIT_X86_X86OPCODE_P_H_INCLUDED
#include "../x86/x86globals.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
//! Helper class to store and manipulate X86 opcodes.
//!
//! The first 8 least significant bits describe the opcode byte as defined in ISA manuals, all other bits
//! describe other properties like prefixes, see `Opcode::Bits` for more information.
struct Opcode {
uint32_t v;
//! Describes a meaning of all bits of AsmJit's 32-bit opcode value.
//!
//! This schema is AsmJit specific and has been designed to allow encoding of all X86 instructions available. X86,
//! MMX, and SSE+ instructions always use `MM` and `PP` fields, which are encoded to corresponding prefixes needed
//! by X86 or SIMD instructions. AVX+ instructions embed `MMMMM` and `PP` fields in a VEX prefix, and AVX-512
//! instructions embed `MM` and `PP` in EVEX prefix.
//!
//! The instruction opcode definition uses 1 or 2 bytes as an opcode value. 1 byte is needed by most of the
//! instructions, 2 bytes are only used by legacy X87-FPU instructions. This means that a second byte is free to
//! be used by instructions encoded using a VEX and/or EVEX prefix.
//!
//! The fields description:
//!
//! - `MM` field is used to encode prefixes needed by the instruction or as a part of VEX/EVEX prefix. Described as
//! `mm` and `mmmmm` in instruction manuals.
//!
//! NOTE: Since the `MM` field is defined as `mmmmm` (5 bits), but only the 2 least significant bits are used by VEX
//! and EVEX prefixes, and an additional 4th bit is used by the XOP prefix, AsmJit uses the 3rd and 5th bits for its
//! own purposes. These bits will probably never be used in future encodings as AVX512 uses only `000mm` from `mmmmm`.
//!
//! - `PP` field is used to encode prefixes needed by the instruction or as a part of VEX/EVEX prefix. Described as
//! `pp` in instruction manuals.
//!
//! - `LL` field is used exclusively by AVX+ and AVX512+ instruction sets. It describes vector size, which is `L.128`
//! for XMM register, `L.256` for YMM register, and `L.512` for ZMM register. The `LL` field is omitted in case
//! the instruction supports multiple vector lengths, however, if the instruction requires a specific `L` value it
//! must be specified as a part of the opcode.
//!
//! NOTE: `LL` having value `11` is not defined yet.
//!
//! - `W` field is the most complicated. It was added by the 64-bit architecture to promote the default operation
//! width (instructions that perform a 32-bit operation by default require the width to be overridden to 64-bit
//! explicitly). There is nothing wrong with this, however, some instructions introduced an implicit `W` override,
//! for example a `cdqe` instruction is basically a `cwde` instruction with overridden `W` (set to 1). There are
//! some others in the base X86 instruction set. More recent instruction sets started using the `W` field more often:
//!
//! - AVX instructions started using `W` field as an extended opcode for FMA, GATHER, PERM, and other instructions.
//! It also uses `W` field to override the default operation width in instructions like `vmovq`.
//!
//! - AVX-512 instructions started using `W` field as an extended opcode for all new instructions. This wouldn't
//! have been an issue if the `W` field of AVX-512 have matched AVX, but this is not always the case.
//!
//! - `O` field is an extended opcode field (3 bits) embedded in ModR/M BYTE.
//!
//! - `CDSHL` and `CDTT` fields describe 'compressed-displacement'. `CDSHL` is defined for each instruction that is
//! AVX-512 encodable (EVEX) and contains a base N shift (base shift to perform the calculation). The `CDTT` field
//! is derived from instruction specification and describes additional shift to calculate the final `CDSHL` that
//! will be used in SIB byte.
//!
//! \note Don't reorder any fields here, the shifts and masks were defined carefully to make encoding of X86
//! instructions fast, especially to construct REX, VEX, and EVEX prefixes in the most efficient way. Changing
//! values defined by these enums may cause AsmJit to emit invalid binary representations of instructions passed to
//! `x86::Assembler::_emit`.
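//!
//! Decomposition example (illustrative values, not a database entry): an opcode built as
//! `0x58 | kMM_0F | kPP_66` describes the ADDPD-like encoding `66 0F 58 /r`, where `PP`
//! and `MM` become either legacy prefix bytes or the VEX/EVEX `pp`/`mm` fields, depending
//! on which encoder path is taken.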
enum Bits : uint32_t {
// MM & VEX & EVEX & XOP
// ---------------------
//
// Two meanings:
// * Part of a legacy opcode (prefixes emitted before the main opcode byte).
// * `MMMMM` field in VEX|EVEX|XOP instruction.
//
// AVX reserves 5 bits for `MMMMM` field, however AVX instructions only use 2 bits and XOP 3 bits. AVX-512 shrinks
// `MMMMM` field into `MMM` so it's safe to use [4:3] bits of `MMMMM` field for internal payload.
//
// AsmJit divides MMMMM field into this layout:
//
// [2:0] - Used to describe 0F, 0F38 and 0F3A legacy prefix bytes and 3 bits of MMMMM field for XOP/AVX/AVX512.
// [3] - Required by XOP instructions, so we use this bit also to indicate that this is a XOP opcode.
// [4] - Used to force EVEX prefix - this bit is not used by any X86 instruction yet, so AsmJit uses it to
// describe EVEX-only instructions or sets it when the user uses InstOptions::kX86_Evex to force EVEX.
kMM_Shift = 8,
kMM_Mask = 0x1Fu << kMM_Shift,
kMM_00 = 0x00u << kMM_Shift,
kMM_0F = 0x01u << kMM_Shift,
kMM_0F38 = 0x02u << kMM_Shift,
kMM_0F3A = 0x03u << kMM_Shift, // Described also as XOP.M3 in AMD manuals.
kMM_0F01 = 0x04u << kMM_Shift, // AsmJit way to describe 0F01 (never VEX/EVEX).
kMM_MAP5 = 0x05u << kMM_Shift, // EVEX.MAP5.
kMM_MAP6 = 0x06u << kMM_Shift, // EVEX.MAP6.
// `XOP` field is only used to force XOP prefix instead of VEX3 prefix. We know XOP encodings always use the 0b1000
// bit of the MM field and that no VEX or EVEX instruction uses that bit yet, so we can use this bit to force XOP
// prefix to be emitted instead of VEX3 prefix. See `x86VEXPrefix` defined in `x86assembler.cpp`.
kMM_XOP08 = 0x08u << kMM_Shift, // XOP.M8.
kMM_XOP09 = 0x09u << kMM_Shift, // XOP.M9.
kMM_XOP0A = 0x0Au << kMM_Shift, // XOP.MA.
kMM_IsXOP_Shift = kMM_Shift + 3,
kMM_IsXOP = kMM_XOP08,
// NOTE: Force VEX3 allows forcing the emitter to emit VEX3 instead of VEX2 in some cases (similar to forcing a
// REX prefix). Force EVEX will force emitting an EVEX prefix instead of VEX2|VEX3. EVEX-only instructions will
// always have ForceEvex set; however, instructions that can be encoded by either VEX or EVEX prefix should not
// have ForceEvex set.
kMM_ForceEvex = 0x10u << kMM_Shift, // Force 4-BYTE EVEX prefix.
// FPU_2B - Second-Byte of the Opcode used by FPU
// ----------------------------------------------
//
// Second byte opcode. This BYTE is ONLY used by FPU instructions and collides with 3 bits from `MM` and 5 bits
// from 'CDSHL' and 'CDTT'. It's fine as FPU and AVX512 flags are never used at the same time.
kFPU_2B_Shift = 10,
kFPU_2B_Mask = 0xFF << kFPU_2B_Shift,
// CDSHL & CDTT
// ------------
//
// Compressed displacement bits.
//
// Each opcode defines the base size (N) shift:
// [0]: BYTE (1 byte).
// [1]: WORD (2 bytes).
// [2]: DWORD (4 bytes - float/int32).
// [3]: QWORD (8 bytes - double/int64).
// [4]: OWORD (16 bytes - used by FV|FVM|M128).
//
// Which is then scaled by the instruction's TT (TupleType) into possible:
// [5]: YWORD (32 bytes)
// [6]: ZWORD (64 bytes)
//
// These bits are then adjusted before calling EmitModSib or EmitModVSib.
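//
// Worked example (illustrative): a full-vector (FV) opcode stores base shift 4
// (OWORD); kCDTT_ByLL then adds LL (2 for a ZMM operand), giving a total shift
// of 6, so a displacement of 256 compresses to disp8 = 256 >> 6 = 4.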
kCDSHL_Shift = 13,
kCDSHL_Mask = 0x7u << kCDSHL_Shift,
kCDSHL__ = 0x0u << kCDSHL_Shift, // Base element size not used.
kCDSHL_0 = 0x0u << kCDSHL_Shift, // N << 0.
kCDSHL_1 = 0x1u << kCDSHL_Shift, // N << 1.
kCDSHL_2 = 0x2u << kCDSHL_Shift, // N << 2.
kCDSHL_3 = 0x3u << kCDSHL_Shift, // N << 3.
kCDSHL_4 = 0x4u << kCDSHL_Shift, // N << 4.
kCDSHL_5 = 0x5u << kCDSHL_Shift, // N << 5.
// Compressed displacement tuple-type (specific to AsmJit).
//
// Since we store the base offset independently of CDTT we can simplify the number of 'TUPLE_TYPE' groups
// significantly and just handle special cases.
kCDTT_Shift = 16,
kCDTT_Mask = 0x3u << kCDTT_Shift,
kCDTT_None = 0x0u << kCDTT_Shift, // Does nothing.
kCDTT_ByLL = 0x1u << kCDTT_Shift, // Scales by LL (1x 2x 4x).
kCDTT_T1W = 0x2u << kCDTT_Shift, // Used to add 'W' to the shift.
kCDTT_DUP = 0x3u << kCDTT_Shift, // Special 'VMOVDDUP' case.
// Aliases that match names used in instruction manuals.
kCDTT__ = kCDTT_None,
kCDTT_FV = kCDTT_ByLL,
kCDTT_HV = kCDTT_ByLL,
kCDTT_QV = kCDTT_ByLL,
kCDTT_FVM = kCDTT_ByLL,
kCDTT_T1S = kCDTT_None,
kCDTT_T1F = kCDTT_None,
kCDTT_T1_4X = kCDTT_None,
kCDTT_T4X = kCDTT_None, // Alias to have only 3 letters.
kCDTT_T2 = kCDTT_None,
kCDTT_T4 = kCDTT_None,
kCDTT_T8 = kCDTT_None,
kCDTT_HVM = kCDTT_ByLL,
kCDTT_QVM = kCDTT_ByLL,
kCDTT_OVM = kCDTT_ByLL,
kCDTT_128 = kCDTT_None,
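// Worked example (illustrative, assuming AVX-512 disp8*N semantics): a full-vector (FV) instruction uses
// kCDTT_ByLL, so the base N from CDSHL is scaled by LL. At LL=1 (256-bit) N becomes 32 bytes, thus a byte
// displacement of 64 compresses to disp8 == 2, while a displacement of 48 is not divisible by N and has to
// be emitted as disp32.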
// `O` Field in ModR/M (??:xxx:???)
// --------------------------------
kModO_Shift = 18,
kModO_Mask = 0x7u << kModO_Shift,
kModO__ = 0x0u,
kModO_0 = 0x0u << kModO_Shift,
kModO_1 = 0x1u << kModO_Shift,
kModO_2 = 0x2u << kModO_Shift,
kModO_3 = 0x3u << kModO_Shift,
kModO_4 = 0x4u << kModO_Shift,
kModO_5 = 0x5u << kModO_Shift,
kModO_6 = 0x6u << kModO_Shift,
kModO_7 = 0x7u << kModO_Shift,
// `RM` Field in ModR/M (??:???:xxx)
// ---------------------------------
//
// Second data field used by the ModR/M byte. This is only used by a few instructions that use OPCODE+ModR/M
// where both values in ModR/M are part of the opcode.
kModRM_Shift = 13,
kModRM_Mask = 0x7u << kModRM_Shift,
kModRM__ = 0x0u,
kModRM_0 = 0x0u << kModRM_Shift,
kModRM_1 = 0x1u << kModRM_Shift,
kModRM_2 = 0x2u << kModRM_Shift,
kModRM_3 = 0x3u << kModRM_Shift,
kModRM_4 = 0x4u << kModRM_Shift,
kModRM_5 = 0x5u << kModRM_Shift,
kModRM_6 = 0x6u << kModRM_Shift,
kModRM_7 = 0x7u << kModRM_Shift,
// `PP` Field
// ----------
//
// These fields are stored deliberately right after each other as it makes it easier to construct VEX prefix from
// the opcode value stored in the instruction database.
//
// Two meanings:
// * "PP" field in AVX/XOP/AVX-512 instruction.
// * Mandatory Prefix in legacy encoding.
//
// AVX reserves 2 bits for the `PP` field, but AsmJit extends the storage by 1 more bit that is used to emit
// the 9B prefix of some X87-FPU instructions.
kPP_Shift = 21,
kPP_VEXMask = 0x03u << kPP_Shift, // PP field mask used by VEX/EVEX.
kPP_FPUMask = 0x07u << kPP_Shift, // Mask used by EMIT_PP, also includes '0x9B'.
kPP_00 = 0x00u << kPP_Shift,
kPP_66 = 0x01u << kPP_Shift,
kPP_F3 = 0x02u << kPP_Shift,
kPP_F2 = 0x03u << kPP_Shift,
kPP_9B = 0x07u << kPP_Shift, // AsmJit specific to emit FPU's '9B' byte.
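// For example (illustrative): the legacy SSE2 form of an instruction encoded as '66.0F' stores kPP_66 here
// instead of keeping a literal 66H byte in the opcode data, so the same value can serve either as a mandatory
// prefix in legacy encoding or as the PP field of a VEX/EVEX prefix.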
// REX|VEX|EVEX B|X|R|W Bits
// -------------------------
//
// NOTE: REX.[B|X|R] are never stored within the opcode itself; they are reserved by AsmJit and added
// dynamically to the opcode to represent [REX|VEX|EVEX].[B|X|R] bits. REX.W can be stored in DB as it's
// sometimes part of the opcode itself.
// These must be binary compatible with instruction options.
kREX_Shift = 24,
kREX_Mask = 0x0Fu << kREX_Shift,
kB = 0x01u << kREX_Shift, // Never stored in DB, used by encoder.
kX = 0x02u << kREX_Shift, // Never stored in DB, used by encoder.
kR = 0x04u << kREX_Shift, // Never stored in DB, used by encoder.
kW = 0x08u << kREX_Shift,
kW_Shift = kREX_Shift + 3,
kW__ = 0u << kW_Shift, // REX.W/VEX.W is unspecified.
kW_x = 0u << kW_Shift, // REX.W/VEX.W is based on instruction operands.
kW_I = 0u << kW_Shift, // REX.W/VEX.W is ignored (WIG).
kW_0 = 0u << kW_Shift, // REX.W/VEX.W is 0 (W0).
kW_1 = 1u << kW_Shift, // REX.W/VEX.W is 1 (W1).
// EVEX.W Field
// ------------
//
// `W` field used by EVEX instruction encoding.
kEvex_W_Shift = 28,
kEvex_W_Mask = 1u << kEvex_W_Shift,
kEvex_W__ = 0u << kEvex_W_Shift, // EVEX.W is unspecified (not EVEX instruction).
kEvex_W_x = 0u << kEvex_W_Shift, // EVEX.W is based on instruction operands.
kEvex_W_I = 0u << kEvex_W_Shift, // EVEX.W is ignored (WIG).
kEvex_W_0 = 0u << kEvex_W_Shift, // EVEX.W is 0 (W0).
kEvex_W_1 = 1u << kEvex_W_Shift, // EVEX.W is 1 (W1).
// `L` or `LL` field in AVX/XOP/AVX-512
// ------------------------------------
//
// A VEX/XOP prefix provides only a single `L` bit, selecting `L.128` or `L.256`. The EVEX prefix adds a
// second bit, which makes `L.512` possible as well. If the instruction set manual describes an instruction
// as `LIG`, the `L` field is ignored and AsmJit defaults to `0` in that case.
kLL_Shift = 29,
kLL_Mask = 0x3u << kLL_Shift,
kLL__ = 0x0u << kLL_Shift, // LL is unspecified.
kLL_x = 0x0u << kLL_Shift, // LL is based on instruction operands.
kLL_I = 0x0u << kLL_Shift, // LL is ignored (LIG).
kLL_0 = 0x0u << kLL_Shift, // LL is 0 (L.128).
kLL_1 = 0x1u << kLL_Shift, // LL is 1 (L.256).
kLL_2 = 0x2u << kLL_Shift, // LL is 2 (L.512).
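// For example (illustrative): an instruction described as kLL_x emits L=0 when its operands are XMM
// registers, L=1 for YMM registers, and (EVEX only) LL=2 for ZMM registers.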
// Opcode Combinations
// -------------------
k0 = 0, // '__' (no prefix, used internally).
k000000 = kPP_00 | kMM_00, // '__' (no prefix, to be the same width as others).
k000F00 = kPP_00 | kMM_0F, // '0F'
k000F01 = kPP_00 | kMM_0F01, // '0F01'
k000F0F = kPP_00 | kMM_0F, // '0F0F' - 3DNOW, equal to 0x0F, must have special encoding to take effect.
k000F38 = kPP_00 | kMM_0F38, // 'NP.0F38'
k000F3A = kPP_00 | kMM_0F3A, // 'NP.0F3A'
k00MAP5 = kPP_00 | kMM_MAP5, // 'NP.MAP5'
k00MAP6 = kPP_00 | kMM_MAP6, // 'NP.MAP6'
k660000 = kPP_66 | kMM_00, // '66'
k660F00 = kPP_66 | kMM_0F, // '66.0F'
k660F01 = kPP_66 | kMM_0F01, // '66.0F01'
k660F38 = kPP_66 | kMM_0F38, // '66.0F38'
k660F3A = kPP_66 | kMM_0F3A, // '66.0F3A'
k66MAP5 = kPP_66 | kMM_MAP5, // '66.MAP5'
k66MAP6 = kPP_66 | kMM_MAP6, // '66.MAP6'
kF20000 = kPP_F2 | kMM_00, // 'F2'
kF20F00 = kPP_F2 | kMM_0F, // 'F2.0F'
kF20F01 = kPP_F2 | kMM_0F01, // 'F2.0F01'
kF20F38 = kPP_F2 | kMM_0F38, // 'F2.0F38'
kF20F3A = kPP_F2 | kMM_0F3A, // 'F2.0F3A'
kF2MAP5 = kPP_F2 | kMM_MAP5, // 'F2.MAP5'
kF2MAP6 = kPP_F2 | kMM_MAP6, // 'F2.MAP6'
kF30000 = kPP_F3 | kMM_00, // 'F3'
kF30F00 = kPP_F3 | kMM_0F, // 'F3.0F'
kF30F01 = kPP_F3 | kMM_0F01, // 'F3.0F01'
kF30F38 = kPP_F3 | kMM_0F38, // 'F3.0F38'
kF30F3A = kPP_F3 | kMM_0F3A, // 'F3.0F3A'
kF3MAP5 = kPP_F3 | kMM_MAP5, // 'F3.MAP5'
kF3MAP6 = kPP_F3 | kMM_MAP6, // 'F3.MAP6'
kFPU_00 = kPP_00 | kMM_00, // '__' (FPU)
kFPU_9B = kPP_9B | kMM_00, // '9B' (FPU)
kXOP_M8 = kPP_00 | kMM_XOP08, // 'M8' (XOP)
kXOP_M9 = kPP_00 | kMM_XOP09, // 'M9' (XOP)
kXOP_MA = kPP_00 | kMM_XOP0A // 'MA' (XOP)
};
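// Sanity checks (an illustrative addition, not part of the upstream header): composed opcode values must
// decompose back into the PP and MM fields defined above.
static_assert((k660F3A & kPP_VEXMask) == kPP_66, "PP field of '66.0F3A' must be 66H");
static_assert((k660F3A & kMM_Mask) == kMM_0F3A, "MM field of '66.0F3A' must be 0F3A");
static_assert((kXOP_M9 & kMM_IsXOP) != 0, "XOP opcodes must have the IsXOP bit set");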
// Opcode Builder
// --------------
inline uint32_t get() const noexcept { return v; }
inline bool hasW() const noexcept { return (v & kW) != 0; }
inline bool has66h() const noexcept { return (v & kPP_66) != 0; }
inline Opcode& add(uint32_t x) noexcept { return operator+=(x); }
inline Opcode& add66h() noexcept { return operator|=(kPP_66); }
template<typename T>
inline Opcode& add66hIf(T exp) noexcept { return operator|=(uint32_t(exp) << kPP_Shift); }
template<typename T>
inline Opcode& add66hBySize(T size) noexcept { return add66hIf(size == 2); }
inline Opcode& addW() noexcept { return operator|=(kW); }
template<typename T>
inline Opcode& addWIf(T exp) noexcept { return operator|=(uint32_t(exp) << kW_Shift); }
template<typename T>
inline Opcode& addWBySize(T size) noexcept { return addWIf(size == 8); }
template<typename T>
inline Opcode& addPrefixBySize(T size) noexcept {
static const uint32_t mask[16] = {
0, // #0
0, // #1 -> nothing (already handled or not possible)
kPP_66, // #2 -> 66H
0, // #3
0, // #4 -> nothing
0, // #5
0, // #6
0, // #7
kW // #8 -> REX.W
};
return operator|=(mask[size & 0xF]);
}
template<typename T>
inline Opcode& addArithBySize(T size) noexcept {
static const uint32_t mask[16] = {
0, // #0
0, // #1 -> nothing
1 | kPP_66, // #2 -> NOT_BYTE_OP(1) and 66H
0, // #3
1, // #4 -> NOT_BYTE_OP(1)
0, // #5
0, // #6
0, // #7
1 | kW // #8 -> NOT_BYTE_OP(1) and REX.W
};
return operator|=(mask[size & 0xF]);
}
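// Usage sketch (illustrative): arithmetic opcodes encode their byte form as an even value and the
// word/dword/qword form as that value + 1. Starting from opcode 0x00 (the byte form of ADD),
// addArithBySize(2) sets the low opcode bit and the 66H prefix, selecting the 16-bit form.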
inline Opcode& forceEvex() noexcept { return operator|=(kMM_ForceEvex); }
template<typename T>
inline Opcode& forceEvexIf(T exp) noexcept { return operator|=(uint32_t(exp) << Support::ConstCTZ<uint32_t(kMM_ForceEvex)>::value); }
//! Extract `O` field (R) from the opcode (specified as /0..7 in instruction manuals).
inline uint32_t extractModO() const noexcept {
return (v >> kModO_Shift) & 0x07;
}
//! Extract `RM` field (RM) from the opcode (usually specified as another opcode value).
inline uint32_t extractModRM() const noexcept {
return (v >> kModRM_Shift) & 0x07;
}
//! Extract `REX` prefix from opcode combined with `options`.
inline uint32_t extractRex(InstOptions options) const noexcept {
// kREX was designed in a way that when shifted there will be no bits set except REX.[B|X|R|W].
// The returned value thus forms the low bits of a real REX prefix byte. This case should be unit-tested as well.
return (v | uint32_t(options)) >> kREX_Shift;
}
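// Example (illustrative): with kW set in the opcode and a REX.B requirement passed through `options`, this
// returns 0b1001, i.e. the low four bits of the final REX byte (0x40 | 0b1001 == 0x49).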
inline uint32_t extractLLMMMMM(InstOptions options) const noexcept {
uint32_t llMmmmm = uint32_t(v & (kLL_Mask | kMM_Mask));
uint32_t vexEvex = uint32_t(options & InstOptions::kX86_Evex);
return (llMmmmm | vexEvex) >> kMM_Shift;
}
inline Opcode& operator=(uint32_t x) noexcept { v = x; return *this; }
inline Opcode& operator+=(uint32_t x) noexcept { v += x; return *this; }
inline Opcode& operator-=(uint32_t x) noexcept { v -= x; return *this; }
inline Opcode& operator&=(uint32_t x) noexcept { v &= x; return *this; }
inline Opcode& operator|=(uint32_t x) noexcept { v |= x; return *this; }
inline Opcode& operator^=(uint32_t x) noexcept { v ^= x; return *this; }
inline uint32_t operator&(uint32_t x) const noexcept { return v & x; }
inline uint32_t operator|(uint32_t x) const noexcept { return v | x; }
inline uint32_t operator^(uint32_t x) const noexcept { return v ^ x; }
inline uint32_t operator<<(uint32_t x) const noexcept { return v << x; }
inline uint32_t operator>>(uint32_t x) const noexcept { return v >> x; }
};
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // ASMJIT_X86_X86OPCODE_P_H_INCLUDED

View File

@ -0,0 +1,231 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#include "../core/api-build_p.h"
#if !defined(ASMJIT_NO_X86)
#include "../core/misc_p.h"
#include "../x86/x86operand.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
// x86::Operand - Tests
// ====================
#if defined(ASMJIT_TEST)
UNIT(x86_operand) {
Label L(1000); // Label with some ID.
INFO("Checking basic properties of built-in X86 registers");
EXPECT(gpb(Gp::kIdAx) == al);
EXPECT(gpb(Gp::kIdBx) == bl);
EXPECT(gpb(Gp::kIdCx) == cl);
EXPECT(gpb(Gp::kIdDx) == dl);
EXPECT(gpb_lo(Gp::kIdAx) == al);
EXPECT(gpb_lo(Gp::kIdBx) == bl);
EXPECT(gpb_lo(Gp::kIdCx) == cl);
EXPECT(gpb_lo(Gp::kIdDx) == dl);
EXPECT(gpb_hi(Gp::kIdAx) == ah);
EXPECT(gpb_hi(Gp::kIdBx) == bh);
EXPECT(gpb_hi(Gp::kIdCx) == ch);
EXPECT(gpb_hi(Gp::kIdDx) == dh);
EXPECT(gpw(Gp::kIdAx) == ax);
EXPECT(gpw(Gp::kIdBx) == bx);
EXPECT(gpw(Gp::kIdCx) == cx);
EXPECT(gpw(Gp::kIdDx) == dx);
EXPECT(gpd(Gp::kIdAx) == eax);
EXPECT(gpd(Gp::kIdBx) == ebx);
EXPECT(gpd(Gp::kIdCx) == ecx);
EXPECT(gpd(Gp::kIdDx) == edx);
EXPECT(gpq(Gp::kIdAx) == rax);
EXPECT(gpq(Gp::kIdBx) == rbx);
EXPECT(gpq(Gp::kIdCx) == rcx);
EXPECT(gpq(Gp::kIdDx) == rdx);
EXPECT(gpb(Gp::kIdAx) != dl);
EXPECT(gpw(Gp::kIdBx) != cx);
EXPECT(gpd(Gp::kIdCx) != ebx);
EXPECT(gpq(Gp::kIdDx) != rax);
INFO("Checking if x86::reg(...) matches built-in IDs");
EXPECT(gpb(5) == bpl);
EXPECT(gpw(5) == bp);
EXPECT(gpd(5) == ebp);
EXPECT(gpq(5) == rbp);
EXPECT(st(5) == st5);
EXPECT(mm(5) == mm5);
EXPECT(k(5) == k5);
EXPECT(cr(5) == cr5);
EXPECT(dr(5) == dr5);
EXPECT(xmm(5) == xmm5);
EXPECT(ymm(5) == ymm5);
EXPECT(zmm(5) == zmm5);
INFO("Checking x86::Gp register properties");
EXPECT(Gp().isReg() == true);
EXPECT(eax.isReg() == true);
EXPECT(eax.id() == 0);
EXPECT(eax.size() == 4);
EXPECT(eax.type() == RegType::kX86_Gpd);
EXPECT(eax.group() == RegGroup::kGp);
INFO("Checking x86::Xmm register properties");
EXPECT(Xmm().isReg() == true);
EXPECT(xmm4.isReg() == true);
EXPECT(xmm4.id() == 4);
EXPECT(xmm4.size() == 16);
EXPECT(xmm4.type() == RegType::kX86_Xmm);
EXPECT(xmm4.group() == RegGroup::kVec);
EXPECT(xmm4.isVec());
INFO("Checking x86::Ymm register properties");
EXPECT(Ymm().isReg() == true);
EXPECT(ymm5.isReg() == true);
EXPECT(ymm5.id() == 5);
EXPECT(ymm5.size() == 32);
EXPECT(ymm5.type() == RegType::kX86_Ymm);
EXPECT(ymm5.group() == RegGroup::kVec);
EXPECT(ymm5.isVec());
INFO("Checking x86::Zmm register properties");
EXPECT(Zmm().isReg() == true);
EXPECT(zmm6.isReg() == true);
EXPECT(zmm6.id() == 6);
EXPECT(zmm6.size() == 64);
EXPECT(zmm6.type() == RegType::kX86_Zmm);
EXPECT(zmm6.group() == RegGroup::kVec);
EXPECT(zmm6.isVec());
INFO("Checking x86::Vec register properties");
EXPECT(Vec().isReg() == true);
// Converts a vector register to the type of the passed register, but keeps the ID.
EXPECT(xmm4.cloneAs(ymm10) == ymm4);
EXPECT(xmm4.cloneAs(zmm11) == zmm4);
EXPECT(ymm5.cloneAs(xmm12) == xmm5);
EXPECT(ymm5.cloneAs(zmm13) == zmm5);
EXPECT(zmm6.cloneAs(xmm14) == xmm6);
EXPECT(zmm6.cloneAs(ymm15) == ymm6);
EXPECT(xmm7.xmm() == xmm7);
EXPECT(xmm7.ymm() == ymm7);
EXPECT(xmm7.zmm() == zmm7);
EXPECT(ymm7.xmm() == xmm7);
EXPECT(ymm7.ymm() == ymm7);
EXPECT(ymm7.zmm() == zmm7);
EXPECT(zmm7.xmm() == xmm7);
EXPECT(zmm7.ymm() == ymm7);
EXPECT(zmm7.zmm() == zmm7);
INFO("Checking x86::Mm register properties");
EXPECT(Mm().isReg() == true);
EXPECT(mm2.isReg() == true);
EXPECT(mm2.id() == 2);
EXPECT(mm2.size() == 8);
EXPECT(mm2.type() == RegType::kX86_Mm);
EXPECT(mm2.group() == RegGroup::kX86_MM);
INFO("Checking x86::KReg register properties");
EXPECT(KReg().isReg() == true);
EXPECT(k3.isReg() == true);
EXPECT(k3.id() == 3);
EXPECT(k3.size() == 0);
EXPECT(k3.type() == RegType::kX86_KReg);
EXPECT(k3.group() == RegGroup::kX86_K);
INFO("Checking x86::St register properties");
EXPECT(St().isReg() == true);
EXPECT(st1.isReg() == true);
EXPECT(st1.id() == 1);
EXPECT(st1.size() == 10);
EXPECT(st1.type() == RegType::kX86_St);
EXPECT(st1.group() == RegGroup::kX86_St);
INFO("Checking if default constructed regs behave as expected");
EXPECT(Reg().isValid() == false);
EXPECT(Gp().isValid() == false);
EXPECT(Xmm().isValid() == false);
EXPECT(Ymm().isValid() == false);
EXPECT(Zmm().isValid() == false);
EXPECT(Mm().isValid() == false);
EXPECT(KReg().isValid() == false);
EXPECT(SReg().isValid() == false);
EXPECT(CReg().isValid() == false);
EXPECT(DReg().isValid() == false);
EXPECT(St().isValid() == false);
EXPECT(Bnd().isValid() == false);
INFO("Checking x86::Mem operand");
Mem m;
EXPECT(m == Mem(), "Two default constructed x86::Mem operands must be equal");
m = ptr(L);
EXPECT(m.hasBase() == true);
EXPECT(m.hasBaseReg() == false);
EXPECT(m.hasBaseLabel() == true);
EXPECT(m.hasOffset() == false);
EXPECT(m.isOffset64Bit() == false);
EXPECT(m.offset() == 0);
EXPECT(m.offsetLo32() == 0);
m = ptr(0x0123456789ABCDEFu);
EXPECT(m.hasBase() == false);
EXPECT(m.hasBaseReg() == false);
EXPECT(m.hasIndex() == false);
EXPECT(m.hasIndexReg() == false);
EXPECT(m.hasOffset() == true);
EXPECT(m.isOffset64Bit() == true);
EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
m.addOffset(1);
EXPECT(m.offset() == int64_t(0x0123456789ABCDF0u));
m = ptr(0x0123456789ABCDEFu, rdi, 3);
EXPECT(m.hasSegment() == false);
EXPECT(m.hasBase() == false);
EXPECT(m.hasBaseReg() == false);
EXPECT(m.hasIndex() == true);
EXPECT(m.hasIndexReg() == true);
EXPECT(m.indexType() == rdi.type());
EXPECT(m.indexId() == rdi.id());
EXPECT(m.shift() == 3);
EXPECT(m.hasOffset() == true);
EXPECT(m.isOffset64Bit() == true);
EXPECT(m.offset() == int64_t(0x0123456789ABCDEFu));
EXPECT(m.offsetLo32() == int32_t(0x89ABCDEFu));
m.resetIndex();
EXPECT(m.hasIndex() == false);
EXPECT(m.hasIndexReg() == false);
m = ptr(rax);
EXPECT(m.hasBase() == true);
EXPECT(m.hasBaseReg() == true);
EXPECT(m.baseType() == rax.type());
EXPECT(m.baseId() == rax.id());
EXPECT(m.hasIndex() == false);
EXPECT(m.hasIndexReg() == false);
EXPECT(m.indexType() == RegType::kNone);
EXPECT(m.indexId() == 0);
EXPECT(m.hasOffset() == false);
EXPECT(m.isOffset64Bit() == false);
EXPECT(m.offset() == 0);
EXPECT(m.offsetLo32() == 0);
m.setIndex(rsi);
EXPECT(m.hasIndex() == true);
EXPECT(m.hasIndexReg() == true);
EXPECT(m.indexType() == rsi.type());
EXPECT(m.indexId() == rsi.id());
}
#endif
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_X86

1085
src/asmjit/x86/x86operand.h Normal file

File diff suppressed because it is too large Load Diff

1509
src/asmjit/x86/x86rapass.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,94 @@
// This file is part of AsmJit project <https://asmjit.com>
//
// See asmjit.h or LICENSE.md for license and copyright information
// SPDX-License-Identifier: Zlib
#ifndef ASMJIT_X86_X86RAPASS_P_H_INCLUDED
#define ASMJIT_X86_X86RAPASS_P_H_INCLUDED
#include "../core/api-config.h"
#ifndef ASMJIT_NO_COMPILER
#include "../core/compiler.h"
#include "../core/rabuilders_p.h"
#include "../core/rapass_p.h"
#include "../x86/x86assembler.h"
#include "../x86/x86compiler.h"
#include "../x86/x86emithelper_p.h"
ASMJIT_BEGIN_SUB_NAMESPACE(x86)
//! \cond INTERNAL
//! \addtogroup asmjit_x86
//! \{
//! X86 register allocation pass.
//!
//! Takes care of generating function prologs and epilogs, and also performs register allocation.
class X86RAPass : public BaseRAPass {
public:
ASMJIT_NONCOPYABLE(X86RAPass)
typedef BaseRAPass Base;
EmitHelper _emitHelper;
//! \name Construction & Destruction
//! \{
X86RAPass() noexcept;
virtual ~X86RAPass() noexcept;
//! \}
//! \name Accessors
//! \{
//! Returns the compiler cast to `x86::Compiler`.
inline Compiler* cc() const noexcept { return static_cast<Compiler*>(_cb); }
//! Returns emit helper.
inline EmitHelper* emitHelper() noexcept { return &_emitHelper; }
inline bool avxEnabled() const noexcept { return _emitHelper._avxEnabled; }
inline bool avx512Enabled() const noexcept { return _emitHelper._avx512Enabled; }
//! \}
//! \name Utilities
//! \{
inline uint32_t choose(uint32_t sseInstId, uint32_t avxInstId) noexcept {
return avxEnabled() ? avxInstId : sseInstId;
}
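// Usage sketch (illustrative): prefer the AVX form when AVX is enabled, e.g.
// `choose(Inst::kIdMovaps, Inst::kIdVmovaps)`.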
//! \}
//! \name Interface
//! \{
void onInit() noexcept override;
void onDone() noexcept override;
Error buildCFG() noexcept override;
Error _rewrite(BaseNode* first, BaseNode* stop) noexcept override;
Error emitMove(uint32_t workId, uint32_t dstPhysId, uint32_t srcPhysId) noexcept override;
Error emitSwap(uint32_t aWorkId, uint32_t aPhysId, uint32_t bWorkId, uint32_t bPhysId) noexcept override;
Error emitLoad(uint32_t workId, uint32_t dstPhysId) noexcept override;
Error emitSave(uint32_t workId, uint32_t srcPhysId) noexcept override;
Error emitJump(const Label& label) noexcept override;
Error emitPreCall(InvokeNode* invokeNode) noexcept override;
//! \}
};
//! \}
//! \endcond
ASMJIT_END_SUB_NAMESPACE
#endif // !ASMJIT_NO_COMPILER
#endif // ASMJIT_X86_X86RAPASS_P_H_INCLUDED