X86/X64 compiler implementation.
Compiler Basics
The first x86::Compiler example shows how to generate a function that simply returns an integer value. It's analogous to the first Assembler example:
#include <asmjit/x86.h>
#include <stdio.h>
typedef int (*Func)(void);
int main() {
cc.addFunc(FuncSignature::build<int>());
cc.mov(vReg, 1);
cc.ret(vReg);
cc.endFunc();
cc.finalize();
Func fn;
if (err) return 1;
int result = fn();
printf("%d\n", result);
return 0;
}
The BaseCompiler::addFunc() and BaseCompiler::endFunc() functions are used to define the function and its end. Both must be called per function, but the body doesn't have to be generated in sequence. An example of generating two functions will be shown later. The next example shows more complicated code that contains a loop and generates a simple memory copy function that uses uint32_t items:
#include <asmjit/x86.h>
#include <stdio.h>
typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count);
int main() {
uint32_t*,
const uint32_t*,
size_t>());
Label L_Loop = cc.newLabel();
Label L_Exit = cc.newLabel();
cc.test(i, i);
cc.jz(L_Exit);
cc.bind(L_Loop);
cc.mov(tmp, x86::dword_ptr(src));
cc.mov(x86::dword_ptr(dst), tmp);
cc.add(src, 4);
cc.add(dst, 4);
cc.dec(i);
cc.jnz(L_Loop);
cc.bind(L_Exit);
cc.endFunc();
cc.finalize();
MemCpy32 memcpy32;
if (err)
return 1;
uint32_t input[6] = { 1, 2, 3, 5, 8, 13 };
uint32_t output[6];
memcpy32(output, input, 6);
for (uint32_t i = 0; i < 6; i++)
printf("%d\n", output[i]);
return 0;
}
AVX and AVX-512
AVX and AVX-512 code generation must be explicitly enabled via FuncFrame to work properly. If it's not set up correctly then the Prolog & Epilog would use SSE instead of AVX instructions to work with SIMD registers. In addition, Compiler requires AVX-512 to be explicitly enabled via FuncFrame in order to use all 32 SIMD registers.
#include <asmjit/x86.h>
#include <stdio.h>
typedef void (*Func)(void*);
int main() {
FuncNode* funcNode = cc.addFunc(FuncSignature::build<void, void*>());
x86::Gp addr = cc.newIntPtr(
"addr");
cc.vpaddq(vreg, vreg, vreg);
cc.endFunc();
cc.finalize();
Func fn;
if (err) return 1;
uint64_t data[] = { 1, 2, 3, 4, 5, 6, 7, 8 };
fn(data);
printf("%llu\n", (unsigned long long)data[0]);
return 0;
}
Recursive Functions
It's possible to create multiple functions using the same x86::Compiler instance and to make links between them. In such a case it's important to keep the pointer to the FuncNode.
The example below creates a simple Fibonacci function that calls itself recursively:
#include <asmjit/x86.h>
#include <stdio.h>
typedef uint32_t (*Fibonacci)(uint32_t x);
int main() {
FuncSignature::build<int, int>());
Label L_Exit = cc.newLabel();
cc.cmp(x, 3);
cc.jb(L_Exit);
cc.mov(y, x);
cc.dec(x);
cc.invoke(&invokeNode,
FuncSignature::build<int, int>());
cc.add(x, y);
cc.bind(L_Exit);
cc.ret(x);
cc.endFunc();
cc.finalize();
Fibonacci fib;
if (err) return 1;
printf("Fib(%u) -> %u\n", 8, fib(8));
return 0;
}
Stack Management
A function's stack frame is managed automatically and is used by the register allocator to spill virtual registers. It also provides an interface to allocate a user-defined block of the stack, which can be used as temporary storage by the generated function. In the following example a stack of 256 bytes is allocated, filled with byte values from 0 to 255, and then iterated again to sum all the values.
#include <asmjit/x86.h>
#include <stdio.h>
typedef int (*Func)(void);
int main() {
cc.addFunc(FuncSignature::build<int>());
stackIdx.setIndex(i);
stackIdx.setSize(1);
cc.lea(p, stack);
cc.xor_(i, i);
Label L1 = cc.newLabel();
Label L2 = cc.newLabel();
cc.bind(L1);
cc.mov(stackIdx, i.
r8());
cc.inc(i);
cc.cmp(i, 256);
cc.jb(L1);
cc.xor_(i, i);
cc.xor_(sum, sum);
cc.bind(L2);
cc.movzx(val, stackIdx);
cc.add(sum, val);
cc.inc(i);
cc.cmp(i, 256);
cc.jb(L2);
cc.ret(sum);
cc.endFunc();
cc.finalize();
if (err) return 1;
printf(
"Func() -> %d\n",
func());
return 0;
}
Constant Pool
Compiler provides two constant pools for general purpose code generation:
- Local constant pool - Part of FuncNode, can only be used by a single function and is added after the function's epilog sequence (after the ret instruction).
- Global constant pool - Part of BaseCompiler, flushed at the end of the generated code by BaseEmitter::finalize().
The example below illustrates how a built-in constant pool can be used:
#include <asmjit/x86.h>
cc.
addFunc(FuncSignature::build<int>());
cc.mov(v0, c0);
cc.mov(v1, c1);
cc.add(v0, v1);
}
Jump Tables
x86::Compiler supports the jmp instruction with a reg/mem operand, which is a commonly used pattern to implement indirect jumps within a function, for example to implement a switch() statement in programming languages. By default AsmJit assumes that every basic block can be a possible jump target as it's unable to deduce targets from the instruction's operands. This is a very pessimistic default that should be avoided if possible as it's costly and very unfriendly to liveness analysis and register allocation.
Instead of relying on such pessimistic default behavior, let's use JumpAnnotation to annotate a jump where all targets are known:
#include <asmjit/x86.h>
FuncNode* funcNode = cc.
addFunc(FuncSignature::build<float, float, float, uint32_t>());
x86::Gp target = cc.newIntPtr(
"target");
x86::Gp offset = cc.newIntPtr(
"offset");
cc.movsxd(target, x86::dword_ptr(offset, op.
cloneAs(offset), 2));
else
cc.mov(target, x86::dword_ptr(offset, op.
cloneAs(offset), 2));
cc.add(target, offset);
cc.
jmp(target, annotation);
cc.addss(a, b);
cc.subss(a, b);
cc.mulss(a, b);
cc.divss(a, b);
}
Compiler(
const Compiler& other) =
delete
~Compiler()
noexcept override
RegT newSimilarReg(const RegT& ref)
template<typename RegT, typename... Args> RegT newSimilarReg(const RegT& ref, const char* fmt, Args&&... args)
template<typename... Args> Reg newReg(
TypeId typeId,
const char* fmt, Args&&... args)
template<typename... Args> Gp newGp(
TypeId typeId,
const char* fmt, Args&&... args)
template<typename... Args> Vec newVec(
TypeId typeId,
const char* fmt, Args&&... args)
template<typename... Args> KReg newK(
TypeId typeId,
const char* fmt, Args&&... args)
template<typename... Args> Gp newInt8(
const char* fmt, Args&&... args)
template<typename... Args> Gp newUInt8(
const char* fmt, Args&&... args)
template<typename... Args> Gp newInt16(
const char* fmt, Args&&... args)
template<typename... Args> Gp newUInt16(
const char* fmt, Args&&... args)
template<typename... Args> Gp newInt32(
const char* fmt, Args&&... args)
template<typename... Args> Gp newUInt32(
const char* fmt, Args&&... args)
template<typename... Args> Gp newInt64(
const char* fmt, Args&&... args)
template<typename... Args> Gp newUInt64(
const char* fmt, Args&&... args)
template<typename... Args> Gp newIntPtr(
const char* fmt, Args&&... args)
template<typename... Args> Gp newUIntPtr(
const char* fmt, Args&&... args)
template<typename... Args> Gp newGpb(
const char* fmt, Args&&... args)
template<typename... Args> Gp newGpw(
const char* fmt, Args&&... args)
template<typename... Args> Gp newGpd(
const char* fmt, Args&&... args)
template<typename... Args> Gp newGpq(
const char* fmt, Args&&... args)
template<typename... Args> Gp newGpz(
const char* fmt, Args&&... args)
template<typename... Args> Xmm newXmm(
const char* fmt, Args&&... args)
template<typename... Args> Xmm newXmmSs(
const char* fmt, Args&&... args)
template<typename... Args> Xmm newXmmSd(
const char* fmt, Args&&... args)
template<typename... Args> Xmm newXmmPs(
const char* fmt, Args&&... args)
template<typename... Args> Xmm newXmmPd(
const char* fmt, Args&&... args)
template<typename... Args> Ymm newYmm(
const char* fmt, Args&&... args)
template<typename... Args> Ymm newYmmPs(
const char* fmt, Args&&... args)
template<typename... Args> Ymm newYmmPd(
const char* fmt, Args&&... args)
template<typename... Args> Zmm newZmm(
const char* fmt, Args&&... args)
template<typename... Args> Zmm newZmmPs(
const char* fmt, Args&&... args)
template<typename... Args> Zmm newZmmPd(
const char* fmt, Args&&... args)
template<typename... Args> Mm newMm(
const char* fmt, Args&&... args)
template<typename... Args> KReg newKb(
const char* fmt, Args&&... args)
template<typename... Args> KReg newKw(
const char* fmt, Args&&... args)
template<typename... Args> KReg newKd(
const char* fmt, Args&&... args)
template<typename... Args> KReg newKq(
const char* fmt, Args&&... args)
Mem newStack(
uint32_t size,
uint32_t alignment,
const char* name = nullptr)
template<typename T, typename... Args>
template<typename T>
template<typename T, typename... Args>
template<typename T, typename... Args> Error addPassT(Args&&... args)
Error embed(
const void* data,
size_t dataSize)
override Error comment(
const char* data,
size_t size = SIZE_MAX)
override T* as() noexcept
const T* as() const noexcept
template<typename... Args>
Gp gpz(
uint32_t id)
const noexcept Gp gpz(
const Gp& reg)
const noexcept Mem ptr_base(
uint32_t baseId,
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zax(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zcx(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zdx(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zbx(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zsp(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zbp(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zsi(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem ptr_zdi(
int32_t off = 0,
uint32_t size = 0)
const noexcept Mem intptr_ptr(
const Gp& base,
const Gp& index,
uint32_t shift = 0,
int32_t offset = 0)
const noexcept Mem intptr_ptr(
const Gp& base,
const Vec& index,
uint32_t shift = 0,
int32_t offset = 0)
const noexcept Mem intptr_ptr(
const Label& base,
const Gp& index,
uint32_t shift,
int32_t offset = 0)
const noexcept Mem intptr_ptr(
uint64_t base,
const Gp& index,
uint32_t shift = 0)
const noexcept Error db(
uint8_t x,
size_t repeatCount = 1)
Error dw(
uint16_t x,
size_t repeatCount = 1)
Error dd(
uint32_t x,
size_t repeatCount = 1)
Error dq(
uint64_t x,
size_t repeatCount = 1)
template<typename T>
This&
rep(
const Gp& zcx)
noexceptThis&
repe(
const Gp& zcx)
noexceptThis&
repz(
const Gp& zcx)
noexceptThis&
repne(
const Gp& zcx)
noexceptThis&
repnz(
const Gp& zcx)
noexceptThis&
k(
const KReg& kreg)
noexceptError cbw(
const Gp_AX& o0)
Error cdq(
const Gp_EDX& o0,
const Gp_EAX& o1)
Error cdqe(
const Gp_EAX& o0)
Error cqo(
const Gp_RDX& o0,
const Gp_RAX& o1)
Error cwd(
const Gp_DX& o0,
const Gp_AX& o1)
Error cwde(
const Gp_EAX& o0)
Error cmps(
const DS_ZSI& o0,
const ES_ZDI& o1)
Error cmpxchg(
const Gp& o0,
const Gp& o1,
const Gp_ZAX& o2)
Error cmpxchg(
const Mem& o0,
const Gp& o1,
const Gp_ZAX& o2)
Error cmpxchg16b(
const Mem& o0,
const Gp_RDX& o1,
const Gp_RAX& o2,
const Gp_RCX& o3,
const Gp_RBX& o4)
Error cmpxchg8b(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2,
const Gp_ECX& o3,
const Gp_EBX& o4)
Error lods(
const Gp_ZAX& o0,
const DS_ZSI& o1)
Error loop(
const Gp_ZCX& o0,
const Imm& o1)
Error loope(
const Gp_ZCX& o0,
const Imm& o1)
Error loopne(
const Gp_ZCX& o0,
const Imm& o1)
Error movs(
const ES_ZDI& o0,
const DS_ZSI& o1)
Error mul(
const Gp_AX& o0,
const Gp& o1)
Error mul(
const Gp_AX& o0,
const Mem& o1)
Error mul(
const Gp_ZDX& o0,
const Gp_ZAX& o1,
const Gp& o2)
Error mul(
const Gp_ZDX& o0,
const Gp_ZAX& o1,
const Mem& o2)
Error rcl(
const Gp& o0,
const Gp_CL& o1)
Error rcl(
const Mem& o0,
const Gp_CL& o1)
Error rcr(
const Gp& o0,
const Gp_CL& o1)
Error rcr(
const Mem& o0,
const Gp_CL& o1)
Error rol(
const Gp& o0,
const Gp_CL& o1)
Error rol(
const Mem& o0,
const Gp_CL& o1)
Error ror(
const Gp& o0,
const Gp_CL& o1)
Error ror(
const Mem& o0,
const Gp_CL& o1)
Error sal(
const Gp& o0,
const Gp_CL& o1)
Error sal(
const Mem& o0,
const Gp_CL& o1)
Error sar(
const Gp& o0,
const Gp_CL& o1)
Error sar(
const Mem& o0,
const Gp_CL& o1)
Error scas(
const Gp_ZAX& o0,
const ES_ZDI& o1)
Error shl(
const Gp& o0,
const Gp_CL& o1)
Error shl(
const Mem& o0,
const Gp_CL& o1)
Error shr(
const Gp& o0,
const Gp_CL& o1)
Error shr(
const Mem& o0,
const Gp_CL& o1)
Error shld(
const Gp& o0,
const Gp& o1,
const Gp_CL& o2)
Error shld(
const Mem& o0,
const Gp& o1,
const Gp_CL& o2)
Error shrd(
const Gp& o0,
const Gp& o1,
const Gp_CL& o2)
Error shrd(
const Mem& o0,
const Gp& o1,
const Gp_CL& o2)
Error stos(
const ES_ZDI& o0,
const Gp_ZAX& o1)
Error in(
const Gp_ZAX& o0,
const Imm& o1)
Error in(
const Gp_ZAX& o0,
const Gp_DX& o1)
Error ins(
const ES_ZDI& o0,
const Gp_DX& o1)
Error out(
const Imm& o0,
const Gp_ZAX& o1)
Error out(
const Gp_DX& o0,
const Gp_ZAX& o1)
Error outs(
const Gp_DX& o0,
const DS_ZSI& o1)
Error cpuid(
const Gp_EAX& o0,
const Gp_EBX& o1,
const Gp_ECX& o2,
const Gp_EDX& o3)
Error lahf(
const Gp_AH& o0)
Error sahf(
const Gp_AH& o0)
Error mulx(
const Gp& o0,
const Gp& o1,
const Gp& o2,
const Gp_ZDX& o3)
Error mulx(
const Gp& o0,
const Gp& o1,
const Mem& o2,
const Gp_ZDX& o3)
Error clzero(
const DS_ZAX& o0)
Error rdpru(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error rdpkru(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error rdtsc(
const Gp_EDX& o0,
const Gp_EAX& o1)
Error rdtscp(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error xgetbv(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error xrstor(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xrstor64(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xrstors(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xrstors64(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsave(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsave64(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsavec(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsavec64(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsaveopt(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsaveopt64(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsaves(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error xsaves64(
const Mem& o0,
const Gp_EDX& o1,
const Gp_EAX& o2)
Error rdmsr(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error rdpmc(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error wrmsr(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error xsetbv(
const Gp_EDX& o0,
const Gp_EAX& o1,
const Gp_ECX& o2)
Error invlpgb(
const Gp_EAX& o0,
const Gp_EDX& o1,
const Gp_ECX& o2)
Error blendvpd(
const Xmm& o0,
const Xmm& o1,
const XMM0& o2)
Error blendvpd(
const Xmm& o0,
const Mem& o1,
const XMM0& o2)
Error blendvps(
const Xmm& o0,
const Xmm& o1,
const XMM0& o2)
Error blendvps(
const Xmm& o0,
const Mem& o1,
const XMM0& o2)
Error maskmovq(
const Mm& o0,
const Mm& o1,
const DS_ZDI& o2)
Error maskmovdqu(
const Xmm& o0,
const Xmm& o1,
const DS_ZDI& o2)
Error pblendvb(
const Xmm& o0,
const Xmm& o1,
const XMM0& o2)
Error pblendvb(
const Xmm& o0,
const Mem& o1,
const XMM0& o2)
Error pcmpestri(
const Xmm& o0,
const Xmm& o1,
const Imm& o2,
const Gp_ECX& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error pcmpestri(
const Xmm& o0,
const Mem& o1,
const Imm& o2,
const Gp_ECX& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error pcmpestrm(
const Xmm& o0,
const Xmm& o1,
const Imm& o2,
const XMM0& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error pcmpestrm(
const Xmm& o0,
const Mem& o1,
const Imm& o2,
const XMM0& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error pcmpistri(
const Xmm& o0,
const Xmm& o1,
const Imm& o2,
const Gp_ECX& o3)
Error pcmpistri(
const Xmm& o0,
const Mem& o1,
const Imm& o2,
const Gp_ECX& o3)
Error pcmpistrm(
const Xmm& o0,
const Xmm& o1,
const Imm& o2,
const XMM0& o3)
Error pcmpistrm(
const Xmm& o0,
const Mem& o1,
const Imm& o2,
const XMM0& o3)
Error sha256rnds2(
const Xmm& o0,
const Xmm& o1,
const XMM0& o2)
Error sha256rnds2(
const Xmm& o0,
const Mem& o1,
const XMM0& o2)
Error vmaskmovdqu(
const Vec& o0,
const Vec& o1,
const DS_ZDI& o2)
Error vpcmpestri(
const Vec& o0,
const Vec& o1,
const Imm& o2,
const Gp_ECX& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error vpcmpestri(
const Vec& o0,
const Mem& o1,
const Imm& o2,
const Gp_ECX& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error vpcmpestrm(
const Vec& o0,
const Vec& o1,
const Imm& o2,
const XMM0& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error vpcmpestrm(
const Vec& o0,
const Mem& o1,
const Imm& o2,
const XMM0& o3,
const Gp_EAX& o4,
const Gp_EDX& o5)
Error vpcmpistri(
const Vec& o0,
const Vec& o1,
const Imm& o2,
const Gp_ECX& o3)
Error vpcmpistri(
const Vec& o0,
const Mem& o1,
const Imm& o2,
const Gp_ECX& o3)
Error vpcmpistrm(
const Vec& o0,
const Vec& o1,
const Imm& o2,
const XMM0& o3)
Error vpcmpistrm(
const Vec& o0,
const Mem& o1,
const Imm& o2,
const XMM0& o3)