asmjit::x86::Compiler Class Reference

X86/X64 compiler implementation.

Compiler Basics

The first x86::Compiler example shows how to generate a function that simply returns an integer value. It's an analogy to the first Assembler example:

#include <asmjit/x86.h>
#include <stdio.h>
using namespace asmjit;
// Signature of the generated function.
typedef int (*Func)(void);
int main() {
JitRuntime rt; // Runtime specialized for JIT code execution.
CodeHolder code; // Holds code and relocation information.
code.init(rt.environment()); // Initialize code to match the JIT environment.
x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
cc.addFunc(FuncSignatureT<int>());// Begin a function of `int fn(void)` signature.
x86::Gp vReg = cc.newGpd(); // Create a 32-bit general purpose register.
cc.mov(vReg, 1); // Move one to our virtual register `vReg`.
cc.ret(vReg); // Return `vReg` from the function.
cc.endFunc(); // End of the function body.
cc.finalize(); // Translate and assemble the whole 'cc' content.
// ----> x86::Compiler is no longer needed from here and can be destroyed <----
Func fn;
Error err = rt.add(&fn, &code); // Add the generated code to the runtime.
if (err) return 1; // Handle a possible error returned by AsmJit.
// ----> CodeHolder is no longer needed from here and can be destroyed <----
int result = fn(); // Execute the generated code.
printf("%d\n", result); // Print the resulting "1".
rt.release(fn); // Explicitly remove the function from the runtime.
return 0;
}

The BaseCompiler::addFunc() and BaseCompiler::endFunc() functions are used to define the function and its end. Both must be called per function, but the body doesn't have to be generated in sequence. An example of generating two functions will be shown later. The next example shows more complicated code that contains a loop and generates a simple memory copy function that operates on uint32_t items:

#include <asmjit/x86.h>
#include <stdio.h>
using namespace asmjit;
// Signature of the generated function.
typedef void (*MemCpy32)(uint32_t* dst, const uint32_t* src, size_t count);
// Generates `void memcpy32(uint32_t* dst, const uint32_t* src, size_t count)`,
// a loop that copies `count` 32-bit items, then runs it on a small array.
int main() {
  JitRuntime rt;                        // Runtime specialized for JIT code execution.
  CodeHolder code;                      // Holds code and relocation information.

  code.init(rt.environment());          // Initialize code to match the JIT environment.
  x86::Compiler cc(&code);              // Create and attach x86::Compiler to code.

  cc.addFunc(                           // Begin the function of the following signature:
    FuncSignatureT<void,                //   Return value - void (no return value).
      uint32_t*,                        //   1st argument - uint32_t* (machine reg-size).
      const uint32_t*,                  //   2nd argument - const uint32_t* (machine reg-size).
      size_t>());                       //   3rd argument - size_t (machine reg-size).

  Label L_Loop = cc.newLabel();         // Start of the loop.
  Label L_Exit = cc.newLabel();         // Used to exit early.

  x86::Gp dst = cc.newIntPtr("dst");    // Create `dst` register (destination pointer).
  x86::Gp src = cc.newIntPtr("src");    // Create `src` register (source pointer).
  x86::Gp i = cc.newUIntPtr("i");       // Create `i` register (loop counter).

  cc.setArg(0, dst);                    // Assign `dst` argument.
  cc.setArg(1, src);                    // Assign `src` argument.
  cc.setArg(2, i);                      // Assign `i` argument.

  cc.test(i, i);                        // Early exit if length is zero.
  cc.jz(L_Exit);

  cc.bind(L_Loop);                      // Bind the beginning of the loop here.

  x86::Gp tmp = cc.newInt32("tmp");     // Copy a single dword (4 bytes).
  cc.mov(tmp, x86::dword_ptr(src));     // Load DWORD from [src] address.
  cc.mov(x86::dword_ptr(dst), tmp);     // Store DWORD to [dst] address.

  cc.add(src, 4);                       // Increment `src`.
  cc.add(dst, 4);                       // Increment `dst`.

  cc.dec(i);                            // Keep looping while `i` is non-zero.
  cc.jnz(L_Loop);

  cc.bind(L_Exit);                      // Label used by early exit.
  cc.endFunc();                         // End of the function body.

  cc.finalize();                        // Translate and assemble the whole 'cc' content.
  // ----> x86::Compiler is no longer needed from here and can be destroyed <----

  // Add the generated code to the runtime.
  MemCpy32 memcpy32;
  Error err = rt.add(&memcpy32, &code);

  // Handle a possible error returned by AsmJit.
  if (err)
    return 1;
  // ----> CodeHolder is no longer needed from here and can be destroyed <----

  // Test the generated code.
  uint32_t input[6] = { 1, 2, 3, 5, 8, 13 };
  uint32_t output[6];
  memcpy32(output, input, 6);

  // The items are unsigned, so print them with an unsigned conversion.
  for (uint32_t i = 0; i < 6; i++)
    printf("%u\n", output[i]);

  rt.release(memcpy32);                 // Explicitly remove the function from the runtime.
  return 0;
}

Recursive Functions

It's possible to create multiple functions with the same x86::Compiler instance and to make links between them. In such a case it's important to keep the pointer to the FuncNode.

The example below creates a simple Fibonacci function that calls itself recursively:

#include <asmjit/x86.h>
#include <stdio.h>
using namespace asmjit;
// Signature of the generated function.
typedef uint32_t (*Fibonacci)(uint32_t x);
int main() {
JitRuntime rt; // Runtime specialized for JIT code execution.
CodeHolder code; // Holds code and relocation information.
code.init(rt.environment()); // Initialize code to match the JIT environment.
x86::Compiler cc(&code); // Create and attach x86::Compiler to code.
FuncNode* func = cc.addFunc( // Begin of the Fibonacci function, addFunc()
FuncSignatureT<int, int>()); // Returns a pointer to the FuncNode node.
Label L_Exit = cc.newLabel() // Exit label.
x86::Gp x = cc.newU32(); // Function x argument.
x86::Gp y = cc.newU32(); // Temporary.
cc.setArg(0, x);
cc.cmp(x, 3); // Return x if less than 3.
cc.jb(L_Exit);
cc.mov(y, x); // Make copy of the original x.
cc.dec(x); // Decrease x.
InvokeNode* invokeNode; // Function invocation:
cc.invoke(&invokeNode, // - InvokeNode (output).
func->label(), // - Function address or Label.
FuncSignatureT<int, int>()); // - Function signature.
invokeNode->setArg(0, x); // Assign x as the first argument.
invokeNode->setRet(0, x); // Assign x as a return value as well.
cc.add(x, y); // Combine the return value with y.
cc.bind(L_Exit);
cc.ret(x); // Return x.
cc.endFunc(); // End of the function body.
cc.finalize(); // Translate and assemble the whole 'cc' content.
// ----> x86::Compiler is no longer needed from here and can be destroyed <----
Fibonacci fib;
Error err = rt.add(&fib, &code); // Add the generated code to the runtime.
if (err) return 1; // Handle a possible error returned by AsmJit.
// ----> CodeHolder is no longer needed from here and can be destroyed <----
// Test the generated code.
printf("Fib(%u) -> %u\n", 8, fib(8));
rt.release(fib);
return 0;
}

Stack Management

A function's stack frame is managed automatically and is used by the register allocator to spill virtual registers. The compiler also provides an interface to allocate a user-defined block of the stack, which can be used as temporary storage by the generated function. In the following example a 256-byte block of stack is allocated, filled with byte values from 0 to 255, and then iterated again to sum all the values.

#include <asmjit/x86.h>
#include <stdio.h>
using namespace asmjit;
// Signature of the generated function.
typedef int (*Func)(void);
// Generates `int func(void)` that fills a 256-byte stack block with the
// values 0..255, sums them, and returns the sum.
int main() {
  JitRuntime rt;                        // Runtime specialized for JIT code execution.
  CodeHolder code;                      // Holds code and relocation information.

  code.init(rt.environment());          // Initialize code to match the JIT environment.
  x86::Compiler cc(&code);              // Create and attach x86::Compiler to code.

  cc.addFunc(FuncSignatureT<int>());    // Create a function that returns int.

  x86::Gp p = cc.newIntPtr("p");
  x86::Gp i = cc.newIntPtr("i");

  // Allocate 256 bytes on the stack aligned to 4 bytes.
  x86::Mem stack = cc.newStack(256, 4);

  x86::Mem stackIdx(stack);             // Copy of stack with i added.
  stackIdx.setIndex(i);                 // stackIdx <- stack[i].
  stackIdx.setSize(1);                  // stackIdx <- byte ptr stack[i].

  // Load a stack address to `p`. This step is purely optional and shows
  // that `lea` is useful to load a memory operand's address (even absolute)
  // to a general purpose register.
  cc.lea(p, stack);

  // Clear i (xor is a C++ keyword, hence 'xor_' is used instead).
  cc.xor_(i, i);

  Label L1 = cc.newLabel();
  Label L2 = cc.newLabel();

  cc.bind(L1);                          // First loop, fill the stack.
  cc.mov(stackIdx, i.r8());             // stack[i] = uint8_t(i).

  cc.inc(i);                            // i++;
  cc.cmp(i, 256);                       // if (i < 256)
  cc.jb(L1);                            //   goto L1;

  // Second loop, sum all bytes stored in `stack`.
  x86::Gp sum = cc.newI32("sum");
  x86::Gp val = cc.newI32("val");

  cc.xor_(i, i);
  cc.xor_(sum, sum);

  cc.bind(L2);

  cc.movzx(val, stackIdx);              // val = uint32_t(stack[i]);
  cc.add(sum, val);                     // sum += val;

  cc.inc(i);                            // i++;
  cc.cmp(i, 256);                       // if (i < 256)
  cc.jb(L2);                            //   goto L2;

  cc.ret(sum);                          // Return the `sum` of all values.
  cc.endFunc();                         // End of the function body.

  cc.finalize();                        // Translate and assemble the whole 'cc' content.
  // ----> x86::Compiler is no longer needed from here and can be destroyed <----

  Func func;
  Error err = rt.add(&func, &code);     // Add the generated code to the runtime.
  if (err) return 1;                    // Handle a possible error returned by AsmJit.
  // ----> CodeHolder is no longer needed from here and can be destroyed <----

  printf("Func() -> %d\n", func());     // Test the generated code.

  rt.release(func);                     // Explicitly remove the function from the runtime.
  return 0;
}

Constant Pool

Compiler provides two constant pools for general purpose code generation:

  • Local constant pool - Part of FuncNode, can only be used by a single function and is added after the function epilog sequence (after the ret instruction).
  • Global constant pool - Part of BaseCompiler, flushed at the end of the generated code by BaseEmitter::finalize().

The example below illustrates how a built-in constant pool can be used:

#include <asmjit/x86.h>
using namespace asmjit;
// Emits an `int fn(void)` function into `cc` that loads one constant from the
// local constant pool and one from the global constant pool, adds them, and
// returns the sum.
static void exampleUseOfConstPool(x86::Compiler& cc) {
  // Every function body must be opened by addFunc() and closed by endFunc().
  cc.addFunc(FuncSignatureT<int>());

  x86::Gp v0 = cc.newGpd("v0");
  x86::Gp v1 = cc.newGpd("v1");

  // Memory operands referring to 32-bit constants stored in the pools.
  // NOTE(review): the scope constants are spelled `ConstPool::kScopeLocal` /
  // `ConstPool::kScopeGlobal` in the AsmJit API generation that still offers
  // `cc.setArg()`; newer releases use `ConstPoolScope::kLocal` / `kGlobal` -
  // confirm against the AsmJit version in use.
  x86::Mem c0 = cc.newInt32Const(ConstPool::kScopeLocal, 200);
  x86::Mem c1 = cc.newInt32Const(ConstPool::kScopeGlobal, 33);

  cc.mov(v0, c0);                       // v0 = constant from the local pool.
  cc.mov(v1, c1);                       // v1 = constant from the global pool.
  cc.add(v0, v1);                       // v0 += v1.

  cc.ret(v0);                           // Return the sum.
  cc.endFunc();                         // End of the function body.
}

Jump Tables

x86::Compiler supports the jmp instruction with a reg/mem operand, which is a commonly used pattern to implement indirect jumps within a function, for example to implement a switch() statement in programming languages. By default AsmJit assumes that every basic block can be a possible jump target as it's unable to deduce targets from the instruction's operands. This is a very pessimistic default that should be avoided if possible as it's costly and very unfriendly to liveness analysis and register allocation.

Instead of relying on such pessimistic default behavior, let's use JumpAnnotation to annotate a jump where all targets are known:

#include <asmjit/x86.h>
using namespace asmjit;
// Emits `float fn(float a, float b, uint32_t op)` into `cc`. The function
// selects one of four scalar operations (add, sub, mul, div) through an
// indirect jump whose possible targets are listed in a JumpAnnotation.
static void exampleUseOfIndirectJump(x86::Compiler& cc) {
  // Begin the function; setArg() and endFunc() below operate on it.
  cc.addFunc(FuncSignatureT<float, float, float, uint32_t>());

  // Function arguments
  x86::Xmm a = cc.newXmmSs("a");
  x86::Xmm b = cc.newXmmSs("b");
  x86::Gp op = cc.newUInt32("op");

  x86::Gp target = cc.newIntPtr("target");
  x86::Gp offset = cc.newIntPtr("offset");

  Label L_Table = cc.newLabel();
  Label L_Add = cc.newLabel();
  Label L_Sub = cc.newLabel();
  Label L_Mul = cc.newLabel();
  Label L_Div = cc.newLabel();
  Label L_End = cc.newLabel();

  cc.setArg(0, a);
  cc.setArg(1, b);
  cc.setArg(2, op);

  // Jump annotation is a building block that allows to annotate all
  // possible targets where `jmp()` can jump. It then drives the CFG
  // construction and liveness analysis, which impacts register allocation.
  JumpAnnotation* annotation = cc.newJumpAnnotation();
  annotation->addLabel(L_Add);
  annotation->addLabel(L_Sub);
  annotation->addLabel(L_Mul);
  annotation->addLabel(L_Div);

  // Most likely not the common indirect jump approach, but it doesn't
  // really matter how the final address is calculated. The important
  // part is passing the JumpAnnotation to `jmp()`.
  cc.lea(offset, x86::ptr(L_Table));
  if (cc.is64Bit())
    cc.movsxd(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
  else
    cc.mov(target, x86::dword_ptr(offset, op.cloneAs(offset), 2));
  cc.add(target, offset);               // target = L_Table + table[op].
  cc.jmp(target, annotation);

  // Acts like a switch() statement in C.
  cc.bind(L_Add);
  cc.addss(a, b);
  cc.jmp(L_End);

  cc.bind(L_Sub);
  cc.subss(a, b);
  cc.jmp(L_End);

  cc.bind(L_Mul);
  cc.mulss(a, b);
  cc.jmp(L_End);

  cc.bind(L_Div);
  cc.divss(a, b);                       // Last case falls through to L_End.

  cc.bind(L_End);
  cc.ret(a);

  cc.endFunc();

  // Relative int32_t offsets of `L_XXX - L_Table`.
  cc.bind(L_Table);
  cc.embedLabelDelta(L_Add, L_Table, 4);
  cc.embedLabelDelta(L_Sub, L_Table, 4);
  cc.embedLabelDelta(L_Mul, L_Table, 4);
  cc.embedLabelDelta(L_Div, L_Table, 4);
}

Public Types

- Public Types inherited from asmjit::BaseCompiler
- Public Types inherited from asmjit::BaseBuilder
- Public Types inherited from asmjit::BaseEmitter

Member Functions

Construction & Destruction
Virtual Registers
Stack
Constants
Instruction Options
Function Call & Ret Intrinsics
Jump Tables Support
Finalize
Events
- Public Member Functions inherited from asmjit::BaseCompiler
- Public Member Functions inherited from asmjit::BaseBuilder
- Public Member Functions inherited from asmjit::BaseEmitter
- Public Member Functions inherited from asmjit::x86::EmitterExplicitT< Compiler >