aboutsummaryrefslogtreecommitdiff
path: root/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'lib/ExecutionEngine/Orc/OrcTargetSupport.cpp')
-rw-r--r--lib/ExecutionEngine/Orc/OrcTargetSupport.cpp275
1 files changed, 154 insertions, 121 deletions
diff --git a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
index 258868aa64f6..b931f10b9d78 100644
--- a/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
+++ b/lib/ExecutionEngine/Orc/OrcTargetSupport.cpp
@@ -1,137 +1,170 @@
+//===------- OrcTargetSupport.cpp - Target support utilities for Orc ------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
#include "llvm/ADT/Triple.h"
#include "llvm/ExecutionEngine/Orc/OrcTargetSupport.h"
+#include "llvm/Support/Process.h"
#include <array>
-using namespace llvm::orc;
+namespace llvm {
+namespace orc {
-namespace {
+void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
+ void *CallbackMgr) {
+
+ const uint8_t ResolverCode[] = {
+ // resolver_entry:
+ 0x55, // 0x00: pushq %rbp
+ 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
+ 0x50, // 0x04: pushq %rax
+ 0x53, // 0x05: pushq %rbx
+ 0x51, // 0x06: pushq %rcx
+ 0x52, // 0x07: pushq %rdx
+ 0x56, // 0x08: pushq %rsi
+ 0x57, // 0x09: pushq %rdi
+ 0x41, 0x50, // 0x0a: pushq %r8
+ 0x41, 0x51, // 0x0c: pushq %r9
+ 0x41, 0x52, // 0x0e: pushq %r10
+ 0x41, 0x53, // 0x10: pushq %r11
+ 0x41, 0x54, // 0x12: pushq %r12
+ 0x41, 0x55, // 0x14: pushq %r13
+ 0x41, 0x56, // 0x16: pushq %r14
+ 0x41, 0x57, // 0x18: pushq %r15
+ 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 20, %rsp
+ 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
+ 0x48, 0x8d, 0x3d, 0x43, 0x00, 0x00, 0x00, // 0x26: leaq 67(%rip), %rdi
+ 0x48, 0x8b, 0x3f, // 0x2d: movq (%rdi), %rdi
+ 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi
+ 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi
+ 0x48, 0xb8, // 0x38: movabsq $0, %rax
+
+ // 0x3a: JIT re-entry fn addr:
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+
+ 0xff, 0xd0, // 0x42: callq *%rax
+ 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp)
+ 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp)
+ 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 20, %rsp
+ 0x41, 0x5f, // 0x54: popq %r15
+ 0x41, 0x5e, // 0x56: popq %r14
+ 0x41, 0x5d, // 0x58: popq %r13
+ 0x41, 0x5c, // 0x5a: popq %r12
+ 0x41, 0x5b, // 0x5c: popq %r11
+ 0x41, 0x5a, // 0x5e: popq %r10
+ 0x41, 0x59, // 0x60: popq %r9
+ 0x41, 0x58, // 0x62: popq %r8
+ 0x5f, // 0x64: popq %rdi
+ 0x5e, // 0x65: popq %rsi
+ 0x5a, // 0x66: popq %rdx
+ 0x59, // 0x67: popq %rcx
+ 0x5b, // 0x68: popq %rbx
+ 0x58, // 0x69: popq %rax
+ 0x5d, // 0x6a: popq %rbp
+ 0xc3, // 0x6b: retq
+ 0x00, 0x00, 0x00, 0x00, // 0x6c: <padding>
+
+ // 0x70: Callback mgr address.
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ };
-uint64_t executeCompileCallback(JITCompileCallbackManagerBase *JCBM,
- TargetAddress CallbackID) {
- return JCBM->executeCompileCallback(CallbackID);
+ const unsigned ReentryFnAddrOffset = 0x3a;
+ const unsigned CallbackMgrAddrOffset = 0x70;
+
+ memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
+ memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
+ memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
+ sizeof(CallbackMgr));
}
-}
+void OrcX86_64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
+ unsigned NumTrampolines) {
-namespace llvm {
-namespace orc {
+ unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
-const char* OrcX86_64::ResolverBlockName = "orc_resolver_block";
-
-void OrcX86_64::insertResolverBlock(
- Module &M, JITCompileCallbackManagerBase &JCBM) {
-
- // Trampoline code-sequence length, used to get trampoline address from return
- // address.
- const unsigned X86_64_TrampolineLength = 6;
-
- // List of x86-64 GPRs to save. Note - RBP saved separately below.
- std::array<const char *, 14> GPRs = {{
- "rax", "rbx", "rcx", "rdx",
- "rsi", "rdi", "r8", "r9",
- "r10", "r11", "r12", "r13",
- "r14", "r15"
- }};
-
- // Address of the executeCompileCallback function.
- uint64_t CallbackAddr =
- static_cast<uint64_t>(
- reinterpret_cast<uintptr_t>(executeCompileCallback));
-
- std::ostringstream AsmStream;
- Triple TT(M.getTargetTriple());
-
- // Switch to text section.
- if (TT.getOS() == Triple::Darwin)
- AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
- << ".align 4, 0x90\n";
- else
- AsmStream << ".text\n"
- << ".align 16, 0x90\n";
-
- // Bake in a pointer to the callback manager immediately before the
- // start of the resolver function.
- AsmStream << "jit_callback_manager_addr:\n"
- << " .quad " << &JCBM << "\n";
-
- // Start the resolver function.
- AsmStream << ResolverBlockName << ":\n"
- << " pushq %rbp\n"
- << " movq %rsp, %rbp\n";
-
- // Store the GPRs.
- for (const auto &GPR : GPRs)
- AsmStream << " pushq %" << GPR << "\n";
-
- // Store floating-point state with FXSAVE.
- // Note: We need to keep the stack 16-byte aligned, so if we've emitted an odd
- // number of 64-bit pushes so far (GPRs.size() plus 1 for RBP) then add
- // an extra 64 bits of padding to the FXSave area.
- unsigned Padding = (GPRs.size() + 1) % 2 ? 8 : 0;
- unsigned FXSaveSize = 512 + Padding;
- AsmStream << " subq $" << FXSaveSize << ", %rsp\n"
- << " fxsave64 (%rsp)\n"
-
- // Load callback manager address, compute trampoline address, call JIT.
- << " lea jit_callback_manager_addr(%rip), %rdi\n"
- << " movq (%rdi), %rdi\n"
- << " movq 0x8(%rbp), %rsi\n"
- << " subq $" << X86_64_TrampolineLength << ", %rsi\n"
- << " movabsq $" << CallbackAddr << ", %rax\n"
- << " callq *%rax\n"
-
- // Replace the return to the trampoline with the return address of the
- // compiled function body.
- << " movq %rax, 0x8(%rbp)\n"
-
- // Restore the floating point state.
- << " fxrstor64 (%rsp)\n"
- << " addq $" << FXSaveSize << ", %rsp\n";
-
- for (const auto &GPR : make_range(GPRs.rbegin(), GPRs.rend()))
- AsmStream << " popq %" << GPR << "\n";
-
- // Restore original RBP and return to compiled function body.
- AsmStream << " popq %rbp\n"
- << " retq\n";
-
- M.appendModuleInlineAsm(AsmStream.str());
-}
+ memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void*));
-OrcX86_64::LabelNameFtor
-OrcX86_64::insertCompileCallbackTrampolines(Module &M,
- TargetAddress ResolverBlockAddr,
- unsigned NumCalls,
- unsigned StartIndex) {
- const char *ResolverBlockPtrName = "Lorc_resolve_block_addr";
-
- std::ostringstream AsmStream;
- Triple TT(M.getTargetTriple());
-
- if (TT.getOS() == Triple::Darwin)
- AsmStream << ".section __TEXT,__text,regular,pure_instructions\n"
- << ".align 4, 0x90\n";
- else
- AsmStream << ".text\n"
- << ".align 16, 0x90\n";
-
- AsmStream << ResolverBlockPtrName << ":\n"
- << " .quad " << ResolverBlockAddr << "\n";
-
- auto GetLabelName =
- [=](unsigned I) {
- std::ostringstream LabelStream;
- LabelStream << "orc_jcc_" << (StartIndex + I);
- return LabelStream.str();
- };
+ uint64_t *Trampolines = reinterpret_cast<uint64_t*>(TrampolineMem);
+ uint64_t CallIndirPCRel = 0xf1c40000000015ff;
- for (unsigned I = 0; I < NumCalls; ++I)
- AsmStream << GetLabelName(I) << ":\n"
- << " callq *" << ResolverBlockPtrName << "(%rip)\n";
-
- M.appendModuleInlineAsm(AsmStream.str());
+ for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
+ Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16);
+}
- return GetLabelName;
+std::error_code OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
+ unsigned MinStubs,
+ void *InitialPtrVal) {
+ // Stub format is:
+ //
+ // .section __orc_stubs
+ // stub1:
+ // jmpq *ptr1(%rip)
+ // .byte 0xC4 ; <- Invalid opcode padding.
+ // .byte 0xF1
+ // stub2:
+ // jmpq *ptr2(%rip)
+ //
+ // ...
+ //
+ // .section __orc_ptrs
+ // ptr1:
+ // .quad 0x0
+ // ptr2:
+ // .quad 0x0
+ //
+ // ...
+
+ const unsigned StubSize = IndirectStubsInfo::StubSize;
+
+ // Emit at least MinStubs, rounded up to fill the pages allocated.
+ unsigned PageSize = sys::Process::getPageSize();
+ unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
+ unsigned NumStubs = (NumPages * PageSize) / StubSize;
+
+ // Allocate memory for stubs and pointers in one call.
+ std::error_code EC;
+ auto StubsMem =
+ sys::OwningMemoryBlock(
+ sys::Memory::allocateMappedMemory(2 * NumPages * PageSize, nullptr,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_WRITE,
+ EC));
+
+ if (EC)
+ return EC;
+
+ // Create separate MemoryBlocks representing the stubs and pointers.
+ sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
+ sys::MemoryBlock PtrsBlock(static_cast<char*>(StubsMem.base()) +
+ NumPages * PageSize,
+ NumPages * PageSize);
+
+ // Populate the stubs page stubs and mark it executable.
+ uint64_t *Stub = reinterpret_cast<uint64_t*>(StubsBlock.base());
+ uint64_t PtrOffsetField =
+ static_cast<uint64_t>(NumPages * PageSize - 6) << 16;
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
+
+ if (auto EC = sys::Memory::protectMappedMemory(StubsBlock,
+ sys::Memory::MF_READ |
+ sys::Memory::MF_EXEC))
+ return EC;
+
+ // Initialize all pointers to point at FailureAddress.
+ void **Ptr = reinterpret_cast<void**>(PtrsBlock.base());
+ for (unsigned I = 0; I < NumStubs; ++I)
+ Ptr[I] = InitialPtrVal;
+
+ StubsInfo.NumStubs = NumStubs;
+ StubsInfo.StubsMem = std::move(StubsMem);
+
+ return std::error_code();
}
} // End namespace orc.