1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
|
//===-- ARMJITInfo.cpp - Implement the JIT interfaces for the ARM target --===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the JIT interfaces for the ARM target.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "jit"
#include "ARMJITInfo.h"
#include "ARMInstrInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMRelocations.h"
#include "ARMSubtarget.h"
#include "llvm/Function.h"
#include "llvm/CodeGen/JITCodeEmitter.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Memory.h"
#include <cstdlib>
using namespace llvm;
void ARMJITInfo::replaceMachineCodeForFunction(void *Old, void *New) {
report_fatal_error("ARMJITInfo::replaceMachineCodeForFunction");
}
/// JITCompilerFunction - This contains the address of the JIT function used to
/// compile a function lazily.
static TargetJITInfo::JITCompilerFn JITCompilerFunction;
// Get the ASMPREFIX for the current host. This is often '_'.
#ifndef __USER_LABEL_PREFIX__
#define __USER_LABEL_PREFIX__
#endif
#define GETASMPREFIX2(X) #X
#define GETASMPREFIX(X) GETASMPREFIX2(X)
#define ASMPREFIX GETASMPREFIX(__USER_LABEL_PREFIX__)
// CompilationCallback stub - We can't use a C function with inline assembly in
// it, because we the prolog/epilog inserted by GCC won't work for us (we need
// to preserve more context and manipulate the stack directly). Instead,
// write our own wrapper, which does things our way, so we have complete
// control over register saving and restoring.
extern "C" {
// We don't need this on Android (generally on hand-held devices). This
// function is for the purpose of supporting "lazy symbol lookup" (lookup
// undefined symbol at runtime) (Actually, if you tried to remove the
// !defined(ANDROID) guard, you'll get compilation error since Android's
// toolchain choose armv5te as its CPU architecture which does not support
// instruction 'stmdb' and 'ldmia' within the function)
#if defined(__arm__) && !defined(ANDROID)
void ARMCompilationCallback();
asm(
".text\n"
".align 2\n"
".globl " ASMPREFIX "ARMCompilationCallback\n"
ASMPREFIX "ARMCompilationCallback:\n"
// Save caller saved registers since they may contain stuff
// for the real target function right now. We have to act as if this
// whole compilation callback doesn't exist as far as the caller is
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
"fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
// pass it as the argument to the C part of the callback
"mov r0, lr\n"
"sub sp, sp, #4\n"
// Call the C portion of the callback
"bl " ASMPREFIX "ARMCompilationCallbackC\n"
"add sp, sp, #4\n"
// Restoring the LR to the return address of the function that invoked
// the stub and de-allocating the stack space for it requires us to
// swap the two saved LR values on the stack, as they're backwards
// for what we need since the pop instruction has a pre-determined
// order for the registers.
// +--------+
// 0 | LR | Original return address
// +--------+
// 1 | LR | Stub address (start of stub)
// 2-5 | R3..R0 | Saved registers (we need to preserve all regs)
// 6-20 | D0..D7 | Saved VFP registers
// +--------+
//
#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
"fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
//
// We need to exchange the values in slots 0 and 1 so we can
// return to the address in slot 1 with the address in slot 0
// restored to the LR.
"ldr r0, [sp,#20]\n"
"ldr r1, [sp,#16]\n"
"str r1, [sp,#20]\n"
"str r0, [sp,#16]\n"
// Return to the (newly modified) stub to invoke the real function.
// The above twiddling of the saved return addresses allows us to
// deallocate everything, including the LR the stub saved, all in one
// pop instruction.
"ldmia sp!, {r0, r1, r2, r3, lr, pc}\n"
);
#else // Not an ARM host
void ARMCompilationCallback() {
llvm_unreachable("Cannot call ARMCompilationCallback() on a non-ARM arch!");
}
#endif
}
/// ARMCompilationCallbackC - This is the target-specific function invoked
/// by the function stub when we did not know the real target of a call.
/// This function must locate the start of the stub or call site and pass
/// it into the JIT compiler function.
extern "C" void ARMCompilationCallbackC(intptr_t StubAddr) {
// Get the address of the compiled code for this function.
intptr_t NewVal = (intptr_t)JITCompilerFunction((void*)StubAddr);
// Rewrite the call target... so that we don't end up here every time we
// execute the call. We're replacing the first two instructions of the
// stub with:
// ldr pc, [pc,#-4]
// <addr>
if (!sys::Memory::setRangeWritable((void*)StubAddr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
*(intptr_t *)StubAddr = 0xe51ff004; // ldr pc, [pc, #-4]
*(intptr_t *)(StubAddr+4) = NewVal;
if (!sys::Memory::setRangeExecutable((void*)StubAddr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
TargetJITInfo::LazyResolverFn
ARMJITInfo::getLazyResolverFunction(JITCompilerFn F) {
JITCompilerFunction = F;
return ARMCompilationCallback;
}
void *ARMJITInfo::emitGlobalValueIndirectSym(const GlobalValue *GV, void *Ptr,
JITCodeEmitter &JCE) {
uint8_t Buffer[4];
uint8_t *Cur = Buffer;
MachineCodeEmitter::emitWordLEInto(Cur, (intptr_t)Ptr);
void *PtrAddr = JCE.allocIndirectGV(
GV, Buffer, sizeof(Buffer), /*Alignment=*/4);
addIndirectSymAddr(Ptr, (intptr_t)PtrAddr);
return PtrAddr;
}
TargetJITInfo::StubLayout ARMJITInfo::getStubLayout() {
// The stub contains up to 3 4-byte instructions, aligned at 4 bytes, and a
// 4-byte address. See emitFunctionStub for details.
StubLayout Result = {16, 4};
return Result;
}
void *ARMJITInfo::emitFunctionStub(const Function* F, void *Fn,
JITCodeEmitter &JCE) {
void *Addr;
// If this is just a call to an external function, emit a branch instead of a
// call. The code is the same except for one bit of the last instruction.
if (Fn != (void*)(intptr_t)ARMCompilationCallback) {
// Branch to the corresponding function addr.
if (IsPIC) {
// The stub is 16-byte size and 4-aligned.
intptr_t LazyPtr = getIndirectSymAddr(Fn);
if (!LazyPtr) {
// In PIC mode, the function stub is loading a lazy-ptr.
LazyPtr= (intptr_t)emitGlobalValueIndirectSym((GlobalValue*)F, Fn, JCE);
DEBUG(if (F)
errs() << "JIT: Indirect symbol emitted at [" << LazyPtr
<< "] for GV '" << F->getName() << "'\n";
else
errs() << "JIT: Stub emitted at [" << LazyPtr
<< "] for external function at '" << Fn << "'\n");
}
JCE.emitAlignment(4);
Addr = (void*)JCE.getCurrentPCValue();
if (!sys::Memory::setRangeWritable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
JCE.emitWordLE(0xe59fc004); // ldr ip, [pc, #+4]
JCE.emitWordLE(0xe08fc00c); // L_func$scv: add ip, pc, ip
JCE.emitWordLE(0xe59cf000); // ldr pc, [ip]
JCE.emitWordLE(LazyPtr - (intptr_t(Addr)+4+8)); // func - (L_func$scv+8)
sys::Memory::InvalidateInstructionCache(Addr, 16);
if (!sys::Memory::setRangeExecutable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
} else {
// The stub is 8-byte size and 4-aligned.
JCE.emitAlignment(4);
Addr = (void*)JCE.getCurrentPCValue();
if (!sys::Memory::setRangeWritable(Addr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
JCE.emitWordLE((intptr_t)Fn); // addr of function
sys::Memory::InvalidateInstructionCache(Addr, 8);
if (!sys::Memory::setRangeExecutable(Addr, 8)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
} else {
// The compilation callback will overwrite the first two words of this
// stub with indirect branch instructions targeting the compiled code.
// This stub sets the return address to restart the stub, so that
// the new branch will be invoked when we come back.
//
// Branch and link to the compilation callback.
// The stub is 16-byte size and 4-byte aligned.
JCE.emitAlignment(4);
Addr = (void*)JCE.getCurrentPCValue();
if (!sys::Memory::setRangeWritable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub writable");
}
// Save LR so the callback can determine which stub called it.
// The compilation callback is responsible for popping this prior
// to returning.
JCE.emitWordLE(0xe92d4000); // push {lr}
// Set the return address to go back to the start of this stub.
JCE.emitWordLE(0xe24fe00c); // sub lr, pc, #12
// Invoke the compilation callback.
JCE.emitWordLE(0xe51ff004); // ldr pc, [pc, #-4]
// The address of the compilation callback.
JCE.emitWordLE((intptr_t)ARMCompilationCallback);
sys::Memory::InvalidateInstructionCache(Addr, 16);
if (!sys::Memory::setRangeExecutable(Addr, 16)) {
llvm_unreachable("ERROR: Unable to mark stub executable");
}
}
return Addr;
}
intptr_t ARMJITInfo::resolveRelocDestAddr(MachineRelocation *MR) const {
ARM::RelocationType RT = (ARM::RelocationType)MR->getRelocationType();
switch (RT) {
default:
return (intptr_t)(MR->getResultPointer());
case ARM::reloc_arm_pic_jt:
// Destination address - jump table base.
return (intptr_t)(MR->getResultPointer()) - MR->getConstantVal();
case ARM::reloc_arm_jt_base:
// Jump table base address.
return getJumpTableBaseAddr(MR->getJumpTableIndex());
case ARM::reloc_arm_cp_entry:
case ARM::reloc_arm_vfp_cp_entry:
// Constant pool entry address.
return getConstantPoolEntryAddr(MR->getConstantPoolIndex());
case ARM::reloc_arm_machine_cp_entry: {
ARMConstantPoolValue *ACPV = (ARMConstantPoolValue*)MR->getConstantVal();
assert((!ACPV->hasModifier() && !ACPV->mustAddCurrentAddress()) &&
"Can't handle this machine constant pool entry yet!");
intptr_t Addr = (intptr_t)(MR->getResultPointer());
Addr -= getPCLabelAddr(ACPV->getLabelId()) + ACPV->getPCAdjustment();
return Addr;
}
}
}
/// relocate - Before the JIT can run a block of code that has been emitted,
/// it must rewrite the code to contain the actual addresses of any
/// referenced global symbols.
void ARMJITInfo::relocate(void *Function, MachineRelocation *MR,
unsigned NumRelocs, unsigned char* GOTBase) {
for (unsigned i = 0; i != NumRelocs; ++i, ++MR) {
void *RelocPos = (char*)Function + MR->getMachineCodeOffset();
intptr_t ResultPtr = resolveRelocDestAddr(MR);
switch ((ARM::RelocationType)MR->getRelocationType()) {
case ARM::reloc_arm_cp_entry:
case ARM::reloc_arm_vfp_cp_entry:
case ARM::reloc_arm_relative: {
// It is necessary to calculate the correct PC relative value. We
// subtract the base addr from the target addr to form a byte offset.
ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
// If the result is positive, set bit U(23) to 1.
if (ResultPtr >= 0)
*((intptr_t*)RelocPos) |= 1 << ARMII::U_BitShift;
else {
// Otherwise, obtain the absolute value and set bit U(23) to 0.
*((intptr_t*)RelocPos) &= ~(1 << ARMII::U_BitShift);
ResultPtr = - ResultPtr;
}
// Set the immed value calculated.
// VFP immediate offset is multiplied by 4.
if (MR->getRelocationType() == ARM::reloc_arm_vfp_cp_entry)
ResultPtr = ResultPtr >> 2;
*((intptr_t*)RelocPos) |= ResultPtr;
// Set register Rn to PC.
*((intptr_t*)RelocPos) |=
ARMRegisterInfo::getRegisterNumbering(ARM::PC) << ARMII::RegRnShift;
break;
}
case ARM::reloc_arm_pic_jt:
case ARM::reloc_arm_machine_cp_entry:
case ARM::reloc_arm_absolute: {
// These addresses have already been resolved.
*((intptr_t*)RelocPos) |= (intptr_t)ResultPtr;
break;
}
case ARM::reloc_arm_branch: {
// It is necessary to calculate the correct value of signed_immed_24
// field. We subtract the base addr from the target addr to form a
// byte offset, which must be inside the range -33554432 and +33554428.
// Then, we set the signed_immed_24 field of the instruction to bits
// [25:2] of the byte offset. More details ARM-ARM p. A4-11.
ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
ResultPtr = (ResultPtr & 0x03FFFFFC) >> 2;
assert(ResultPtr >= -33554432 && ResultPtr <= 33554428);
*((intptr_t*)RelocPos) |= ResultPtr;
break;
}
case ARM::reloc_arm_jt_base: {
// JT base - (instruction addr + 8)
ResultPtr = ResultPtr - (intptr_t)RelocPos - 8;
*((intptr_t*)RelocPos) |= ResultPtr;
break;
}
case ARM::reloc_arm_movw: {
ResultPtr = ResultPtr & 0xFFFF;
*((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
*((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12
break;
}
case ARM::reloc_arm_movt: {
ResultPtr = (ResultPtr >> 16) & 0xFFFF;
*((intptr_t*)RelocPos) |= ResultPtr & 0xFFF;
*((intptr_t*)RelocPos) |= ((ResultPtr >> 12) & 0xF) << 16; // imm4:imm12, Insts[19-16] = imm4, Insts[11-0] = imm12
break;
}
}
}
}
|