Skip to content
This repository has been archived by the owner on Feb 5, 2019. It is now read-only.

Commit

Permalink
Splitstack implementation for android
Browse files Browse the repository at this point in the history
  • Loading branch information
ILyoan authored and alexcrichton committed Feb 26, 2014
1 parent ae43eea commit b24ba55
Show file tree
Hide file tree
Showing 9 changed files with 250 additions and 7 deletions.
3 changes: 3 additions & 0 deletions lib/Target/AArch64/AArch64Subtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ class AArch64Subtarget : public AArch64GenSubtargetInfo {

/// True when the target triple reports the ELF object-file format.
bool isTargetELF() const {
  return TargetTriple.isOSBinFormatELF();
}
/// True when the target triple reports Linux as its operating system.
bool isTargetLinux() const {
  return TargetTriple.isOSLinux();
}
/// True when the environment component of the target triple is
/// Triple::Android.
bool isTargetAndroid() const {
  const bool EnvIsAndroid = (Triple::Android == TargetTriple.getEnvironment());
  return EnvIsAndroid;
}

/// Accessor for the HasFPARMv8 subtarget flag.
bool hasFPARMv8() const {
  return HasFPARMv8;
}
/// Accessor for the HasNEON subtarget flag.
bool hasNEON() const {
  return HasNEON;
}
Expand Down
218 changes: 218 additions & 0 deletions lib/Target/ARM/ARMFrameLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,9 @@
#include "ARMFrameLowering.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMInstrInfo.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMTargetMachine.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
Expand Down Expand Up @@ -1603,3 +1605,219 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
MBB.erase(I);
}

// Round Value up to a constant of the form "8-bit value shifted left by an
// even number of bits", which is a shape the ARM instruction set can encode
// as an immediate (an 8-bit value rotated right by an even amount within a
// 32-bit word).
//
// Zero is returned unchanged. Values that already have that shape are
// returned unchanged as well.
//
// NOTE: when Value is above 0xFF000000 and has any of its low 24 bits set,
// rounding up does not fit in 32 bits and the result wraps around to 0.
static uint32_t AlignToARMConstant(uint32_t Value) {
  if (Value == 0)
    return 0;

  // Normalise: move the value left two bits at a time until one of the two
  // most significant bits is set, remembering how far it was moved.
  unsigned LeftShift = 0;
  for (; (Value & 0xC0000000) == 0; LeftShift += 2)
    Value <<= 2;

  // Keep only the top byte; round it up if any lower bit would be dropped.
  uint32_t TopByte = Value >> 24;
  if ((Value & 0x00FFFFFF) != 0)
    ++TopByte;

  // Rounding may have carried out of the 8-bit field into bit 8.
  if (TopByte & 0x00000100)
    TopByte &= 0x000001FC;

  // Undo the normalisation so the byte lands back at its original position.
  return LeftShift > 24 ? TopByte >> (LeftShift - 24)
                        : TopByte << (24 - LeftShift);
}

// The stack limit in the TCB is set to this many bytes above the actual
// stack limit.
static const uint64_t kSplitStackAvailable = 256;

// Adjust the function prologue to enable split (segmented) stacks.
// Only Android targets are supported; vararg functions are rejected.
//
// Four new blocks are placed in front of the original prologue block, in
// this layout order:
//   prevStackMBB - pushes the two scratch registers;
//   checkMBB     - loads the stack limit from the last TLS slot and compares
//                  it against the stack pointer (minus the requested size
//                  when that size is at least kSplitStackAvailable);
//   allocMBB     - passes the sizes in the scratch registers, calls
//                  __morestack(), restores the scratch registers and returns;
//   postStackMBB - restores the scratch registers and falls through into the
//                  original prologue.
void
ARMFrameLowering::adjustForSegmentedStacks(MachineFunction &MF) const {
  const ARMSubtarget *ST = &MF.getTarget().getSubtarget<ARMSubtarget>();

  // Vararg functions are not supported.
  if (MF.getFunction()->isVarArg())
    report_fatal_error("Segmented stacks do not support vararg functions.");
  // Targets other than Android are not supported.
  if (!ST->isTargetAndroid())
    report_fatal_error("Segmented stacks not supported on this platform.");

  MachineBasicBlock &prologueMBB = MF.front();
  MachineFrameInfo* MFI = MF.getFrameInfo();
  const ARMBaseInstrInfo &TII = *TM.getInstrInfo();
  ARMFunctionInfo* ARMFI = MF.getInfo<ARMFunctionInfo>();
  DebugLoc DL;

  // Use R4 and R5 as scratch registers.
  // They are saved before use and restored before control leaves the
  // inserted blocks.
  unsigned ScratchReg0 = ARM::R4;
  unsigned ScratchReg1 = ARM::R5;
  // Use the last TLS slot.
  unsigned TlsOffset = 63;

  MachineBasicBlock* prevStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock* postStackMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock* allocMBB = MF.CreateMachineBasicBlock();
  MachineBasicBlock* checkMBB = MF.CreateMachineBasicBlock();

  // Every register live into the original prologue is also live into each
  // of the new blocks that now precede it.
  for (MachineBasicBlock::livein_iterator i = prologueMBB.livein_begin(),
         e = prologueMBB.livein_end(); i != e; ++i) {
    allocMBB->addLiveIn(*i);
    checkMBB->addLiveIn(*i);
    prevStackMBB->addLiveIn(*i);
    postStackMBB->addLiveIn(*i);
  }

  // Resulting layout: prevStackMBB, checkMBB, allocMBB, postStackMBB,
  // followed by the original prologue.
  MF.push_front(postStackMBB);
  MF.push_front(allocMBB);
  MF.push_front(checkMBB);
  MF.push_front(prevStackMBB);

  // The stack size required by this function.
  uint64_t StackSize = MFI->getStackSize();

  // If the front-end requested a fixed stack segment size, use that.
  if (MF.getFunction()->hasFnAttribute("fixedstacksegment")) {
    StackSize = MF.getTarget().Options.FixedStackSegmentSize;
  }

  // The required stack size, rounded up to satisfy the ARM immediate
  // constant criterion.
  const uint64_t AlignedStackSize = AlignToARMConstant(StackSize);

  // When the frame size is less than 256 we just compare the stack
  // boundary directly to the value of the stack pointer, per gcc.
  bool CompareStackPointer = AlignedStackSize < kSplitStackAvailable;

  // Two callee-saved registers are used as scratch registers, so they
  // must be saved on the stack before they are used.
  // SR0 holds the stack limit and SR1 the stack size requested; both are
  // later reused for the arguments of __morestack().
  // SR0: Scratch Register #0
  // SR1: Scratch Register #1
  // push {SR0, SR1}
  AddDefaultPred(BuildMI(prevStackMBB, DL, TII.get(ARM::STMDB_UPD))
                 .addReg(ARM::SP, RegState::Define)
                 .addReg(ARM::SP))
    .addReg(ScratchReg0)
    .addReg(ScratchReg1);

  if (CompareStackPointer) {
    // mov SR1, sp
    AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::MOVr), ScratchReg1)
                   .addReg(ARM::SP)).addReg(0);
  } else {
    // sub SR1, sp, #AlignedStackSize
    AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::SUBri), ScratchReg1)
                   .addReg(ARM::SP).addImm(AlignedStackSize)).addReg(0);
  }

  // Get TLS base address.
  // mrc p15, #0, SR0, c13, c0, #3
  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::MRC), ScratchReg0)
                 .addImm(15)
                 .addImm(0)
                 .addImm(13)
                 .addImm(0)
                 .addImm(3));

  // The last TLS slot is assumed to hold the stack limit.
  // add SR0, SR0, #252
  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::ADDri), ScratchReg0)
                 .addReg(ScratchReg0).addImm(4*TlsOffset)).addReg(0);

  // Get stack limit.
  // ldr SR0, [SR0]
  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::LDRi12), ScratchReg0)
                 .addReg(ScratchReg0).addImm(0));

  // Compare stack limit with stack size requested.
  // cmp SR0, SR1
  AddDefaultPred(BuildMI(checkMBB, DL, TII.get(ARM::CMPrr))
                 .addReg(ScratchReg0)
                 .addReg(ScratchReg1));

  // This jump is taken if StackLimit < SP - stack required.
  // Otherwise control falls through into allocMBB.
  BuildMI(checkMBB, DL, TII.get(ARM::Bcc)).addMBB(postStackMBB)
    .addImm(ARMCC::LO)
    .addReg(ARM::CPSR);


  // Calling __morestack(StackSize, Size of stack arguments).
  // __morestack knows that the stack size requested is in SR0 (r4)
  // and the size of the stack arguments is in SR1 (r5).

  // Pass the first argument for __morestack in Scratch Register #0:
  // the amount of stack required.
  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVi), ScratchReg0)
                 .addImm(AlignedStackSize)).addReg(0);
  // Pass the second argument for __morestack in Scratch Register #1:
  // the amount of stack consumed by the function's stack arguments.
  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVi), ScratchReg1)
                 .addImm(AlignToARMConstant(ARMFI->getArgumentStackSize())))
    .addReg(0);

  // push {lr} - Save return address of this function.
  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::STMDB_UPD))
                 .addReg(ARM::SP, RegState::Define)
                 .addReg(ARM::SP))
    .addReg(ARM::LR);

  // Call __morestack().
  BuildMI(allocMBB, DL, TII.get(ARM::BL))
    .addExternalSymbol("__morestack");

  // Restore the return address of the original function.
  // pop {lr}
  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::LDMIA_UPD))
                 .addReg(ARM::SP, RegState::Define)
                 .addReg(ARM::SP))
    .addReg(ARM::LR);


  // Restore SR0 and SR1 in case __morestack() was called.
  // __morestack() will skip the postStackMBB block, so the scratch
  // registers have to be restored here as well.
  // pop {SR0, SR1}
  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::LDMIA_UPD))
                 .addReg(ARM::SP, RegState::Define)
                 .addReg(ARM::SP))
    .addReg(ScratchReg0)
    .addReg(ScratchReg1);

  // Return from this function.
  // mov pc, lr
  AddDefaultPred(BuildMI(allocMBB, DL, TII.get(ARM::MOVr), ARM::PC)
                 .addReg(ARM::LR)).addReg(0);

  // Restore SR0 and SR1 in case __morestack() was not called.
  // pop {SR0, SR1}
  AddDefaultPred(BuildMI(postStackMBB, DL, TII.get(ARM::LDMIA_UPD))
                 .addReg(ARM::SP, RegState::Define)
                 .addReg(ARM::SP))
    .addReg(ScratchReg0)
    .addReg(ScratchReg1);

  // Organizing MBB lists
  postStackMBB->addSuccessor(&prologueMBB);

  allocMBB->addSuccessor(postStackMBB);

  checkMBB->addSuccessor(postStackMBB);
  checkMBB->addSuccessor(allocMBB);

  prevStackMBB->addSuccessor(checkMBB);

#ifdef XDEBUG
  MF.verify();
#endif
}
8 changes: 6 additions & 2 deletions lib/Target/ARM/ARMFrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ namespace llvm {

class ARMFrameLowering : public TargetFrameLowering {
protected:
const ARMBaseTargetMachine &TM;
const ARMSubtarget &STI;

public:
explicit ARMFrameLowering(const ARMSubtarget &sti)
explicit ARMFrameLowering(const ARMBaseTargetMachine& tm,
const ARMSubtarget &sti)
: TargetFrameLowering(StackGrowsDown, sti.getStackAlignment(), 0, 4),
STI(sti) {
TM(tm), STI(sti) {
}

/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
Expand Down Expand Up @@ -59,6 +61,8 @@ class ARMFrameLowering : public TargetFrameLowering {
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS) const;

/// adjustForSegmentedStacks - Insert the segmented-stack limit check and the
/// call to __morestack() in front of the prologue of \p MF.
void adjustForSegmentedStacks(MachineFunction &MF) const;

private:
void emitPushInst(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI, unsigned StmOpc,
Expand Down
2 changes: 2 additions & 0 deletions lib/Target/ARM/ARMISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3060,6 +3060,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
VarArgStyleRegisters(CCInfo, DAG, dl, Chain,
CCInfo.getNextStackOffset());

AFI->setArgumentStackSize(CCInfo.getNextStackOffset());

return Chain;
}

Expand Down
7 changes: 7 additions & 0 deletions lib/Target/ARM/ARMMachineFunctionInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// relocation models.
unsigned GlobalBaseReg;

/// ArgumentStackSize - number of bytes of stack consumed by the arguments
/// that are passed on the stack.
/// NOTE(review): no initializer for this member is visible in this view;
/// confirm the constructors set it before getArgumentStackSize() is read.
unsigned ArgumentStackSize;

public:
ARMFunctionInfo() :
isThumb(false),
Expand Down Expand Up @@ -182,6 +186,9 @@ class ARMFunctionInfo : public MachineFunctionInfo {
/// Stores \p s in GPRCS2Size.
void setGPRCalleeSavedArea2Size(unsigned s) {
  GPRCS2Size = s;
}
/// Stores \p s in DPRCSSize.
void setDPRCalleeSavedAreaSize(unsigned s) {
  DPRCSSize = s;
}

/// Returns the recorded ArgumentStackSize value.
unsigned getArgumentStackSize() const {
  return ArgumentStackSize;
}
/// Records \p size as the ArgumentStackSize value.
void setArgumentStackSize(unsigned size) {
  ArgumentStackSize = size;
}

/// Hands out the current JumpTableUId value and then advances the counter.
unsigned createJumpTableUId() {
  const unsigned AssignedId = JumpTableUId;
  ++JumpTableUId;
  return AssignedId;
}
Expand Down
3 changes: 3 additions & 0 deletions lib/Target/ARM/ARMSubtarget.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,6 +346,9 @@ class ARMSubtarget : public ARMGenSubtargetInfo {
return TargetTriple.getEnvironment() == Triple::GNUEABIHF ||
TargetTriple.getEnvironment() == Triple::EABIHF;
}
/// True when the environment component of the target triple is
/// Triple::Android.
bool isTargetAndroid() const {
  const bool EnvIsAndroid = (Triple::Android == TargetTriple.getEnvironment());
  return EnvIsAndroid;
}

bool isAPCS_ABI() const {
assert(TargetABI != ARM_ABI_UNKNOWN);
Expand Down
6 changes: 3 additions & 3 deletions lib/Target/ARM/ARMTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, StringRef TT,
DL(computeDataLayout(Subtarget)),
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget) {
FrameLowering(*this, Subtarget) {
initAsmInfo();
if (!Subtarget.hasARMOps())
report_fatal_error("CPU: '" + Subtarget.getCPUString() + "' does not "
Expand All @@ -146,8 +146,8 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, StringRef TT,
TLInfo(*this),
TSInfo(*this),
FrameLowering(Subtarget.hasThumb2()
? new ARMFrameLowering(Subtarget)
: (ARMFrameLowering*)new Thumb1FrameLowering(Subtarget)) {
? new ARMFrameLowering(*this, Subtarget)
: (ARMFrameLowering*)new Thumb1FrameLowering(*this, Subtarget)) {
initAsmInfo();
}

Expand Down
4 changes: 4 additions & 0 deletions lib/Target/ARM/ARMTargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ class ARMBaseTargetMachine : public LLVMTargetMachine {
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);

virtual bool addCodeEmitter(PassManagerBase &PM, JITCodeEmitter &MCE);

/// Base-class version of the instruction-info accessor. It never returns:
/// reaching it triggers llvm_unreachable.
virtual const ARMBaseInstrInfo *getInstrInfo() const {
  llvm_unreachable("getInstrInfo not implemented");
}
};

/// ARMTargetMachine - ARM target machine.
Expand Down
6 changes: 4 additions & 2 deletions lib/Target/ARM/Thumb1FrameLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
#include "llvm/Target/TargetFrameLowering.h"

namespace llvm {
class ARMBaseTargetMachine;
class ARMSubtarget;

class Thumb1FrameLowering : public ARMFrameLowering {
public:
explicit Thumb1FrameLowering(const ARMSubtarget &sti)
: ARMFrameLowering(sti) {
explicit Thumb1FrameLowering(const ARMBaseTargetMachine &tm,
const ARMSubtarget &sti)
: ARMFrameLowering(tm, sti) {
}

/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
Expand Down

0 comments on commit b24ba55

Please sign in to comment.