Diffstat (limited to 'lib/ExecutionEngine')
22 files changed, 1999 insertions, 1241 deletions
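The headline addition in this patch is a C API for MCJIT (LLVMInitializeMCJITCompilerOptions, LLVMCreateMCJITCompilerForModule, and the simple memory-manager bindings in ExecutionEngineBindings.cpp). Before the patch body, here is a minimal sketch (not part of the patch) of how a client might drive those new bindings; `Mod` and `MainFn` are assumed to have been built through the C API already, and error handling is trimmed:

// Sketch only: exercises the C API added below in ExecutionEngineBindings.cpp.
#include "llvm-c/Core.h"
#include "llvm-c/ExecutionEngine.h"
#include "llvm-c/Target.h"
#include <stdio.h>

int runWithMCJIT(LLVMModuleRef Mod, LLVMValueRef MainFn) {
  LLVMLinkInMCJIT();
  LLVMInitializeNativeTarget();

  // Size-initialize the options struct through the new helper so fields
  // added by a newer LLVM keep their defaults.
  struct LLVMMCJITCompilerOptions Options;
  LLVMInitializeMCJITCompilerOptions(&Options, sizeof(Options));
  Options.OptLevel = 2;

  LLVMExecutionEngineRef EE;
  char *Error = NULL;
  if (LLVMCreateMCJITCompilerForModule(&EE, Mod, &Options, sizeof(Options),
                                       &Error)) {
    fprintf(stderr, "MCJIT creation failed: %s\n", Error);
    LLVMDisposeMessage(Error);
    return 1;
  }

  // Per this patch, the run bindings call finalizeObject() internally, so
  // the module is laid out and protected before execution.
  const char *Argv[] = { "prog", NULL };
  const char *Envp[] = { NULL };
  int Rc = LLVMRunFunctionAsMain(EE, MainFn, 1, Argv, Envp);
  LLVMDisposeExecutionEngine(EE);
  return Rc;
}

Note the size-checking convention: passing a struct larger than the library's own is rejected outright, while a smaller (older) struct is zero-extended to defaults.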
diff --git a/lib/ExecutionEngine/CMakeLists.txt b/lib/ExecutionEngine/CMakeLists.txt index cb11bfe..3102c7b 100644 --- a/lib/ExecutionEngine/CMakeLists.txt +++ b/lib/ExecutionEngine/CMakeLists.txt @@ -3,6 +3,7 @@ add_llvm_library(LLVMExecutionEngine ExecutionEngine.cpp ExecutionEngineBindings.cpp + RTDyldMemoryManager.cpp TargetSelect.cpp ) diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 3d59d25..c463e9f 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -47,7 +48,7 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MCJMM, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, @@ -117,7 +118,7 @@ char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) { } bool ExecutionEngine::removeModule(Module *M) { - for(SmallVector<Module *, 1>::iterator I = Modules.begin(), + for(SmallVectorImpl<Module *>::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { Module *Found = *I; if (Found == M) { @@ -455,10 +456,12 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) return 0; + assert(!(JMM && MCJMM)); + // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (JMM) { + if (JMM || MCJMM) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -467,6 +470,14 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } } + + if (MCJMM && ! UseMCJIT) { + if (ErrorStr) + *ErrorStr = + "Cannot create a legacy JIT with a runtime dyld memory " + "manager."; + return 0; + } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -480,7 +491,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (UseMCJIT && ExecutionEngine::MCJITCtor) { ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, + ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? MCJMM : JMM, AllocateGVsWithCode, TheTM.take()); if (EE) return EE; } else if (ExecutionEngine::JITCtor) { @@ -535,6 +546,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { if (isa<UndefValue>(C)) { GenericValue Result; switch (C->getType()->getTypeID()) { + default: + break; case Type::IntegerTyID: case Type::X86_FP80TyID: case Type::FP128TyID: @@ -543,7 +556,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { // with the correct bit width. Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0); break; - default: + case Type::VectorTyID: + // if the whole vector is 'undef' just reserve memory for the value. 
+ const VectorType* VTy = dyn_cast<VectorType>(C->getType()); + const Type *ElemTy = VTy->getElementType(); + unsigned int elemNum = VTy->getNumElements(); + Result.AggregateVal.resize(elemNum); + if (ElemTy->isIntegerTy()) + for (unsigned int i = 0; i < elemNum; ++i) + Result.AggregateVal[i].IntVal = + APInt(ElemTy->getPrimitiveSizeInBits(), 0); break; } return Result; @@ -825,6 +847,101 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { else llvm_unreachable("Unknown constant pointer type!"); break; + case Type::VectorTyID: { + unsigned elemNum; + Type* ElemTy; + const ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(C); + const ConstantVector *CV = dyn_cast<ConstantVector>(C); + const ConstantAggregateZero *CAZ = dyn_cast<ConstantAggregateZero>(C); + + if (CDV) { + elemNum = CDV->getNumElements(); + ElemTy = CDV->getElementType(); + } else if (CV || CAZ) { + VectorType* VTy = dyn_cast<VectorType>(C->getType()); + elemNum = VTy->getNumElements(); + ElemTy = VTy->getElementType(); + } else { + llvm_unreachable("Unknown constant vector type!"); + } + + Result.AggregateVal.resize(elemNum); + // Check if vector holds floats. + if(ElemTy->isFloatTy()) { + if (CAZ) { + GenericValue floatZero; + floatZero.FloatVal = 0.f; + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + floatZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa<UndefValue>(CV->getOperand(i))) + Result.AggregateVal[i].FloatVal = cast<ConstantFP>( + CV->getOperand(i))->getValueAPF().convertToFloat(); + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i); + + break; + } + // Check if vector holds doubles. + if (ElemTy->isDoubleTy()) { + if (CAZ) { + GenericValue doubleZero; + doubleZero.DoubleVal = 0.0; + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + doubleZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa<UndefValue>(CV->getOperand(i))) + Result.AggregateVal[i].DoubleVal = cast<ConstantFP>( + CV->getOperand(i))->getValueAPF().convertToDouble(); + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i); + + break; + } + // Check if vector holds integers. + if (ElemTy->isIntegerTy()) { + if (CAZ) { + GenericValue intZero; + intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull); + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + intZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa<UndefValue>(CV->getOperand(i))) + Result.AggregateVal[i].IntVal = cast<ConstantInt>( + CV->getOperand(i))->getValue(); + else { + Result.AggregateVal[i].IntVal = + APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0); + } + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].IntVal = APInt( + CDV->getElementType()->getPrimitiveSizeInBits(), + CDV->getElementAsInteger(i)); + + break; + } + llvm_unreachable("Unknown constant pointer type!"); + } + break; + default: SmallString<256> Msg; raw_svector_ostream OS(Msg); @@ -842,7 +959,7 @@ static void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, assert((IntVal.getBitWidth()+7)/8 >= StoreBytes && "Integer too small!"); const uint8_t *Src = (const uint8_t *)IntVal.getRawData(); - if (sys::isLittleEndianHost()) { + if (sys::IsLittleEndianHost) { // Little-endian host - the source is ordered from LSB to MSB. 
Order the // destination from LSB to MSB: Do a straight copy. memcpy(Dst, Src, StoreBytes); @@ -866,6 +983,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty); switch (Ty->getTypeID()) { + default: + dbgs() << "Cannot store value of type " << *Ty << "!\n"; + break; case Type::IntegerTyID: StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes); break; @@ -885,11 +1005,22 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, *((PointerTy*)Ptr) = Val.PointerVal; break; - default: - dbgs() << "Cannot store value of type " << *Ty << "!\n"; + case Type::VectorTyID: + for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) { + if (cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal; + if (cast<VectorType>(Ty)->getElementType()->isFloatTy()) + *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal; + if (cast<VectorType>(Ty)->getElementType()->isIntegerTy()) { + unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8; + StoreIntToMemory(Val.AggregateVal[i].IntVal, + (uint8_t*)Ptr + numOfBytes*i, numOfBytes); + } + } + break; } - if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian()) + if (sys::IsLittleEndianHost != getDataLayout()->isLittleEndian()) // Host and target are different endian - reverse the stored bytes. std::reverse((uint8_t*)Ptr, StoreBytes + (uint8_t*)Ptr); } @@ -901,7 +1032,7 @@ static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { uint8_t *Dst = reinterpret_cast<uint8_t *>( const_cast<uint64_t *>(IntVal.getRawData())); - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) // Little-endian host - the destination must be ordered from LSB to MSB. // The source is ordered from LSB to MSB: Do a straight copy. 
memcpy(Dst, Src, LoadBytes); @@ -951,6 +1082,31 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, Result.IntVal = APInt(80, y); break; } + case Type::VectorTyID: { + const VectorType *VT = cast<VectorType>(Ty); + const Type *ElemT = VT->getElementType(); + const unsigned numElems = VT->getNumElements(); + if (ElemT->isFloatTy()) { + Result.AggregateVal.resize(numElems); + for (unsigned i = 0; i < numElems; ++i) + Result.AggregateVal[i].FloatVal = *((float*)Ptr+i); + } + if (ElemT->isDoubleTy()) { + Result.AggregateVal.resize(numElems); + for (unsigned i = 0; i < numElems; ++i) + Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i); + } + if (ElemT->isIntegerTy()) { + GenericValue intZero; + const unsigned elemBitWidth = cast<IntegerType>(ElemT)->getBitWidth(); + intZero.IntVal = APInt(elemBitWidth, 0); + Result.AggregateVal.resize(numElems, intZero); + for (unsigned i = 0; i < numElems; ++i) + LoadIntFromMemory(Result.AggregateVal[i].IntVal, + (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8); + } + break; + } default: SmallString<256> Msg; raw_svector_ostream OS(Msg); diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index f4e8246..88e73bf 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -15,11 +15,34 @@ #include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include <cstring> using namespace llvm; +// Wrapping the C bindings types. +DEFINE_SIMPLE_CONVERSION_FUNCTIONS(GenericValue, LLVMGenericValueRef) + +inline DataLayout *unwrap(LLVMTargetDataRef P) { + return reinterpret_cast<DataLayout*>(P); +} + +inline LLVMTargetDataRef wrap(const DataLayout *P) { + return reinterpret_cast<LLVMTargetDataRef>(const_cast<DataLayout*>(P)); +} + +inline TargetLibraryInfo *unwrap(LLVMTargetLibraryInfoRef P) { + return reinterpret_cast<TargetLibraryInfo*>(P); +} + +inline LLVMTargetLibraryInfoRef wrap(const TargetLibraryInfo *P) { + TargetLibraryInfo *X = const_cast<TargetLibraryInfo*>(P); + return reinterpret_cast<LLVMTargetLibraryInfoRef>(X); +} + /*===-- Operations on generic values --------------------------------------===*/ LLVMGenericValueRef LLVMCreateGenericValueOfInt(LLVMTypeRef Ty, @@ -132,6 +155,59 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, return 1; } +void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions, + size_t SizeOfPassedOptions) { + LLVMMCJITCompilerOptions options; + memset(&options, 0, sizeof(options)); // Most fields are zero by default. + options.CodeModel = LLVMCodeModelJITDefault; + + memcpy(PassedOptions, &options, + std::min(sizeof(options), SizeOfPassedOptions)); +} + +LLVMBool LLVMCreateMCJITCompilerForModule( + LLVMExecutionEngineRef *OutJIT, LLVMModuleRef M, + LLVMMCJITCompilerOptions *PassedOptions, size_t SizeOfPassedOptions, + char **OutError) { + LLVMMCJITCompilerOptions options; + // If the user passed a larger sized options struct, then they were compiled + // against a newer LLVM. Tell them that something is wrong. 
+ if (SizeOfPassedOptions > sizeof(options)) { + *OutError = strdup( + "Refusing to use options struct that is larger than my own; assuming " + "LLVM library mismatch."); + return 1; + } + + // Defend against the user having an old version of the API by ensuring that + // any fields they didn't see are cleared. We must defend against fields being + // set to the bitwise equivalent of zero, and assume that this means "do the + // default" as if that option hadn't been available. + LLVMInitializeMCJITCompilerOptions(&options, sizeof(options)); + memcpy(&options, PassedOptions, SizeOfPassedOptions); + + TargetOptions targetOptions; + targetOptions.NoFramePointerElim = options.NoFramePointerElim; + targetOptions.EnableFastISel = options.EnableFastISel; + + std::string Error; + EngineBuilder builder(unwrap(M)); + builder.setEngineKind(EngineKind::JIT) + .setErrorStr(&Error) + .setUseMCJIT(true) + .setOptLevel((CodeGenOpt::Level)options.OptLevel) + .setCodeModel(unwrap(options.CodeModel)) + .setTargetOptions(targetOptions); + if (options.MCJMM) + builder.setMCJITMemoryManager(unwrap(options.MCJMM)); + if (ExecutionEngine *JIT = builder.create()) { + *OutJIT = wrap(JIT); + return 0; + } + *OutError = strdup(Error.c_str()); + return 1; +} + LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE, LLVMModuleProviderRef MP, char **OutError) { @@ -176,6 +252,8 @@ void LLVMRunStaticDestructors(LLVMExecutionEngineRef EE) { int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned ArgC, const char * const *ArgV, const char * const *EnvP) { + unwrap(EE)->finalizeObject(); + std::vector<std::string> ArgVec; for (unsigned I = 0; I != ArgC; ++I) ArgVec.push_back(ArgV[I]); @@ -186,6 +264,8 @@ int LLVMRunFunctionAsMain(LLVMExecutionEngineRef EE, LLVMValueRef F, LLVMGenericValueRef LLVMRunFunction(LLVMExecutionEngineRef EE, LLVMValueRef F, unsigned NumArgs, LLVMGenericValueRef *Args) { + unwrap(EE)->finalizeObject(); + std::vector<GenericValue> ArgVec; ArgVec.reserve(NumArgs); for (unsigned I = 0; I != NumArgs; ++I) @@ -234,7 +314,8 @@ LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name, return 1; } -void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, LLVMValueRef Fn) { +void *LLVMRecompileAndRelinkFunction(LLVMExecutionEngineRef EE, + LLVMValueRef Fn) { return unwrap(EE)->recompileAndRelinkFunction(unwrap<Function>(Fn)); } @@ -248,5 +329,114 @@ void LLVMAddGlobalMapping(LLVMExecutionEngineRef EE, LLVMValueRef Global, } void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) { + unwrap(EE)->finalizeObject(); + return unwrap(EE)->getPointerToGlobal(unwrap<GlobalValue>(Global)); } + +/*===-- Operations on memory managers -------------------------------------===*/ + +namespace { + +struct SimpleBindingMMFunctions { + uint8_t *(*AllocateCodeSection)(void *Opaque, + uintptr_t Size, unsigned Alignment, + unsigned SectionID); + uint8_t *(*AllocateDataSection)(void *Opaque, + uintptr_t Size, unsigned Alignment, + unsigned SectionID, LLVMBool IsReadOnly); + LLVMBool (*FinalizeMemory)(void *Opaque, char **ErrMsg); + void (*Destroy)(void *Opaque); +}; + +class SimpleBindingMemoryManager : public RTDyldMemoryManager { +public: + SimpleBindingMemoryManager(const SimpleBindingMMFunctions& Functions, + void *Opaque); + virtual ~SimpleBindingMemoryManager(); + + virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, + unsigned SectionID); + + virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, + unsigned 
SectionID, + bool isReadOnly); + + virtual bool finalizeMemory(std::string *ErrMsg); + +private: + SimpleBindingMMFunctions Functions; + void *Opaque; +}; + +SimpleBindingMemoryManager::SimpleBindingMemoryManager( + const SimpleBindingMMFunctions& Functions, + void *Opaque) + : Functions(Functions), Opaque(Opaque) { + assert(Functions.AllocateCodeSection && + "No AllocateCodeSection function provided!"); + assert(Functions.AllocateDataSection && + "No AllocateDataSection function provided!"); + assert(Functions.FinalizeMemory && + "No FinalizeMemory function provided!"); + assert(Functions.Destroy && + "No Destroy function provided!"); +} + +SimpleBindingMemoryManager::~SimpleBindingMemoryManager() { + Functions.Destroy(Opaque); +} + +uint8_t *SimpleBindingMemoryManager::allocateCodeSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID) { + return Functions.AllocateCodeSection(Opaque, Size, Alignment, SectionID); +} + +uint8_t *SimpleBindingMemoryManager::allocateDataSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, bool isReadOnly) { + return Functions.AllocateDataSection(Opaque, Size, Alignment, SectionID, + isReadOnly); +} + +bool SimpleBindingMemoryManager::finalizeMemory(std::string *ErrMsg) { + char *errMsgCString = 0; + bool result = Functions.FinalizeMemory(Opaque, &errMsgCString); + assert((result || !errMsgCString) && + "Did not expect an error message if FinalizeMemory succeeded"); + if (errMsgCString) { + if (ErrMsg) + *ErrMsg = errMsgCString; + free(errMsgCString); + } + return result; +} + +} // anonymous namespace + +LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager( + void *Opaque, + uint8_t *(*AllocateCodeSection)(void *Opaque, + uintptr_t Size, unsigned Alignment, + unsigned SectionID), + uint8_t *(*AllocateDataSection)(void *Opaque, + uintptr_t Size, unsigned Alignment, + unsigned SectionID, LLVMBool IsReadOnly), + LLVMBool (*FinalizeMemory)(void *Opaque, char **ErrMsg), + void (*Destroy)(void *Opaque)) { + + if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory || + !Destroy) + return NULL; + + SimpleBindingMMFunctions functions; + functions.AllocateCodeSection = AllocateCodeSection; + functions.AllocateDataSection = AllocateDataSection; + functions.FinalizeMemory = FinalizeMemory; + functions.Destroy = Destroy; + return wrap(new SimpleBindingMemoryManager(functions, Opaque)); +} + +void LLVMDisposeMCJITMemoryManager(LLVMMCJITMemoryManagerRef MM) { + delete unwrap(MM); +} + diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index ec4f7f6..fc3d579 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -114,6 +114,15 @@ static void executeFRemInst(GenericValue &Dest, GenericValue Src1, Dest.IntVal = APInt(1,Src1.IntVal.OP(Src2.IntVal)); \ break; +#define IMPLEMENT_VECTOR_INTEGER_ICMP(OP, TY) \ + case Type::VectorTyID: { \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].IntVal.OP(Src2.AggregateVal[_i].IntVal));\ + } break; + // Handle pointers specially because they must be compared with only as much // width as the host has. 
We _do not_ want to be comparing 64 bit values when // running on a 32-bit target, otherwise the upper 32 bits might mess up @@ -129,6 +138,7 @@ static GenericValue executeICMP_EQ(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(eq,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(eq,Ty); IMPLEMENT_POINTER_ICMP(==); default: dbgs() << "Unhandled type for ICMP_EQ predicate: " << *Ty << "\n"; @@ -142,6 +152,7 @@ static GenericValue executeICMP_NE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ne,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ne,Ty); IMPLEMENT_POINTER_ICMP(!=); default: dbgs() << "Unhandled type for ICMP_NE predicate: " << *Ty << "\n"; @@ -155,6 +166,7 @@ static GenericValue executeICMP_ULT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ult,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ult,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_ULT predicate: " << *Ty << "\n"; @@ -168,6 +180,7 @@ static GenericValue executeICMP_SLT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(slt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(slt,Ty); IMPLEMENT_POINTER_ICMP(<); default: dbgs() << "Unhandled type for ICMP_SLT predicate: " << *Ty << "\n"; @@ -181,6 +194,7 @@ static GenericValue executeICMP_UGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ugt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ugt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_UGT predicate: " << *Ty << "\n"; @@ -194,6 +208,7 @@ static GenericValue executeICMP_SGT(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sgt,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sgt,Ty); IMPLEMENT_POINTER_ICMP(>); default: dbgs() << "Unhandled type for ICMP_SGT predicate: " << *Ty << "\n"; @@ -207,6 +222,7 @@ static GenericValue executeICMP_ULE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(ule,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(ule,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_ULE predicate: " << *Ty << "\n"; @@ -220,6 +236,7 @@ static GenericValue executeICMP_SLE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sle,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sle,Ty); IMPLEMENT_POINTER_ICMP(<=); default: dbgs() << "Unhandled type for ICMP_SLE predicate: " << *Ty << "\n"; @@ -233,6 +250,7 @@ static GenericValue executeICMP_UGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(uge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(uge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_UGE predicate: " << *Ty << "\n"; @@ -246,6 +264,7 @@ static GenericValue executeICMP_SGE(GenericValue Src1, GenericValue Src2, GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_INTEGER_ICMP(sge,Ty); + IMPLEMENT_VECTOR_INTEGER_ICMP(sge,Ty); IMPLEMENT_POINTER_ICMP(>=); default: dbgs() << "Unhandled type for ICMP_SGE predicate: " << *Ty << "\n"; @@ -285,12 +304,29 @@ void Interpreter::visitICmpInst(ICmpInst &I) { Dest.IntVal = APInt(1,Src1.TY##Val OP Src2.TY##Val); \ break +#define IMPLEMENT_VECTOR_FCMP_T(OP, TY) \ + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); \ + 
Dest.AggregateVal.resize( Src1.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<Src1.AggregateVal.size();_i++) \ + Dest.AggregateVal[_i].IntVal = APInt(1, \ + Src1.AggregateVal[_i].TY##Val OP Src2.AggregateVal[_i].TY##Val);\ + break; + +#define IMPLEMENT_VECTOR_FCMP(OP) \ + case Type::VectorTyID: \ + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Float); \ + } else { \ + IMPLEMENT_VECTOR_FCMP_T(OP, Double); \ + } + static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(==, Float); IMPLEMENT_FCMP(==, Double); + IMPLEMENT_VECTOR_FCMP(==); default: dbgs() << "Unhandled type for FCmp EQ instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -298,17 +334,65 @@ static GenericValue executeFCMP_OEQ(GenericValue Src1, GenericValue Src2, return Dest; } +#define IMPLEMENT_SCALAR_NANS(TY, X,Y) \ + if (TY->isFloatTy()) { \ + if (X.FloatVal != X.FloatVal || Y.FloatVal != Y.FloatVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } else { \ + if (X.DoubleVal != X.DoubleVal || Y.DoubleVal != Y.DoubleVal) { \ + Dest.IntVal = APInt(1,false); \ + return Dest; \ + } \ + } + +#define MASK_VECTOR_NANS_T(X,Y, TZ, FLAG) \ + assert(X.AggregateVal.size() == Y.AggregateVal.size()); \ + Dest.AggregateVal.resize( X.AggregateVal.size() ); \ + for( uint32_t _i=0;_i<X.AggregateVal.size();_i++) { \ + if (X.AggregateVal[_i].TZ##Val != X.AggregateVal[_i].TZ##Val || \ + Y.AggregateVal[_i].TZ##Val != Y.AggregateVal[_i].TZ##Val) \ + Dest.AggregateVal[_i].IntVal = APInt(1,FLAG); \ + else { \ + Dest.AggregateVal[_i].IntVal = APInt(1,!FLAG); \ + } \ + } + +#define MASK_VECTOR_NANS(TY, X,Y, FLAG) \ + if (TY->isVectorTy()) { \ + if (dyn_cast<VectorType>(TY)->getElementType()->isFloatTy()) { \ + MASK_VECTOR_NANS_T(X, Y, Float, FLAG) \ + } else { \ + MASK_VECTOR_NANS_T(X, Y, Double, FLAG) \ + } \ + } \ + + + static GenericValue executeFCMP_ONE(GenericValue Src1, GenericValue Src2, - Type *Ty) { + Type *Ty) +{ GenericValue Dest; + // if input is scalar value and Src1 or Src2 is NaN return false + IMPLEMENT_SCALAR_NANS(Ty, Src1, Src2) + // if vector input detect NaNs and fill mask + MASK_VECTOR_NANS(Ty, Src1, Src2, false) + GenericValue DestMask = Dest; switch (Ty->getTypeID()) { IMPLEMENT_FCMP(!=, Float); IMPLEMENT_FCMP(!=, Double); - - default: - dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; - llvm_unreachable(0); + IMPLEMENT_VECTOR_FCMP(!=); + default: + dbgs() << "Unhandled type for FCmp NE instruction: " << *Ty << "\n"; + llvm_unreachable(0); } + // in vector case mask out NaN elements + if (Ty->isVectorTy()) + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + if (DestMask.AggregateVal[_i].IntVal == false) + Dest.AggregateVal[_i].IntVal = APInt(1,false); + return Dest; } @@ -318,6 +402,7 @@ static GenericValue executeFCMP_OLE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<=, Float); IMPLEMENT_FCMP(<=, Double); + IMPLEMENT_VECTOR_FCMP(<=); default: dbgs() << "Unhandled type for FCmp LE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -331,6 +416,7 @@ static GenericValue executeFCMP_OGE(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>=, Float); IMPLEMENT_FCMP(>=, Double); + IMPLEMENT_VECTOR_FCMP(>=); default: dbgs() << "Unhandled type for FCmp GE instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -344,6 +430,7 @@ static GenericValue executeFCMP_OLT(GenericValue Src1, 
GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(<, Float); IMPLEMENT_FCMP(<, Double); + IMPLEMENT_VECTOR_FCMP(<); default: dbgs() << "Unhandled type for FCmp LT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -357,6 +444,7 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, switch (Ty->getTypeID()) { IMPLEMENT_FCMP(>, Float); IMPLEMENT_FCMP(>, Double); + IMPLEMENT_VECTOR_FCMP(>); default: dbgs() << "Unhandled type for FCmp GT instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -375,18 +463,32 @@ static GenericValue executeFCMP_OGT(GenericValue Src1, GenericValue Src2, return Dest; \ } +#define IMPLEMENT_VECTOR_UNORDERED(TY, X,Y, _FUNC) \ + if (TY->isVectorTy()) { \ + GenericValue DestMask = Dest; \ + Dest = _FUNC(Src1, Src2, Ty); \ + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) \ + if (DestMask.AggregateVal[_i].IntVal == true) \ + Dest.AggregateVal[_i].IntVal = APInt(1,true); \ + return Dest; \ + } static GenericValue executeFCMP_UEQ(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OEQ) return executeFCMP_OEQ(Src1, Src2, Ty); + } static GenericValue executeFCMP_UNE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_ONE) return executeFCMP_ONE(Src1, Src2, Ty); } @@ -394,6 +496,8 @@ static GenericValue executeFCMP_ULE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLE) return executeFCMP_OLE(Src1, Src2, Ty); } @@ -401,6 +505,8 @@ static GenericValue executeFCMP_UGE(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGE) return executeFCMP_OGE(Src1, Src2, Ty); } @@ -408,6 +514,8 @@ static GenericValue executeFCMP_ULT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OLT) return executeFCMP_OLT(Src1, Src2, Ty); } @@ -415,33 +523,88 @@ static GenericValue executeFCMP_UGT(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; IMPLEMENT_UNORDERED(Ty, Src1, Src2) + MASK_VECTOR_NANS(Ty, Src1, Src2, true) + IMPLEMENT_VECTOR_UNORDERED(Ty, Src1, Src2, executeFCMP_OGT) return executeFCMP_OGT(Src1, Src2, Ty); } static GenericValue executeFCMP_ORD(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal == + Src1.AggregateVal[_i].FloatVal) && + (Src2.AggregateVal[_i].FloatVal == + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal == + Src1.AggregateVal[_i].DoubleVal) && + 
(Src2.AggregateVal[_i].DoubleVal == + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal == Src1.FloatVal && Src2.FloatVal == Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal == Src1.DoubleVal && Src2.DoubleVal == Src2.DoubleVal)); + } return Dest; } static GenericValue executeFCMP_UNO(GenericValue Src1, GenericValue Src2, Type *Ty) { GenericValue Dest; - if (Ty->isFloatTy()) + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + if(dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].FloatVal != + Src1.AggregateVal[_i].FloatVal) || + (Src2.AggregateVal[_i].FloatVal != + Src2.AggregateVal[_i].FloatVal))); + } else { + for( size_t _i=0;_i<Src1.AggregateVal.size();_i++) + Dest.AggregateVal[_i].IntVal = APInt(1, + ( (Src1.AggregateVal[_i].DoubleVal != + Src1.AggregateVal[_i].DoubleVal) || + (Src2.AggregateVal[_i].DoubleVal != + Src2.AggregateVal[_i].DoubleVal))); + } + } else if (Ty->isFloatTy()) Dest.IntVal = APInt(1,(Src1.FloatVal != Src1.FloatVal || Src2.FloatVal != Src2.FloatVal)); - else + else { Dest.IntVal = APInt(1,(Src1.DoubleVal != Src1.DoubleVal || Src2.DoubleVal != Src2.DoubleVal)); + } return Dest; } +static GenericValue executeFCMP_BOOL(GenericValue Src1, GenericValue Src2, + const Type *Ty, const bool val) { + GenericValue Dest; + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + for( size_t _i=0; _i<Src1.AggregateVal.size(); _i++) + Dest.AggregateVal[_i].IntVal = APInt(1,val); + } else { + Dest.IntVal = APInt(1, val); + } + + return Dest; +} + void Interpreter::visitFCmpInst(FCmpInst &I) { ExecutionContext &SF = ECStack.back(); Type *Ty = I.getOperand(0)->getType(); @@ -450,8 +613,14 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { GenericValue R; // Result switch (I.getPredicate()) { - case FCmpInst::FCMP_FALSE: R.IntVal = APInt(1,false); break; - case FCmpInst::FCMP_TRUE: R.IntVal = APInt(1,true); break; + default: + dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; + llvm_unreachable(0); + break; + case FCmpInst::FCMP_FALSE: R = executeFCMP_BOOL(Src1, Src2, Ty, false); + break; + case FCmpInst::FCMP_TRUE: R = executeFCMP_BOOL(Src1, Src2, Ty, true); + break; case FCmpInst::FCMP_ORD: R = executeFCMP_ORD(Src1, Src2, Ty); break; case FCmpInst::FCMP_UNO: R = executeFCMP_UNO(Src1, Src2, Ty); break; case FCmpInst::FCMP_UEQ: R = executeFCMP_UEQ(Src1, Src2, Ty); break; @@ -466,9 +635,6 @@ void Interpreter::visitFCmpInst(FCmpInst &I) { case FCmpInst::FCMP_OLE: R = executeFCMP_OLE(Src1, Src2, Ty); break; case FCmpInst::FCMP_UGE: R = executeFCMP_UGE(Src1, Src2, Ty); break; case FCmpInst::FCMP_OGE: R = executeFCMP_OGE(Src1, Src2, Ty); break; - default: - dbgs() << "Don't know how to handle this FCmp predicate!\n-->" << I; - llvm_unreachable(0); } SetValue(&I, R, SF); @@ -502,16 +668,8 @@ static GenericValue executeCmpInst(unsigned predicate, GenericValue Src1, case FCmpInst::FCMP_ULE: return executeFCMP_ULE(Src1, Src2, Ty); case FCmpInst::FCMP_OGE: return executeFCMP_OGE(Src1, Src2, Ty); case FCmpInst::FCMP_UGE: return executeFCMP_UGE(Src1, Src2, Ty); - case FCmpInst::FCMP_FALSE: { - GenericValue Result; - Result.IntVal = APInt(1, false); - return Result; - } - case 
FCmpInst::FCMP_TRUE: { - GenericValue Result; - Result.IntVal = APInt(1, true); - return Result; - } + case FCmpInst::FCMP_FALSE: return executeFCMP_BOOL(Src1, Src2, Ty, false); + case FCmpInst::FCMP_TRUE: return executeFCMP_BOOL(Src1, Src2, Ty, true); default: dbgs() << "Unhandled Cmp predicate\n"; llvm_unreachable(0); @@ -525,27 +683,105 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue R; // Result - switch (I.getOpcode()) { - case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; - case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; - case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; - case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; - case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; - case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; - case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; - case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; - case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; - case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; - case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; - case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; - case Instruction::And: R.IntVal = Src1.IntVal & Src2.IntVal; break; - case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; - case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; - default: - dbgs() << "Don't know how to handle this binary operator!\n-->" << I; - llvm_unreachable(0); + // First process vector operation + if (Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + R.AggregateVal.resize(Src1.AggregateVal.size()); + + // Macros to execute binary operation 'OP' over integer vectors +#define INTEGER_VECTOR_OPERATION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal OP Src2.AggregateVal[i].IntVal; + + // Additional macros to execute binary operations udiv/sdiv/urem/srem since + // they have different notation. 
+#define INTEGER_VECTOR_FUNCTION(OP) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].IntVal = \ + Src1.AggregateVal[i].IntVal.OP(Src2.AggregateVal[i].IntVal); + + // Macros to execute binary operation 'OP' over floating point type TY + // (float or double) vectors +#define FLOAT_VECTOR_FUNCTION(OP, TY) \ + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) \ + R.AggregateVal[i].TY = \ + Src1.AggregateVal[i].TY OP Src2.AggregateVal[i].TY; + + // Macros to choose appropriate TY: float or double and run operation + // execution +#define FLOAT_VECTOR_OP(OP) { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) \ + FLOAT_VECTOR_FUNCTION(OP, FloatVal) \ + else { \ + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) \ + FLOAT_VECTOR_FUNCTION(OP, DoubleVal) \ + else { \ + dbgs() << "Unhandled type for OP instruction: " << *Ty << "\n"; \ + llvm_unreachable(0); \ + } \ + } \ +} + + switch(I.getOpcode()){ + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: INTEGER_VECTOR_OPERATION(+) break; + case Instruction::Sub: INTEGER_VECTOR_OPERATION(-) break; + case Instruction::Mul: INTEGER_VECTOR_OPERATION(*) break; + case Instruction::UDiv: INTEGER_VECTOR_FUNCTION(udiv) break; + case Instruction::SDiv: INTEGER_VECTOR_FUNCTION(sdiv) break; + case Instruction::URem: INTEGER_VECTOR_FUNCTION(urem) break; + case Instruction::SRem: INTEGER_VECTOR_FUNCTION(srem) break; + case Instruction::And: INTEGER_VECTOR_OPERATION(&) break; + case Instruction::Or: INTEGER_VECTOR_OPERATION(|) break; + case Instruction::Xor: INTEGER_VECTOR_OPERATION(^) break; + case Instruction::FAdd: FLOAT_VECTOR_OP(+) break; + case Instruction::FSub: FLOAT_VECTOR_OP(-) break; + case Instruction::FMul: FLOAT_VECTOR_OP(*) break; + case Instruction::FDiv: FLOAT_VECTOR_OP(/) break; + case Instruction::FRem: + if (dyn_cast<VectorType>(Ty)->getElementType()->isFloatTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].FloatVal = + fmod(Src1.AggregateVal[i].FloatVal, Src2.AggregateVal[i].FloatVal); + else { + if (dyn_cast<VectorType>(Ty)->getElementType()->isDoubleTy()) + for (unsigned i = 0; i < R.AggregateVal.size(); ++i) + R.AggregateVal[i].DoubleVal = + fmod(Src1.AggregateVal[i].DoubleVal, Src2.AggregateVal[i].DoubleVal); + else { + dbgs() << "Unhandled type for Rem instruction: " << *Ty << "\n"; + llvm_unreachable(0); + } + } + break; + } + } else { + switch (I.getOpcode()) { + default: + dbgs() << "Don't know how to handle this binary operator!\n-->" << I; + llvm_unreachable(0); + break; + case Instruction::Add: R.IntVal = Src1.IntVal + Src2.IntVal; break; + case Instruction::Sub: R.IntVal = Src1.IntVal - Src2.IntVal; break; + case Instruction::Mul: R.IntVal = Src1.IntVal * Src2.IntVal; break; + case Instruction::FAdd: executeFAddInst(R, Src1, Src2, Ty); break; + case Instruction::FSub: executeFSubInst(R, Src1, Src2, Ty); break; + case Instruction::FMul: executeFMulInst(R, Src1, Src2, Ty); break; + case Instruction::FDiv: executeFDivInst(R, Src1, Src2, Ty); break; + case Instruction::FRem: executeFRemInst(R, Src1, Src2, Ty); break; + case Instruction::UDiv: R.IntVal = Src1.IntVal.udiv(Src2.IntVal); break; + case Instruction::SDiv: R.IntVal = Src1.IntVal.sdiv(Src2.IntVal); break; + case Instruction::URem: R.IntVal = Src1.IntVal.urem(Src2.IntVal); break; + case Instruction::SRem: R.IntVal = Src1.IntVal.srem(Src2.IntVal); break; + case Instruction::And: R.IntVal = Src1.IntVal & 
Src2.IntVal; break; + case Instruction::Or: R.IntVal = Src1.IntVal | Src2.IntVal; break; + case Instruction::Xor: R.IntVal = Src1.IntVal ^ Src2.IntVal; break; + } } - SetValue(&I, R, SF); } @@ -902,16 +1138,42 @@ void Interpreter::visitCallSite(CallSite CS) { callFunction((Function*)GVTOP(SRC), ArgVals); } +// Auxiliary function for shift operations +static unsigned getShiftAmount(uint64_t orgShiftAmount, + llvm::APInt valueToShift) { + unsigned valueWidth = valueToShift.getBitWidth(); + if (orgShiftAmount < (uint64_t)valueWidth) + return orgShiftAmount; + // According to the LLVM documentation, if orgShiftAmount >= valueWidth, + // the result is undefined; we shift by this rule instead: + return (NextPowerOf2(valueWidth-1) - 1) & orgShiftAmount; +} + + void Interpreter::visitShl(BinaryOperator &I) { ExecutionContext &SF = ECStack.back(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) - Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue()); - else - Dest.IntVal = Src1.IntVal; - + const Type *Ty = I.getType(); + + if (Ty->isVectorTy()) { + uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); + assert(src1Size == Src2.AggregateVal.size()); + for (unsigned i = 0; i < src1Size; i++) { + GenericValue Result; + uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal; + Result.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift)); + Dest.AggregateVal.push_back(Result); + } + } else { + // scalar + uint64_t shiftAmount = Src2.IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.IntVal; + Dest.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift)); + } + SetValue(&I, Dest, SF); } @@ -920,11 +1182,25 @@ void Interpreter::visitLShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) - Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue()); - else - Dest.IntVal = Src1.IntVal; - + const Type *Ty = I.getType(); + + if (Ty->isVectorTy()) { + uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); + assert(src1Size == Src2.AggregateVal.size()); + for (unsigned i = 0; i < src1Size; i++) { + GenericValue Result; + uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal; + Result.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift)); + Dest.AggregateVal.push_back(Result); + } + } else { + // scalar + uint64_t shiftAmount = Src2.IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.IntVal; + Dest.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift)); + } + SetValue(&I, Dest, SF); } @@ -933,110 +1209,273 @@ void Interpreter::visitAShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) - Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue()); - else - Dest.IntVal = Src1.IntVal; - + const Type *Ty = I.getType(); + + if (Ty->isVectorTy()) { + size_t src1Size = Src1.AggregateVal.size(); + assert(src1Size == Src2.AggregateVal.size()); + for (unsigned i = 0; i < src1Size; i++) { + GenericValue Result; + uint64_t shiftAmount =
Src2.AggregateVal[i].IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal; + Result.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift)); + Dest.AggregateVal.push_back(Result); + } + } else { + // scalar + uint64_t shiftAmount = Src2.IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.IntVal; + Dest.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift)); + } + SetValue(&I, Dest, SF); } GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - IntegerType *DITy = cast<IntegerType>(DstTy); - unsigned DBitWidth = DITy->getBitWidth(); - Dest.IntVal = Src.IntVal.trunc(DBitWidth); + Type *SrcTy = SrcVal->getType(); + if (SrcTy->isVectorTy()) { + Type *DstVecTy = DstTy->getScalarType(); + unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned NumElts = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(NumElts); + for (unsigned i = 0; i < NumElts; i++) + Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.trunc(DBitWidth); + } else { + IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.trunc(DBitWidth); + } return Dest; } GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - IntegerType *DITy = cast<IntegerType>(DstTy); - unsigned DBitWidth = DITy->getBitWidth(); - Dest.IntVal = Src.IntVal.sext(DBitWidth); + if (SrcTy->isVectorTy()) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. + Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.sext(DBitWidth); + } else { + const IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.sext(DBitWidth); + } return Dest; } GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - IntegerType *DITy = cast<IntegerType>(DstTy); - unsigned DBitWidth = DITy->getBitWidth(); - Dest.IntVal = Src.IntVal.zext(DBitWidth); + if (SrcTy->isVectorTy()) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. 
+ Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.zext(DBitWidth); + } else { + const IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.zext(DBitWidth); + } return Dest; } GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && - "Invalid FPTrunc instruction"); - Dest.FloatVal = (float) Src.DoubleVal; + + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + assert(SrcVal->getType()->getScalarType()->isDoubleTy() && + DstTy->getScalarType()->isFloatTy() && + "Invalid FPTrunc instruction"); + + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. + Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].FloatVal = (float)Src.AggregateVal[i].DoubleVal; + } else { + assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && + "Invalid FPTrunc instruction"); + Dest.FloatVal = (float)Src.DoubleVal; + } + return Dest; } GenericValue Interpreter::executeFPExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && - "Invalid FPTrunc instruction"); - Dest.DoubleVal = (double) Src.FloatVal; + + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + assert(SrcVal->getType()->getScalarType()->isFloatTy() && + DstTy->getScalarType()->isDoubleTy() && "Invalid FPExt instruction"); + + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. + Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].DoubleVal = (double)Src.AggregateVal[i].FloatVal; + } else { + assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && + "Invalid FPExt instruction"); + Dest.DoubleVal = (double)Src.FloatVal; + } + return Dest; } GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { Type *SrcTy = SrcVal->getType(); - uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); - if (SrcTy->getTypeID() == Type::FloatTyID) - Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); - else - Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + if (SrcTy->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + const Type *SrcVecTy = SrcTy->getScalarType(); + uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. 
+ Dest.AggregateVal.resize(size); + + if (SrcVecTy->getTypeID() == Type::FloatTyID) { + assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToUI instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt( + Src.AggregateVal[i].FloatVal, DBitWidth); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt( + Src.AggregateVal[i].DoubleVal, DBitWidth); + } + } else { + // scalar + uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else { + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + } + } + return Dest; } GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { Type *SrcTy = SrcVal->getType(); - uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); - if (SrcTy->getTypeID() == Type::FloatTyID) - Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); - else - Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + if (SrcTy->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + const Type *SrcVecTy = SrcTy->getScalarType(); + uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(size); + + if (SrcVecTy->getTypeID() == Type::FloatTyID) { + assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToSI instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt( + Src.AggregateVal[i].FloatVal, DBitWidth); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt( + Src.AggregateVal[i].DoubleVal, DBitWidth); + } + } else { + // scalar + unsigned DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else { + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + } + } return Dest; } GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); - if (DstTy->getTypeID() == Type::FloatTyID) - Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); - else - Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal); + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(size); + + if (DstVecTy->getTypeID() == Type::FloatTyID) { + assert(DstVecTy->isFloatingPointTy() && "Invalid UIToFP instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].FloatVal = + APIntOps::RoundAPIntToFloat(Src.AggregateVal[i].IntVal); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].DoubleVal = + APIntOps::RoundAPIntToDouble(Src.AggregateVal[i].IntVal); + } + } else { + // 
scalar + assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); + else { + Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal); + } + } return Dest; } GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); - if (DstTy->getTypeID() == Type::FloatTyID) - Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); - else - Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal); - return Dest; + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(size); + + if (DstVecTy->getTypeID() == Type::FloatTyID) { + assert(DstVecTy->isFloatingPointTy() && "Invalid SIToFP instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].FloatVal = + APIntOps::RoundSignedAPIntToFloat(Src.AggregateVal[i].IntVal); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].DoubleVal = + APIntOps::RoundSignedAPIntToDouble(Src.AggregateVal[i].IntVal); + } + } else { + // scalar + assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); + else { + Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal); + } + } + + return Dest; } GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, Type *DstTy, @@ -1064,33 +1503,167 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy, GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - + + // This instruction supports bitwise conversion of vectors to integers and + // to vectors of other types (as long as they have the same size) Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - if (DstTy->isPointerTy()) { - assert(SrcTy->isPointerTy() && "Invalid BitCast"); - Dest.PointerVal = Src.PointerVal; - } else if (DstTy->isIntegerTy()) { - if (SrcTy->isFloatTy()) { - Dest.IntVal = APInt::floatToBits(Src.FloatVal); - } else if (SrcTy->isDoubleTy()) { - Dest.IntVal = APInt::doubleToBits(Src.DoubleVal); - } else if (SrcTy->isIntegerTy()) { - Dest.IntVal = Src.IntVal; - } else + + if ((SrcTy->getTypeID() == Type::VectorTyID) || + (DstTy->getTypeID() == Type::VectorTyID)) { + // vector src bitcast to vector dst or vector src bitcast to scalar dst or + // scalar src bitcast to vector dst + bool isLittleEndian = TD.isLittleEndian(); + GenericValue TempDst, TempSrc, SrcVec; + const Type *SrcElemTy; + const Type *DstElemTy; + unsigned SrcBitSize; + unsigned DstBitSize; + unsigned SrcNum; + unsigned DstNum; + + if (SrcTy->getTypeID() == Type::VectorTyID) { + SrcElemTy = SrcTy->getScalarType(); + SrcBitSize = SrcTy->getScalarSizeInBits(); + SrcNum = Src.AggregateVal.size(); + SrcVec = Src; + } else { + // if src is scalar value, make it vector <1 x type> + SrcElemTy = SrcTy; + SrcBitSize = SrcTy->getPrimitiveSizeInBits(); + SrcNum = 1; + SrcVec.AggregateVal.push_back(Src); + } + + if (DstTy->getTypeID() == Type::VectorTyID) { + DstElemTy = DstTy->getScalarType(); + DstBitSize = DstTy->getScalarSizeInBits(); + DstNum = (SrcNum * SrcBitSize) / DstBitSize; + } else { 
+ DstElemTy = DstTy; + DstBitSize = DstTy->getPrimitiveSizeInBits(); + DstNum = 1; + } + + if (SrcNum * SrcBitSize != DstNum * DstBitSize) llvm_unreachable("Invalid BitCast"); - } else if (DstTy->isFloatTy()) { - if (SrcTy->isIntegerTy()) - Dest.FloatVal = Src.IntVal.bitsToFloat(); - else - Dest.FloatVal = Src.FloatVal; - } else if (DstTy->isDoubleTy()) { - if (SrcTy->isIntegerTy()) - Dest.DoubleVal = Src.IntVal.bitsToDouble(); - else - Dest.DoubleVal = Src.DoubleVal; - } else - llvm_unreachable("Invalid Bitcast"); + + // If src is floating point, cast to integer first. + TempSrc.AggregateVal.resize(SrcNum); + if (SrcElemTy->isFloatTy()) { + for (unsigned i = 0; i < SrcNum; i++) + TempSrc.AggregateVal[i].IntVal = + APInt::floatToBits(SrcVec.AggregateVal[i].FloatVal); + + } else if (SrcElemTy->isDoubleTy()) { + for (unsigned i = 0; i < SrcNum; i++) + TempSrc.AggregateVal[i].IntVal = + APInt::doubleToBits(SrcVec.AggregateVal[i].DoubleVal); + } else if (SrcElemTy->isIntegerTy()) { + for (unsigned i = 0; i < SrcNum; i++) + TempSrc.AggregateVal[i].IntVal = SrcVec.AggregateVal[i].IntVal; + } else { + // Pointers are not allowed as the element type of vector. + llvm_unreachable("Invalid Bitcast"); + } + + // now TempSrc is integer type vector + if (DstNum < SrcNum) { + // Example: bitcast <4 x i32> <i32 0, i32 1, i32 2, i32 3> to <2 x i64> + unsigned Ratio = SrcNum / DstNum; + unsigned SrcElt = 0; + for (unsigned i = 0; i < DstNum; i++) { + GenericValue Elt; + Elt.IntVal = 0; + Elt.IntVal = Elt.IntVal.zext(DstBitSize); + unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize * (Ratio - 1); + for (unsigned j = 0; j < Ratio; j++) { + APInt Tmp; + Tmp = Tmp.zext(SrcBitSize); + Tmp = TempSrc.AggregateVal[SrcElt++].IntVal; + Tmp = Tmp.zext(DstBitSize); + Tmp = Tmp.shl(ShiftAmt); + ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + Elt.IntVal |= Tmp; + } + TempDst.AggregateVal.push_back(Elt); + } + } else { + // Example: bitcast <2 x i64> <i64 0, i64 1> to <4 x i32> + unsigned Ratio = DstNum / SrcNum; + for (unsigned i = 0; i < SrcNum; i++) { + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize * (Ratio - 1); + for (unsigned j = 0; j < Ratio; j++) { + GenericValue Elt; + Elt.IntVal = Elt.IntVal.zext(SrcBitSize); + Elt.IntVal = TempSrc.AggregateVal[i].IntVal; + Elt.IntVal = Elt.IntVal.lshr(ShiftAmt); + // it could be DstBitSize == SrcBitSize, so check it + if (DstBitSize < SrcBitSize) + Elt.IntVal = Elt.IntVal.trunc(DstBitSize); + ShiftAmt += isLittleEndian ? 
DstBitSize : -DstBitSize; + TempDst.AggregateVal.push_back(Elt); + } + } + } + + // convert result from integer to specified type + if (DstTy->getTypeID() == Type::VectorTyID) { + if (DstElemTy->isDoubleTy()) { + Dest.AggregateVal.resize(DstNum); + for (unsigned i = 0; i < DstNum; i++) + Dest.AggregateVal[i].DoubleVal = + TempDst.AggregateVal[i].IntVal.bitsToDouble(); + } else if (DstElemTy->isFloatTy()) { + Dest.AggregateVal.resize(DstNum); + for (unsigned i = 0; i < DstNum; i++) + Dest.AggregateVal[i].FloatVal = + TempDst.AggregateVal[i].IntVal.bitsToFloat(); + } else { + Dest = TempDst; + } + } else { + if (DstElemTy->isDoubleTy()) + Dest.DoubleVal = TempDst.AggregateVal[0].IntVal.bitsToDouble(); + else if (DstElemTy->isFloatTy()) { + Dest.FloatVal = TempDst.AggregateVal[0].IntVal.bitsToFloat(); + } else { + Dest.IntVal = TempDst.AggregateVal[0].IntVal; + } + } + } else { // if ((SrcTy->getTypeID() == Type::VectorTyID) || + // (DstTy->getTypeID() == Type::VectorTyID)) + + // scalar src bitcast to scalar dst + if (DstTy->isPointerTy()) { + assert(SrcTy->isPointerTy() && "Invalid BitCast"); + Dest.PointerVal = Src.PointerVal; + } else if (DstTy->isIntegerTy()) { + if (SrcTy->isFloatTy()) + Dest.IntVal = APInt::floatToBits(Src.FloatVal); + else if (SrcTy->isDoubleTy()) { + Dest.IntVal = APInt::doubleToBits(Src.DoubleVal); + } else if (SrcTy->isIntegerTy()) { + Dest.IntVal = Src.IntVal; + } else { + llvm_unreachable("Invalid BitCast"); + } + } else if (DstTy->isFloatTy()) { + if (SrcTy->isIntegerTy()) + Dest.FloatVal = Src.IntVal.bitsToFloat(); + else { + Dest.FloatVal = Src.FloatVal; + } + } else if (DstTy->isDoubleTy()) { + if (SrcTy->isIntegerTy()) + Dest.DoubleVal = Src.IntVal.bitsToDouble(); + else { + Dest.DoubleVal = Src.DoubleVal; + } + } else { + llvm_unreachable("Invalid Bitcast"); + } + } return Dest; } @@ -1187,6 +1760,39 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { ++VAList.UIntPairVal.second; } +void Interpreter::visitExtractElementInst(ExtractElementInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + + Type *Ty = I.getType(); + const unsigned indx = unsigned(Src2.IntVal.getZExtValue()); + + if(Src1.AggregateVal.size() > indx) { + switch (Ty->getTypeID()) { + default: + dbgs() << "Unhandled destination type for extractelement instruction: " + << *Ty << "\n"; + llvm_unreachable(0); + break; + case Type::IntegerTyID: + Dest.IntVal = Src1.AggregateVal[indx].IntVal; + break; + case Type::FloatTyID: + Dest.FloatVal = Src1.AggregateVal[indx].FloatVal; + break; + case Type::DoubleTyID: + Dest.DoubleVal = Src1.AggregateVal[indx].DoubleVal; + break; + } + } else { + dbgs() << "Invalid index in extractelement instruction\n"; + } + + SetValue(&I, Dest, SF); +} + GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, ExecutionContext &SF) { switch (CE->getOpcode()) { diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index e95db2f..2952d7e 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -178,6 +178,7 @@ public: void visitAShr(BinaryOperator &I); void visitVAArgInst(VAArgInst &I); + void visitExtractElementInst(ExtractElementInst &I); void visitInstruction(Instruction &I) { errs() << I << "\n"; llvm_unreachable("Instruction not interpretable yet!"); diff --git a/lib/ExecutionEngine/JIT/Android.mk 
b/lib/ExecutionEngine/JIT/Android.mk index 02cef81..0466ba0 100644 --- a/lib/ExecutionEngine/JIT/Android.mk +++ b/lib/ExecutionEngine/JIT/Android.mk @@ -6,7 +6,6 @@ include $(CLEAR_VARS) LOCAL_SRC_FILES := \ JIT.cpp \ - JITDwarfEmitter.cpp \ JITEmitter.cpp \ JITMemoryManager.cpp diff --git a/lib/ExecutionEngine/JIT/CMakeLists.txt b/lib/ExecutionEngine/JIT/CMakeLists.txt index 52bb389..e16baed 100644 --- a/lib/ExecutionEngine/JIT/CMakeLists.txt +++ b/lib/ExecutionEngine/JIT/CMakeLists.txt @@ -3,7 +3,6 @@ add_definitions(-DENABLE_X86_JIT) add_llvm_library(LLVMJIT JIT.cpp - JITDwarfEmitter.cpp JITEmitter.cpp JITMemoryManager.cpp ) diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp deleted file mode 100644 index 35d2b8b..0000000 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ /dev/null @@ -1,596 +0,0 @@ -//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a JITDwarfEmitter object that is used by the JIT to -// write dwarf tables to memory. -// -//===----------------------------------------------------------------------===// - -#include "JITDwarfEmitter.h" -#include "JIT.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -using namespace llvm; - -JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {} - - -unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, - JITCodeEmitter& jce, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* &EHFramePtr) { - assert(MMI && "MachineModuleInfo not registered!"); - - const TargetMachine& TM = F.getTarget(); - TD = TM.getDataLayout(); - stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection(); - RI = TM.getRegisterInfo(); - MAI = TM.getMCAsmInfo(); - JCE = &jce; - - unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction, - EndFunction); - - unsigned char* Result = 0; - - const std::vector<const Function *> Personalities = MMI->getPersonalities(); - EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]); - - Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr, - StartFunction, EndFunction, ExceptionTable); - - return Result; -} - - -void -JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, - const std::vector<MachineMove> &Moves) const { - unsigned PointerSize = TD->getPointerSize(); - int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? - PointerSize : -PointerSize; - MCSymbol *BaseLabel = 0; - - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { - const MachineMove &Move = Moves[i]; - MCSymbol *Label = Move.getLabel(); - - // Throw out move if the label is invalid. 
- if (Label && (*JCE->getLabelLocations())[Label] == 0) - continue; - - intptr_t LabelPtr = 0; - if (Label) LabelPtr = JCE->getLabelAddress(Label); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // Advance row if new location. - if (BaseLabelPtr && Label && BaseLabel != Label) { - JCE->emitByte(dwarf::DW_CFA_advance_loc4); - JCE->emitInt32(LabelPtr - BaseLabelPtr); - - BaseLabel = Label; - BaseLabelPtr = LabelPtr; - } - - // If advancing cfa. - if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (!Src.isReg()) { - if (Src.getReg() == MachineLocation::VirtualFP) { - JCE->emitByte(dwarf::DW_CFA_def_cfa_offset); - } else { - JCE->emitByte(dwarf::DW_CFA_def_cfa); - JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true)); - } - - JCE->emitULEB128Bytes(-Src.getOffset()); - } else { - llvm_unreachable("Machine move not supported yet."); - } - } else if (Src.isReg() && - Src.getReg() == MachineLocation::VirtualFP) { - if (Dst.isReg()) { - JCE->emitByte(dwarf::DW_CFA_def_cfa_register); - JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - llvm_unreachable("Machine move not supported yet."); - } - } else { - unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true); - int Offset = Dst.getOffset() / stackGrowth; - - if (Offset < 0) { - JCE->emitByte(dwarf::DW_CFA_offset_extended_sf); - JCE->emitULEB128Bytes(Reg); - JCE->emitSLEB128Bytes(Offset); - } else if (Reg < 64) { - JCE->emitByte(dwarf::DW_CFA_offset + Reg); - JCE->emitULEB128Bytes(Offset); - } else { - JCE->emitByte(dwarf::DW_CFA_offset_extended); - JCE->emitULEB128Bytes(Reg); - JCE->emitULEB128Bytes(Offset); - } - } - } -} - -/// SharedTypeIds - How many leading type ids two landing pads have in common. -static unsigned SharedTypeIds(const LandingPadInfo *L, - const LandingPadInfo *R) { - const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; - unsigned LSize = LIds.size(), RSize = RIds.size(); - unsigned MinSize = LSize < RSize ? LSize : RSize; - unsigned Count = 0; - - for (; Count != MinSize; ++Count) - if (LIds[Count] != RIds[Count]) - return Count; - - return Count; -} - - -/// PadLT - Order landing pads lexicographically by type id. -static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { - const std::vector<int> &LIds = L->TypeIds, &RIds = R->TypeIds; - unsigned LSize = LIds.size(), RSize = RIds.size(); - unsigned MinSize = LSize < RSize ? LSize : RSize; - - for (unsigned i = 0; i != MinSize; ++i) - if (LIds[i] != RIds[i]) - return LIds[i] < RIds[i]; - - return LSize < RSize; -} - -namespace { - -/// ActionEntry - Structure describing an entry in the actions table. -struct ActionEntry { - int ValueForTypeID; // The value to write - may not be equal to the type id. - int NextAction; - struct ActionEntry *Previous; -}; - -/// PadRange - Structure holding a try-range and the associated landing pad. -struct PadRange { - // The index of the landing pad. - unsigned PadIndex; - // The index of the begin and end labels in the landing pad's label lists. - unsigned RangeIndex; -}; - -typedef DenseMap<MCSymbol*, PadRange> RangeMapType; - -/// CallSiteEntry - Structure describing an entry in the call-site table. -struct CallSiteEntry { - MCSymbol *BeginLabel; // zero indicates the start of the function. - MCSymbol *EndLabel; // zero indicates the end of the function. - MCSymbol *PadLabel; // zero indicates that there is no landing pad. 
- unsigned Action; -}; - -} - -unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, - unsigned char* StartFunction, - unsigned char* EndFunction) const { - assert(MMI && "MachineModuleInfo not registered!"); - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(JCE->getLabelLocations()); - - const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); - const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); - const std::vector<LandingPadInfo> &PadInfos = MMI->getLandingPads(); - if (PadInfos.empty()) return 0; - - // Sort the landing pads in order of their type ids. This is used to fold - // duplicate actions. - SmallVector<const LandingPadInfo *, 64> LandingPads; - LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); - - // Negative type ids index into FilterIds, positive type ids index into - // TypeInfos. The value written for a positive type id is just the type - // id itself. For a negative type id, however, the value written is the - // (negative) byte offset of the corresponding FilterIds entry. The byte - // offset is usually equal to the type id, because the FilterIds entries - // are written using a variable width encoding which outputs one byte per - // entry as long as the value written is not too large, but can differ. - // This kind of complication does not occur for positive type ids because - // type infos are output using a fixed width encoding. - // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i]. - SmallVector<int, 16> FilterOffsets; - FilterOffsets.reserve(FilterIds.size()); - int Offset = -1; - for(std::vector<unsigned>::const_iterator I = FilterIds.begin(), - E = FilterIds.end(); I != E; ++I) { - FilterOffsets.push_back(Offset); - Offset -= MCAsmInfo::getULEB128Size(*I); - } - - // Compute the actions table and gather the first action index for each - // landing pad site. - SmallVector<ActionEntry, 32> Actions; - SmallVector<unsigned, 64> FirstActions; - FirstActions.reserve(LandingPads.size()); - - int FirstAction = 0; - unsigned SizeActions = 0; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LP = LandingPads[i]; - const std::vector<int> &TypeIds = LP->TypeIds; - const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; - unsigned SizeSiteActions = 0; - - if (NumShared < TypeIds.size()) { - unsigned SizeAction = 0; - ActionEntry *PrevAction = 0; - - if (NumShared) { - const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); - assert(Actions.size()); - PrevAction = &Actions.back(); - SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - for (unsigned j = NumShared; j != SizePrevIds; ++j) { - SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - SizeAction += -PrevAction->NextAction; - PrevAction = PrevAction->Previous; - } - } - - // Compute the actions. - for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { - int TypeID = TypeIds[I]; - assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); - - int NextAction = SizeAction ? 
-(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); - SizeSiteActions += SizeAction; - - ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; - Actions.push_back(Action); - - PrevAction = &Actions.back(); - } - - // Record the first action of the landing pad site. - FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; - } // else identical - re-use previous FirstAction - - FirstActions.push_back(FirstAction); - - // Compute this sites contribution to size. - SizeActions += SizeSiteActions; - } - - // Compute the call-site table. Entries must be ordered by address. - SmallVector<CallSiteEntry, 64> CallSites; - - RangeMapType PadMap; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LandingPad = LandingPads[i]; - for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) { - MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; - assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); - PadRange P = { i, j }; - PadMap[BeginLabel] = P; - } - } - - bool MayThrow = false; - MCSymbol *LastLabel = 0; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); - MI != E; ++MI) { - if (!MI->isLabel()) { - MayThrow |= MI->isCall(); - continue; - } - - MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol(); - assert(BeginLabel && "Invalid label!"); - - if (BeginLabel == LastLabel) - MayThrow = false; - - RangeMapType::iterator L = PadMap.find(BeginLabel); - - if (L == PadMap.end()) - continue; - - PadRange P = L->second; - const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && - "Inconsistent landing pad map!"); - - // If some instruction between the previous try-range and this one may - // throw, create a call-site entry with no landing pad for the region - // between the try-ranges. - if (MayThrow) { - CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; - CallSites.push_back(Site); - } - - LastLabel = LandingPad->EndLabels[P.RangeIndex]; - CallSiteEntry Site = {BeginLabel, LastLabel, - LandingPad->LandingPadLabel, FirstActions[P.PadIndex]}; - - assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel && - "Invalid landing pad!"); - - // Try to merge with the previous call-site. - if (CallSites.size()) { - CallSiteEntry &Prev = CallSites.back(); - if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { - // Extend the range of the previous entry. - Prev.EndLabel = Site.EndLabel; - continue; - } - } - - // Otherwise, create a new call-site. - CallSites.push_back(Site); - } - } - // If some instruction between the previous try-range and the end of the - // function may throw, create a call-site entry with no landing pad for the - // region following the try-range. - if (MayThrow) { - CallSiteEntry Site = {LastLabel, 0, 0, 0}; - CallSites.push_back(Site); - } - - // Final tallies. - unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start. - sizeof(int32_t) + // Site length. - sizeof(int32_t)); // Landing pad. 
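// Aside, not part of this patch: the size accounting in this (now deleted)
// emitter relies on LEB128 being a variable-width encoding, one byte per 7
// bits of payload. A minimal sketch of what MCAsmInfo::getULEB128Size
// computes; the helper name here is hypothetical.
#include <cstdint>
static unsigned getULEB128SizeSketch(uint64_t Value) {
  unsigned Bytes = 0;
  do {
    Value >>= 7; // each output byte carries 7 payload bits...
    ++Bytes;     // ...plus one continuation bit
  } while (Value != 0);
  return Bytes;
}
// E.g. getULEB128SizeSketch(127) == 1 but getULEB128SizeSketch(128) == 2,
// which is why the filter offsets and action sizes are accumulated entry by
// entry rather than computed from the entry count.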
- for (unsigned i = 0, e = CallSites.size(); i < e; ++i) - SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); - - unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); - - unsigned TypeOffset = sizeof(int8_t) + // Call site format - // Call-site table length - MCAsmInfo::getULEB128Size(SizeSites) + - SizeSites + SizeActions + SizeTypes; - - // Begin the exception table. - JCE->emitAlignmentWithFill(4, 0); - // Asm->EOL("Padding"); - - unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue(); - - // Emit the header. - JCE->emitByte(dwarf::DW_EH_PE_omit); - // Asm->EOL("LPStart format (DW_EH_PE_omit)"); - JCE->emitByte(dwarf::DW_EH_PE_absptr); - // Asm->EOL("TType format (DW_EH_PE_absptr)"); - JCE->emitULEB128Bytes(TypeOffset); - // Asm->EOL("TType base offset"); - JCE->emitByte(dwarf::DW_EH_PE_udata4); - // Asm->EOL("Call site format (DW_EH_PE_udata4)"); - JCE->emitULEB128Bytes(SizeSites); - // Asm->EOL("Call-site table length"); - - // Emit the landing pad site information. - for (unsigned i = 0; i < CallSites.size(); ++i) { - CallSiteEntry &S = CallSites[i]; - intptr_t BeginLabelPtr = 0; - intptr_t EndLabelPtr = 0; - - if (!S.BeginLabel) { - BeginLabelPtr = (intptr_t)StartFunction; - JCE->emitInt32(0); - } else { - BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel); - JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction); - } - - // Asm->EOL("Region start"); - - if (!S.EndLabel) - EndLabelPtr = (intptr_t)EndFunction; - else - EndLabelPtr = JCE->getLabelAddress(S.EndLabel); - - JCE->emitInt32(EndLabelPtr - BeginLabelPtr); - //Asm->EOL("Region length"); - - if (!S.PadLabel) { - JCE->emitInt32(0); - } else { - unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel); - JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction); - } - // Asm->EOL("Landing pad"); - - JCE->emitULEB128Bytes(S.Action); - // Asm->EOL("Action"); - } - - // Emit the actions. - for (unsigned I = 0, N = Actions.size(); I != N; ++I) { - ActionEntry &Action = Actions[I]; - - JCE->emitSLEB128Bytes(Action.ValueForTypeID); - //Asm->EOL("TypeInfo index"); - JCE->emitSLEB128Bytes(Action.NextAction); - //Asm->EOL("Next action"); - } - - // Emit the type ids. - for (unsigned M = TypeInfos.size(); M; --M) { - const GlobalVariable *GV = TypeInfos[M - 1]; - - if (GV) { - if (TD->getPointerSize() == sizeof(int32_t)) - JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV)); - else - JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV)); - } else { - if (TD->getPointerSize() == sizeof(int32_t)) - JCE->emitInt32(0); - else - JCE->emitInt64(0); - } - // Asm->EOL("TypeInfo"); - } - - // Emit the filter typeids. - for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { - unsigned TypeID = FilterIds[j]; - JCE->emitULEB128Bytes(TypeID); - //Asm->EOL("Filter TypeInfo index"); - } - - JCE->emitAlignmentWithFill(4, 0); - - return DwarfExceptionTable; -} - -unsigned char* -JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { - unsigned PointerSize = TD->getPointerSize(); - int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? - PointerSize : -PointerSize; - - unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue(); - // EH Common Frame header - JCE->allocateSpace(4, 0); - unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue(); - JCE->emitInt32((int)0); - JCE->emitByte(dwarf::DW_CIE_VERSION); - JCE->emitString(Personality ? 
"zPLR" : "zR"); - JCE->emitULEB128Bytes(1); - JCE->emitSLEB128Bytes(stackGrowth); - JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true)); - - if (Personality) { - // Augmentation Size: 3 small ULEBs of one byte each, and the personality - // function which size is PointerSize. - JCE->emitULEB128Bytes(3 + PointerSize); - - // We set the encoding of the personality as direct encoding because we use - // the function pointer. The encoding is not relative because the current - // PC value may be bigger than the personality function pointer. - if (PointerSize == 4) { - JCE->emitByte(dwarf::DW_EH_PE_sdata4); - JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality))); - } else { - JCE->emitByte(dwarf::DW_EH_PE_sdata8); - JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality))); - } - - // LSDA encoding: This must match the encoding used in EmitEHFrame () - if (PointerSize == 4) - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - else - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8); - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - } else { - JCE->emitULEB128Bytes(1); - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - } - - EmitFrameMoves(0, MAI->getInitialFrameState()); - - JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop); - - JCE->emitInt32At((uintptr_t*)StartCommonPtr, - (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() - - FrameCommonBeginPtr)); - - return StartCommonPtr; -} - - -unsigned char* -JITDwarfEmitter::EmitEHFrame(const Function* Personality, - unsigned char* StartCommonPtr, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* ExceptionTable) const { - unsigned PointerSize = TD->getPointerSize(); - - // EH frame header. - unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue(); - JCE->allocateSpace(4, 0); - unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue(); - // FDE CIE Offset - JCE->emitInt32(FrameBeginPtr - StartCommonPtr); - JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue()); - JCE->emitInt32(EndFunction - StartFunction); - - // If there is a personality and landing pads then point to the language - // specific data area in the exception table. - if (Personality) { - JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8); - - if (PointerSize == 4) { - if (!MMI->getLandingPads().empty()) - JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue()); - else - JCE->emitInt32((int)0); - } else { - if (!MMI->getLandingPads().empty()) - JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue()); - else - JCE->emitInt64((int)0); - } - } else { - JCE->emitULEB128Bytes(0); - } - - // Indicate locations of function specific callee saved registers in - // frame. 
- EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves()); - - JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop); - - // Indicate the size of the table - JCE->emitInt32At((uintptr_t*)StartEHPtr, - (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() - - StartEHPtr)); - - // Double zeroes for the unwind runtime - if (PointerSize == 8) { - JCE->emitInt64(0); - JCE->emitInt64(0); - } else { - JCE->emitInt32(0); - JCE->emitInt32(0); - } - - return StartEHPtr; -} diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h deleted file mode 100644 index 98ac340..0000000 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ /dev/null @@ -1,77 +0,0 @@ -//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a JITDwarfEmitter object that is used by the JIT to -// write dwarf tables to memory. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H -#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H - -#include "llvm/Support/DataTypes.h" -#include <vector> - -namespace llvm { - -class Function; -class JIT; -class JITCodeEmitter; -class MachineFunction; -class MachineModuleInfo; -class MachineMove; -class MCAsmInfo; -class DataLayout; -class TargetMachine; -class TargetRegisterInfo; - -class JITDwarfEmitter { - const DataLayout* TD; - JITCodeEmitter* JCE; - const TargetRegisterInfo* RI; - const MCAsmInfo *MAI; - MachineModuleInfo* MMI; - JIT& Jit; - bool stackGrowthDirection; - - unsigned char* EmitExceptionTable(MachineFunction* MF, - unsigned char* StartFunction, - unsigned char* EndFunction) const; - - void EmitFrameMoves(intptr_t BaseLabelPtr, - const std::vector<MachineMove> &Moves) const; - - unsigned char* EmitCommonEHFrame(const Function* Personality) const; - - unsigned char* EmitEHFrame(const Function* Personality, - unsigned char* StartBufferPtr, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* ExceptionTable) const; - -public: - - JITDwarfEmitter(JIT& jit); - - unsigned char* EmitDwarfTable(MachineFunction& F, - JITCodeEmitter& JCE, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* &EHFramePtr); - - - void setModuleInfo(MachineModuleInfo* Info) { - MMI = Info; - } -}; - - -} // end namespace llvm - -#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index c273876..acbbfa1 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "jit" #include "JIT.h" -#include "JITDwarfEmitter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" @@ -325,9 +324,6 @@ namespace { /// Resolver - This contains info about the currently resolved functions. JITResolver Resolver; - /// DE - The dwarf emitter for the jit. - OwningPtr<JITDwarfEmitter> DE; - /// LabelLocations - This vector is a mapping from Label ID's to their /// address. 
DenseMap<MCSymbol*, uintptr_t> LabelLocations; @@ -363,22 +359,16 @@ namespace { /// Instance of the JIT JIT *TheJIT; - bool JITExceptionHandling; - public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), TheJIT(&jit), - JITExceptionHandling(TM.Options.JITExceptionHandling) { + EmittedFunctions(this), TheJIT(&jit) { MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); DEBUG(dbgs() << "JIT is managing a GOT\n"); } - if (JITExceptionHandling) { - DE.reset(new JITDwarfEmitter(jit)); - } } ~JITEmitter() { delete MemMgr; @@ -460,7 +450,6 @@ namespace { virtual void setModuleInfo(MachineModuleInfo* Info) { MMI = Info; - if (DE.get()) DE->setModuleInfo(Info); } private: @@ -964,40 +953,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } }); - if (JITExceptionHandling) { - uintptr_t ActualSize = 0; - SavedBufferBegin = BufferBegin; - SavedBufferEnd = BufferEnd; - SavedCurBufferPtr = CurBufferPtr; - uint8_t *FrameRegister; - - while (true) { - BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), - ActualSize); - BufferEnd = BufferBegin+ActualSize; - EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; - uint8_t *EhStart; - FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, EhStart); - - // If the buffer was large enough to hold the table then we are done. - if (CurBufferPtr != BufferEnd) - break; - - // Try again with twice as much space. - ActualSize = (CurBufferPtr - BufferBegin) * 2; - MemMgr->deallocateExceptionTable(BufferBegin); - } - MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, - FrameRegister); - BufferBegin = SavedBufferBegin; - BufferEnd = SavedBufferEnd; - CurBufferPtr = SavedCurBufferPtr; - - if (JITExceptionHandling) { - TheJIT->RegisterTable(F.getFunction(), FrameRegister); - } - } - if (MMI) MMI->EndFunction(); @@ -1027,15 +982,10 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { Emitted = EmittedFunctions.find(F); if (Emitted != EmittedFunctions.end()) { MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody); - MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable); TheJIT->NotifyFreeingMachineCode(Emitted->second.Code); EmittedFunctions.erase(Emitted); } - - if (JITExceptionHandling) { - TheJIT->DeregisterTable(F); - } } diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 66aeb77..94db245 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -468,7 +468,11 @@ namespace { // Grow the required block size to account for the block header Size += sizeof(*CurBlock); - // FIXME: Alignement handling. + // Alignment handling. + if (!Alignment) + Alignment = 16; + Size += Alignment - 1; + FreeRangeHeader* candidateBlock = FreeMemoryList; FreeRangeHeader* head = FreeMemoryList; FreeRangeHeader* iter = head->Next; @@ -500,7 +504,8 @@ namespace { FreeMemoryList = candidateBlock->AllocateBlock(); // Release the memory at the end of this block that isn't needed. FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size); - return (uint8_t *)(CurBlock + 1); + uintptr_t unalignedAddr = (uintptr_t)CurBlock + sizeof(*CurBlock); + return (uint8_t*)RoundUpToAlignment((uint64_t)unalignedAddr, Alignment); } /// allocateDataSection - Allocate memory for a data section. 
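// Aside, not part of this patch: the allocation fix above reserves
// Alignment - 1 extra bytes and then rounds the returned payload address up.
// A self-contained sketch of the rounding idiom behind RoundUpToAlignment,
// assuming a power-of-two alignment; names are illustrative.
#include <cassert>
#include <cstdint>
static uintptr_t roundUpSketch(uintptr_t Addr, uintptr_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 && "need a power of two");
  return (Addr + Align - 1) & ~(Align - 1); // overshoot, then mask back down
}
// E.g. roundUpSketch(0x1001, 16) == 0x1010; because Size was padded by
// Align - 1 up front, the rounded pointer always stays inside the block.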
@@ -509,30 +514,10 @@ namespace { return (uint8_t*)DataAllocator.Allocate(Size, Alignment); } - bool applyPermissions(std::string *ErrMsg) { + bool finalizeMemory(std::string *ErrMsg) { return false; } - /// startExceptionTable - Use startFunctionBody to allocate memory for the - /// function's exception table. - uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) { - return startFunctionBody(F, ActualSize); - } - - /// endExceptionTable - The exception table of F is now allocated, - /// and takes the memory in the range [TableStart,TableEnd). - void endExceptionTable(const Function *F, uint8_t *TableStart, - uint8_t *TableEnd, uint8_t* FrameRegister) { - assert(TableEnd > TableStart); - assert(TableStart == (uint8_t *)(CurBlock+1) && - "Mismatched table start/end!"); - - uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock; - - // Release the memory at the end of this block that isn't needed. - FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); - } - uint8_t *getGOTBase() const { return GOTBase; } @@ -557,12 +542,6 @@ namespace { if (Body) deallocateBlock(Body); } - /// deallocateExceptionTable - Deallocate memory for the specified - /// exception table. - void deallocateExceptionTable(void *ET) { - if (ET) deallocateBlock(ET); - } - /// setMemoryWritable - When code generation is in progress, /// the code pages may need permissions changed. void setMemoryWritable() diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index fee10e1..09dd924 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -14,6 +14,7 @@ #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" @@ -38,7 +39,7 @@ extern "C" void LLVMLinkInMCJIT() { ExecutionEngine *MCJIT::createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -46,13 +47,14 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, JMM, GVsWithCode); + return new MCJIT(M, TM, MemMgr ? MemMgr : new SectionMemoryManager(), + GVsWithCode); } MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM, bool AllocateGVsWithCode) : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(MM), Dyld(MM), - isCompiled(false), M(m) { + IsLoaded(false), M(m), ObjCache(0) { setDataLayout(TM->getDataLayout()); } @@ -64,7 +66,11 @@ MCJIT::~MCJIT() { delete TM; } -void MCJIT::emitObject(Module *m) { +void MCJIT::setObjectCache(ObjectCache* NewCache) { + ObjCache = NewCache; +} + +ObjectBufferStream* MCJIT::emitObject(Module *m) { /// Currently, MCJIT only supports a single module and the module passed to /// this function call is expected to be the contained module. The module /// is passed as a parameter here to prepare for multiple module support in @@ -77,30 +83,66 @@ void MCJIT::emitObject(Module *m) { // FIXME: Track compilation state on a per-module basis when multiple modules // are supported. 
// Re-compilation is not supported - if (isCompiled) - return; + assert(!IsLoaded); PassManager PM; PM.add(new DataLayout(*TM->getDataLayout())); // The RuntimeDyld will take ownership of this shortly - OwningPtr<ObjectBufferStream> Buffer(new ObjectBufferStream()); + OwningPtr<ObjectBufferStream> CompiledObject(new ObjectBufferStream()); // Turn the machine code intermediate representation into bytes in memory // that may be executed. - if (TM->addPassesToEmitMC(PM, Ctx, Buffer->getOStream(), false)) { + if (TM->addPassesToEmitMC(PM, Ctx, CompiledObject->getOStream(), false)) { report_fatal_error("Target does not support MC emission!"); } // Initialize passes. PM.run(*m); // Flush the output buffer to get the generated code into memory - Buffer->flush(); + CompiledObject->flush(); + + // If we have an object cache, tell it about the new object. + // Note that we're using the compiled image, not the loaded image (as below). + if (ObjCache) { + // MemoryBuffer is a thin wrapper around the actual memory, so it's OK + // to create a temporary object here and delete it after the call. + OwningPtr<MemoryBuffer> MB(CompiledObject->getMemBuffer()); + ObjCache->notifyObjectCompiled(m, MB.get()); + } + + return CompiledObject.take(); +} + +void MCJIT::loadObject(Module *M) { + + // Get a thread lock to make sure we aren't trying to load multiple times + MutexGuard locked(lock); + + // FIXME: Track compilation state on a per-module basis when multiple modules + // are supported. + // Re-compilation is not supported + if (IsLoaded) + return; + + OwningPtr<ObjectBuffer> ObjectToLoad; + // Try to load the pre-compiled object from cache if possible + if (0 != ObjCache) { + OwningPtr<MemoryBuffer> PreCompiledObject(ObjCache->getObject(M)); + if (0 != PreCompiledObject.get()) + ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.take())); + } + + // If the cache did not contain a suitable object, compile the object + if (!ObjectToLoad) { + ObjectToLoad.reset(emitObject(M)); + assert(ObjectToLoad.get() && "Compilation did not produce an object."); + } // Load the object into the dynamic linker. // handing off ownership of the buffer - LoadedObject.reset(Dyld.loadObject(Buffer.take())); + LoadedObject.reset(Dyld.loadObject(ObjectToLoad.take())); if (!LoadedObject) report_fatal_error(Dyld.getErrorString()); @@ -113,7 +155,7 @@ void MCJIT::emitObject(Module *m) { NotifyObjectEmitted(*LoadedObject); // FIXME: Add support for per-module compilation state - isCompiled = true; + IsLoaded = true; } // FIXME: Add a parameter to identify which object is being finalized when @@ -122,22 +164,21 @@ void MCJIT::emitObject(Module *m) { // protection in the interface. void MCJIT::finalizeObject() { // If the module hasn't been compiled, just do that. - if (!isCompiled) { - // If the call to Dyld.resolveRelocations() is removed from emitObject() + if (!IsLoaded) { + // If the call to Dyld.resolveRelocations() is removed from loadObject() // we'll need to do that here. - emitObject(M); - - // Set page permissions. - MemMgr->applyPermissions(); - - return; + loadObject(M); + } else { + // Resolve any relocations. + Dyld.resolveRelocations(); } - // Resolve any relocations. - Dyld.resolveRelocations(); + StringRef EHData = Dyld.getEHFrameSection(); + if (!EHData.empty()) + MemMgr->registerEHFrames(EHData); // Set page permissions. - MemMgr->applyPermissions(); + MemMgr->finalizeMemory(); } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { @@ -151,8 +192,8 @@ void *MCJIT::getPointerToFunction(Function *F) { // dies. 
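// Aside, not part of this patch: a client-side sketch of the new ObjectCache
// hook, assuming the two-method interface used above (getObject /
// notifyObjectCompiled). The in-memory map storage is purely illustrative.
#include "llvm/ExecutionEngine/ObjectCache.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/MemoryBuffer.h"
#include <map>
#include <string>
class InMemoryObjectCache : public llvm::ObjectCache {
  std::map<std::string, std::string> Blobs; // module id -> object bytes
public:
  // MCJIT calls this after compiling a module; stash the object image.
  virtual void notifyObjectCompiled(const llvm::Module *M,
                                    const llvm::MemoryBuffer *Obj) {
    Blobs[M->getModuleIdentifier()] = Obj->getBuffer().str();
  }
  // MCJIT calls this before compiling; return a copy, or 0 on a cache miss.
  virtual llvm::MemoryBuffer *getObject(const llvm::Module *M) {
    std::map<std::string, std::string>::const_iterator I =
        Blobs.find(M->getModuleIdentifier());
    if (I == Blobs.end())
      return 0; // miss: MCJIT falls back to compiling the module
    return llvm::MemoryBuffer::getMemBufferCopy(I->second);
  }
};
// Usage: call EE->setObjectCache(&Cache) once, before the first lookup that
// triggers compilation; loadObject() then consults the cache first.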
// FIXME: Add support for per-module compilation state - if (!isCompiled) - emitObject(M); + if (!IsLoaded) + loadObject(M); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -284,8 +325,8 @@ GenericValue MCJIT::runFunction(Function *F, void *MCJIT::getPointerToNamedFunction(const std::string &Name, bool AbortOnFailure) { // FIXME: Add support for per-module compilation state - if (!isCompiled) - emitObject(M); + if (!IsLoaded) + loadObject(M); if (!isSymbolSearchingDisabled() && MemMgr) { void *ptr = MemMgr->getPointerToNamedFunction(Name, false); diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 283a8e5..a899d4f 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -12,6 +12,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/PassManager.h" @@ -34,16 +35,30 @@ class MCJIT : public ExecutionEngine { SmallVector<JITEventListener*, 2> EventListeners; // FIXME: Add support for multiple modules - bool isCompiled; + bool IsLoaded; Module *M; OwningPtr<ObjectImage> LoadedObject; + // An optional ObjectCache to be notified of compiled objects and used to + // perform lookup of pre-compiled code to avoid re-compilation. + ObjectCache *ObjCache; + public: ~MCJIT(); /// @name ExecutionEngine interface implementation /// @{ + /// Sets the object manager that MCJIT should use to avoid compilation. + virtual void setObjectCache(ObjectCache *manager); + + /// finalizeObject - ensure the module is fully processed and is usable. + /// + /// It is the user-level function for completing the process of making the + /// object usable for execution. It should be called after sections within an + /// object have been relocated using mapSectionAddress. When this method is + /// called the MCJIT execution engine will reapply relocations for a loaded + /// object. virtual void finalizeObject(); virtual void *getPointerToBasicBlock(BasicBlock *BB); @@ -90,7 +105,7 @@ public: static ExecutionEngine *createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM); @@ -102,7 +117,9 @@ protected: /// this function call is expected to be the contained module. The module /// is passed as a parameter here to prepare for multiple module support in /// the future. - void emitObject(Module *M); + ObjectBufferStream* emitObject(Module *M); + + void loadObject(Module *M); void NotifyObjectEmitted(const ObjectImage& Obj); void NotifyFreeingObject(const ObjectImage& Obj); diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp index fa35acd..650832e 100644 --- a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp +++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -14,19 +14,8 @@ #include "llvm/Config/config.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MathExtras.h" -#ifdef __linux__ - // These includes used by SectionMemoryManager::getPointerToNamedFunction() - // for Glibc trickery. See comments in this function for more information. 
- #ifdef HAVE_SYS_STAT_H - #include <sys/stat.h> - #endif - #include <fcntl.h> - #include <unistd.h> -#endif - namespace llvm { uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size, @@ -111,7 +100,7 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, return (uint8_t*)Addr; } -bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) +bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) { // FIXME: Should in-progress permissions be reverted if an error occurs? error_code ec; @@ -138,6 +127,11 @@ bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) // Read-write data memory already has the correct permissions + // Some platforms with separate data cache and instruction cache require + // explicit cache flush, otherwise JIT code manipulations (like resolved + // relocations) will get to the data cache but not to the instruction cache. + invalidateInstructionCache(); + return false; } @@ -162,57 +156,6 @@ void SectionMemoryManager::invalidateInstructionCache() { CodeMem.AllocatedMem[i].size()); } -static int jit_noop() { - return 0; -} - -void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { -#if defined(__linux__) - //===--------------------------------------------------------------------===// - // Function stubs that are invoked instead of certain library calls - // - // Force the following functions to be linked in to anything that uses the - // JIT. This is a hack designed to work around the all-too-clever Glibc - // strategy of making these functions work differently when inlined vs. when - // not inlined, and hiding their real definitions in a separate archive file - // that the dynamic linker can't see. For more info, search for - // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. - if (Name == "stat") return (void*)(intptr_t)&stat; - if (Name == "fstat") return (void*)(intptr_t)&fstat; - if (Name == "lstat") return (void*)(intptr_t)&lstat; - if (Name == "stat64") return (void*)(intptr_t)&stat64; - if (Name == "fstat64") return (void*)(intptr_t)&fstat64; - if (Name == "lstat64") return (void*)(intptr_t)&lstat64; - if (Name == "atexit") return (void*)(intptr_t)&atexit; - if (Name == "mknod") return (void*)(intptr_t)&mknod; -#endif // __linux__ - - // We should not invoke parent's ctors/dtors from generated main()! - // On Mingw and Cygwin, the symbol __main is resolved to - // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors - // (and register wrong callee's dtors with atexit(3)). - // We expect ExecutionEngine::runStaticConstructorsDestructors() - // is called before ExecutionEngine::runFunctionAsMain() is called. - if (Name == "__main") return (void*)(intptr_t)&jit_noop; - - const char *NameStr = Name.c_str(); - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) return Ptr; - - // If it wasn't found and if it starts with an underscore ('_') character, - // try again without the underscore. 
- if (NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) return Ptr; - } - - if (AbortOnFailure) - report_fatal_error("Program used external function '" + Name + - "' which could not be resolved!"); - return 0; -} - SectionMemoryManager::~SectionMemoryManager() { for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]); diff --git a/lib/ExecutionEngine/RTDyldMemoryManager.cpp b/lib/ExecutionEngine/RTDyldMemoryManager.cpp new file mode 100644 index 0000000..4e76457 --- /dev/null +++ b/lib/ExecutionEngine/RTDyldMemoryManager.cpp @@ -0,0 +1,118 @@ +//===-- RTDyldMemoryManager.cpp - Memory manager for MC-JIT -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the runtime dynamic memory manager base class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ErrorHandling.h" + +#include <cstdlib> + +#ifdef __linux__ + // These includes used by RTDyldMemoryManager::getPointerToNamedFunction() + // for Glibc trickery. See comments in this function for more information. + #ifdef HAVE_SYS_STAT_H + #include <sys/stat.h> + #endif + #include <fcntl.h> + #include <unistd.h> +#endif + +namespace llvm { + +RTDyldMemoryManager::~RTDyldMemoryManager() {} + +// Determine whether we can register EH tables. +#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \ + !defined(__USING_SJLJ_EXCEPTIONS__)) +#define HAVE_EHTABLE_SUPPORT 1 +#else +#define HAVE_EHTABLE_SUPPORT 0 +#endif + +#if HAVE_EHTABLE_SUPPORT +extern "C" void __register_frame(void*); + +static const char *processFDE(const char *Entry) { + const char *P = Entry; + uint32_t Length = *((const uint32_t *)P); + P += 4; + uint32_t Offset = *((const uint32_t *)P); + if (Offset != 0) + __register_frame(const_cast<char *>(Entry)); + return P + Length; +} +#endif + +void RTDyldMemoryManager::registerEHFrames(StringRef SectionData) { +#if HAVE_EHTABLE_SUPPORT + const char *P = SectionData.data(); + const char *End = SectionData.data() + SectionData.size(); + do { + P = processFDE(P); + } while(P != End); +#endif +} + +static int jit_noop() { + return 0; +} + +void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { +#if defined(__linux__) + //===--------------------------------------------------------------------===// + // Function stubs that are invoked instead of certain library calls + // + // Force the following functions to be linked in to anything that uses the + // JIT. This is a hack designed to work around the all-too-clever Glibc + // strategy of making these functions work differently when inlined vs. when + // not inlined, and hiding their real definitions in a separate archive file + // that the dynamic linker can't see. For more info, search for + // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. 
+ if (Name == "stat") return (void*)(intptr_t)&stat; + if (Name == "fstat") return (void*)(intptr_t)&fstat; + if (Name == "lstat") return (void*)(intptr_t)&lstat; + if (Name == "stat64") return (void*)(intptr_t)&stat64; + if (Name == "fstat64") return (void*)(intptr_t)&fstat64; + if (Name == "lstat64") return (void*)(intptr_t)&lstat64; + if (Name == "atexit") return (void*)(intptr_t)&atexit; + if (Name == "mknod") return (void*)(intptr_t)&mknod; +#endif // __linux__ + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + if (AbortOnFailure) + report_fatal_error("Program used external function '" + Name + + "' which could not be resolved!"); + return 0; +} + +} // namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index 409b25f..943622f 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -17,18 +17,22 @@ #include "RuntimeDyldELF.h" #include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Path.h" +#include "llvm/Object/ELF.h" using namespace llvm; using namespace llvm::object; // Empty out-of-line virtual destructor as the key function. -RTDyldMemoryManager::~RTDyldMemoryManager() {} RuntimeDyldImpl::~RuntimeDyldImpl() {} namespace llvm { +StringRef RuntimeDyldImpl::getEHFrameSection() { + return StringRef(); +} + // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { // First, resolve relocations associated with external symbols. @@ -96,7 +100,8 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { bool isCommon = flags & SymbolRef::SF_Common; if (isCommon) { // Add the common symbols to a list. We'll allocate them all below. 
-      uint64_t Align = getCommonSymbolAlignment(*i);
+      uint32_t Align;
+      Check(i->getAlignment(Align));
       uint64_t Size = 0;
       Check(i->getSize(Size));
       CommonSize += Size + Align;
@@ -142,6 +147,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
     bool isFirstRelocation = true;
     unsigned SectionID = 0;
     StubMap Stubs;
+    section_iterator RelocatedSection = si->getRelocatedSection();

     for (relocation_iterator i = si->begin_relocations(),
          e = si->end_relocations(); i != e; i.increment(err)) {
@@ -149,23 +155,14 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {

       // If it's the first relocation in this section, find its SectionID
       if (isFirstRelocation) {
-        SectionID = findOrEmitSection(*obj, *si, true, LocalSections);
+        SectionID =
+          findOrEmitSection(*obj, *RelocatedSection, true, LocalSections);
         DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
         isFirstRelocation = false;
       }

-      ObjRelocationInfo RI;
-      RI.SectionID = SectionID;
-      Check(i->getAdditionalInfo(RI.AdditionalInfo));
-      Check(i->getOffset(RI.Offset));
-      Check(i->getSymbol(RI.Symbol));
-      Check(i->getType(RI.Type));
-
-      DEBUG(dbgs() << "\t\tAddend: " << RI.AdditionalInfo
-                   << " Offset: " << format("%p", (uintptr_t)RI.Offset)
-                   << " Type: " << (uint32_t)(RI.Type & 0xffffffffL)
-                   << "\n");
-      processRelocationRef(RI, *obj, LocalSections, LocalSymbols, Stubs);
+      processRelocationRef(SectionID, *i, *obj, LocalSections, LocalSymbols,
+                           Stubs);
     }
   }
@@ -183,7 +180,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
   if (!Addr)
     report_fatal_error("Unable to allocate memory for common symbols!");
   uint64_t Offset = 0;
-  Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, TotalSize, 0));
+  Sections.push_back(SectionEntry(StringRef(), Addr, TotalSize, 0));
   memset(Addr, 0, TotalSize);

   DEBUG(dbgs() << "emitCommonSection SectionID: " << SectionID
@@ -220,11 +217,25 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
   unsigned StubBufSize = 0, StubSize = getMaxStubSize();
   error_code err;
+  const ObjectFile *ObjFile = Obj.getObjectFile();
+  // FIXME: This is an inefficient way to handle this. We should compute the
+  // necessary section allocation size in loadObject by walking all the
+  // sections once.
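// Aside, not part of this patch: a rough sketch of the single pass the FIXME
// above asks for. computeAllStubBufSizes is a hypothetical helper, but it
// uses only iterators that already appear in this file.
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/system_error.h"
#include <map>
static void computeAllStubBufSizes(const llvm::object::ObjectFile *Obj,
                                   unsigned StubSize,
                                   std::map<llvm::object::SectionRef,
                                            unsigned> &StubBytes) {
  llvm::error_code err;
  for (llvm::object::section_iterator SI = Obj->begin_sections(),
       SE = Obj->end_sections(); SI != SE; SI.increment(err)) {
    // Relocations live in a separate section; charge them to the section
    // they patch, reserving one worst-case stub per relocation.
    llvm::object::section_iterator Target = SI->getRelocatedSection();
    for (llvm::object::relocation_iterator RI = SI->begin_relocations(),
         RE = SI->end_relocations(); RI != RE; RI.increment(err))
      StubBytes[*Target] += StubSize;
  }
}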
if (StubSize > 0) { - for (relocation_iterator i = Section.begin_relocations(), - e = Section.end_relocations(); i != e; i.increment(err), Check(err)) - StubBufSize += StubSize; + for (section_iterator SI = ObjFile->begin_sections(), + SE = ObjFile->end_sections(); + SI != SE; SI.increment(err), Check(err)) { + section_iterator RelSecI = SI->getRelocatedSection(); + if (!(RelSecI == Section)) + continue; + + for (relocation_iterator I = SI->begin_relocations(), + E = SI->end_relocations(); I != E; I.increment(err), Check(err)) { + StubBufSize += StubSize; + } + } } + StringRef data; uint64_t Alignment64; Check(Section.getContents(data)); @@ -243,6 +254,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, Check(Section.isReadOnlyData(IsReadOnly)); Check(Section.getSize(DataSize)); Check(Section.getName(Name)); + if (StubSize > 0) { + unsigned StubAlignment = getStubAlignment(); + unsigned EndAlignment = (DataSize | Alignment) & -(DataSize | Alignment); + if (StubAlignment > EndAlignment) + StubBufSize += StubAlignment - EndAlignment; + } unsigned Allocate; unsigned SectionID = Sections.size(); @@ -295,8 +312,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, << "\n"); } - Sections.push_back(SectionEntry(Name, Addr, Allocate, DataSize, - (uintptr_t)pData)); + Sections.push_back(SectionEntry(Name, Addr, DataSize, (uintptr_t)pData)); return SectionID; } @@ -339,7 +355,25 @@ void RuntimeDyldImpl::addRelocationForSymbol(const RelocationEntry &RE, } uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { - if (Arch == Triple::arm) { + if (Arch == Triple::aarch64) { + // This stub has to be able to access the full address space, + // since symbol lookup won't necessarily find a handy, in-range, + // PLT stub for functions which could be anywhere. + uint32_t *StubAddr = (uint32_t*)Addr; + + // Stub can use ip0 (== x16) to calculate address + *StubAddr = 0xd2e00010; // movz ip0, #:abs_g3:<addr> + StubAddr++; + *StubAddr = 0xf2c00010; // movk ip0, #:abs_g2_nc:<addr> + StubAddr++; + *StubAddr = 0xf2a00010; // movk ip0, #:abs_g1_nc:<addr> + StubAddr++; + *StubAddr = 0xf2800010; // movk ip0, #:abs_g0_nc:<addr> + StubAddr++; + *StubAddr = 0xd61f0200; // br ip0 + + return Addr; + } else if (Arch == Triple::arm) { // TODO: There is only ARM far stub now. We should add the Thumb stub, // and stubs for branches Thumb - ARM and ARM - Thumb. uint32_t *StubAddr = (uint32_t*)Addr; @@ -362,7 +396,7 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { StubAddr++; *StubAddr = NopInstr; return Addr; - } else if (Arch == Triple::ppc64) { + } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { // PowerPC64 stub: the address points to a function descriptor // instead of the function itself. Load the function address // on r11 and sets it to control register. 
Also loads the function @@ -380,6 +414,13 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { writeInt32BE(Addr+40, 0x4E800420); // bctr return Addr; + } else if (Arch == Triple::systemz) { + writeInt16BE(Addr, 0xC418); // lgrl %r1,.+8 + writeInt16BE(Addr+2, 0x0000); + writeInt16BE(Addr+4, 0x0004); + writeInt16BE(Addr+6, 0x07F1); // brc 15,%r1 + // 8-byte address stored at Addr + 8 + return Addr; } return Addr; } @@ -401,26 +442,14 @@ void RuntimeDyldImpl::reassignSectionAddress(unsigned SectionID, Sections[SectionID].LoadAddress = Addr; } -void RuntimeDyldImpl::resolveRelocationEntry(const RelocationEntry &RE, - uint64_t Value) { - // Ignore relocations for sections that were not loaded - if (Sections[RE.SectionID].Address != 0) { - DEBUG(dbgs() << "\tSectionID: " << RE.SectionID - << " + " << RE.Offset << " (" - << format("%p", Sections[RE.SectionID].Address + RE.Offset) << ")" - << " RelType: " << RE.RelType - << " Addend: " << RE.Addend - << "\n"); - - resolveRelocation(Sections[RE.SectionID], RE.Offset, - Value, RE.RelType, RE.Addend); - } -} - void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, uint64_t Value) { for (unsigned i = 0, e = Relocs.size(); i != e; ++i) { - resolveRelocationEntry(Relocs[i], Value); + const RelocationEntry &RE = Relocs[i]; + // Ignore relocations for sections that were not loaded + if (Sections[RE.SectionID].Address == 0) + continue; + resolveRelocation(RE, Value); } } @@ -472,33 +501,34 @@ RuntimeDyld::~RuntimeDyld() { ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) { if (!Dyld) { - sys::LLVMFileType type = sys::IdentifyFileType( - InputBuffer->getBufferStart(), - static_cast<unsigned>(InputBuffer->getBufferSize())); - switch (type) { - case sys::ELF_Relocatable_FileType: - case sys::ELF_Executable_FileType: - case sys::ELF_SharedObject_FileType: - case sys::ELF_Core_FileType: - Dyld = new RuntimeDyldELF(MM); - break; - case sys::Mach_O_Object_FileType: - case sys::Mach_O_Executable_FileType: - case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: - case sys::Mach_O_Core_FileType: - case sys::Mach_O_PreloadExecutable_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: - case sys::Mach_O_DynamicLinker_FileType: - case sys::Mach_O_Bundle_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: - case sys::Mach_O_DSYMCompanion_FileType: - Dyld = new RuntimeDyldMachO(MM); - break; - case sys::Unknown_FileType: - case sys::Bitcode_FileType: - case sys::Archive_FileType: - case sys::COFF_FileType: - report_fatal_error("Incompatible object format!"); + sys::fs::file_magic Type = + sys::fs::identify_magic(InputBuffer->getBuffer()); + switch (Type) { + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::elf_executable: + case sys::fs::file_magic::elf_shared_object: + case sys::fs::file_magic::elf_core: + Dyld = new RuntimeDyldELF(MM); + break; + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::macho_executable: + case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case sys::fs::file_magic::macho_core: + case sys::fs::file_magic::macho_preload_executable: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case sys::fs::file_magic::macho_dynamic_linker: + case sys::fs::file_magic::macho_bundle: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case sys::fs::file_magic::macho_dsym_companion: + Dyld = new RuntimeDyldMachO(MM); + break; + case sys::fs::file_magic::unknown: + case 
sys::fs::file_magic::bitcode: + case sys::fs::file_magic::archive: + case sys::fs::file_magic::coff_object: + case sys::fs::file_magic::pecoff_executable: + case sys::fs::file_magic::macho_universal_binary: + report_fatal_error("Incompatible object format!"); } } else { if (!Dyld->isCompatibleFormat(InputBuffer)) @@ -534,4 +564,8 @@ StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); } +StringRef RuntimeDyld::getEHFrameSection() { + return Dyld->getEHFrameSection(); +} + } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index b8537b1..cd99c3c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -41,7 +41,7 @@ error_code check(error_code Err) { template<class ELFT> class DyldELFObject : public ELFObjectFile<ELFT> { - LLVM_ELF_IMPORT_TYPES(ELFT) + LLVM_ELF_IMPORT_TYPES_ELFT(ELFT) typedef Elf_Shdr_Impl<ELFT> Elf_Shdr; typedef Elf_Sym_Impl<ELFT> Elf_Sym; @@ -151,6 +151,14 @@ void DyldELFObject<ELFT>::updateSymbolAddress(const SymbolRef &SymRef, namespace llvm { +StringRef RuntimeDyldELF::getEHFrameSection() { + for (int i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Name == ".eh_frame") + return StringRef((const char*)Sections[i].Address, Sections[i].Size); + } + return StringRef(); +} + ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { if (Buffer->getBufferSize() < ELF::EI_NIDENT) llvm_unreachable("Unexpected ELF object size"); @@ -269,12 +277,115 @@ void RuntimeDyldELF::resolveX86Relocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + uint32_t *TargetPtr = reinterpret_cast<uint32_t*>(Section.Address + Offset); + uint64_t FinalAddress = Section.LoadAddress + Offset; + + DEBUG(dbgs() << "resolveAArch64Relocation, LocalAddress: 0x" + << format("%llx", Section.Address + Offset) + << " FinalAddress: 0x" << format("%llx",FinalAddress) + << " Value: 0x" << format("%llx",Value) + << " Type: 0x" << format("%x",Type) + << " Addend: 0x" << format("%llx",Addend) + << "\n"); + + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_AARCH64_ABS64: { + uint64_t *TargetPtr = reinterpret_cast<uint64_t*>(Section.Address + Offset); + *TargetPtr = Value + Addend; + break; + } + case ELF::R_AARCH64_PREL32: { + uint64_t Result = Value + Addend - FinalAddress; + assert(static_cast<int64_t>(Result) >= INT32_MIN && + static_cast<int64_t>(Result) <= UINT32_MAX); + *TargetPtr = static_cast<uint32_t>(Result & 0xffffffffU); + break; + } + case ELF::R_AARCH64_CALL26: // fallthrough + case ELF::R_AARCH64_JUMP26: { + // Operation: S+A-P. Set Call or B immediate value to bits fff_fffc of the + // calculation. + uint64_t BranchImm = Value + Addend - FinalAddress; + + // "Check that -2^27 <= result < 2^27". + assert(-(1LL << 27) <= static_cast<int64_t>(BranchImm) && + static_cast<int64_t>(BranchImm) < (1LL << 27)); + + // AArch64 code is emitted with .rela relocations. The data already in any + // bits affected by the relocation on entry is garbage. + *TargetPtr &= 0xfc000000U; + // Immediate goes in bits 25:0 of B and BL. 
+ *TargetPtr |= static_cast<uint32_t>(BranchImm & 0xffffffcU) >> 2;
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G3: {
+ uint64_t Result = Value + Addend;
+
+ // AArch64 code is emitted with .rela relocations. The data already in any
+ // bits affected by the relocation on entry is garbage.
+ *TargetPtr &= 0xffe0001fU;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= Result >> (48 - 5);
+ // Shift must be "lsl #48", in bits 22:21
+ assert((*TargetPtr >> 21 & 0x3) == 3 && "invalid shift for relocation");
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G2_NC: {
+ uint64_t Result = Value + Addend;
+
+ // AArch64 code is emitted with .rela relocations. The data already in any
+ // bits affected by the relocation on entry is garbage.
+ *TargetPtr &= 0xffe0001fU;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= ((Result & 0xffff00000000ULL) >> (32 - 5));
+ // Shift must be "lsl #32", in bits 22:21
+ assert((*TargetPtr >> 21 & 0x3) == 2 && "invalid shift for relocation");
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G1_NC: {
+ uint64_t Result = Value + Addend;
+
+ // AArch64 code is emitted with .rela relocations. The data already in any
+ // bits affected by the relocation on entry is garbage.
+ *TargetPtr &= 0xffe0001fU;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= ((Result & 0xffff0000U) >> (16 - 5));
+ // Shift must be "lsl #16", in bits 22:21
+ assert((*TargetPtr >> 21 & 0x3) == 1 && "invalid shift for relocation");
+ break;
+ }
+ case ELF::R_AARCH64_MOVW_UABS_G0_NC: {
+ uint64_t Result = Value + Addend;
+
+ // AArch64 code is emitted with .rela relocations. The data already in any
+ // bits affected by the relocation on entry is garbage.
+ *TargetPtr &= 0xffe0001fU;
+ // Immediate goes in bits 20:5 of MOVZ/MOVK instruction
+ *TargetPtr |= ((Result & 0xffffU) << 5);
+ // Shift must be "lsl #0", in bits 22:21.
+ assert((*TargetPtr >> 21 & 0x3) == 0 && "invalid shift for relocation");
+ break;
+ }
+ }
+}
+
void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
uint64_t Offset,
uint32_t Value,
uint32_t Type,
int32_t Addend) {
// TODO: Add Thumb relocations.
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress +
+ Offset);
uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset);
uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF);
Value += Addend;
@@ -293,44 +404,51 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section,
// Write a 32bit value to relocation address, taking into account the
// implicit addend encoded in the target.
- case ELF::R_ARM_TARGET1 :
- case ELF::R_ARM_ABS32 :
- *TargetPtr += Value;
+ case ELF::R_ARM_TARGET1:
+ case ELF::R_ARM_ABS32:
+ *TargetPtr = *Placeholder + Value;
break;
-
// Write the first 16 bits of the 32-bit value to the MOV instruction.
// The last 4 bits should be shifted.
- case ELF::R_ARM_MOVW_ABS_NC :
+ case ELF::R_ARM_MOVW_ABS_NC:
// We are not expecting any other addend in the relocation address.
// Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2
// non-contiguous fields.
- assert((*TargetPtr & 0x000F0FFF) == 0);
+ assert((*Placeholder & 0x000F0FFF) == 0);
Value = Value & 0xFFFF;
- *TargetPtr |= Value & 0xFFF;
+ *TargetPtr = *Placeholder | (Value & 0xFFF);
*TargetPtr |= ((Value >> 12) & 0xF) << 16;
break;
-
// Write the last 16 bits of the 32-bit value to the MOV instruction.
// The last 4 bits should be shifted.
- case ELF::R_ARM_MOVT_ABS :
+ case ELF::R_ARM_MOVT_ABS:
// We are not expecting any other addend in the relocation address.
// Use 0x000F0FFF for the same reason as R_ARM_MOVW_ABS_NC.
- assert((*TargetPtr & 0x000F0FFF) == 0);
+ assert((*Placeholder & 0x000F0FFF) == 0);
+
Value = (Value >> 16) & 0xFFFF;
- *TargetPtr |= Value & 0xFFF;
+ *TargetPtr = *Placeholder | (Value & 0xFFF);
*TargetPtr |= ((Value >> 12) & 0xF) << 16;
break;
-
// Write a 24-bit relative value to the branch instruction.
case ELF::R_ARM_PC24 : // Fall through.
case ELF::R_ARM_CALL : // Fall through.
- case ELF::R_ARM_JUMP24 :
+ case ELF::R_ARM_JUMP24: {
int32_t RelValue = static_cast<int32_t>(Value - FinalAddress - 8);
RelValue = (RelValue & 0x03FFFFFC) >> 2;
+ assert((*TargetPtr & 0xFFFFFF) == 0xFFFFFE);
*TargetPtr &= 0xFF000000;
*TargetPtr |= RelValue;
break;
+ }
+ case ELF::R_ARM_PRIVATE_0:
+ // This relocation is reserved by the ARM ELF ABI for internal use. We
+ // appropriate it here to act as an R_ARM_ABS32 without any addend for use
+ // in the stubs created during JIT (which can't put an addend into the
+ // original object file).
+ *TargetPtr = Value;
+ break;
}
}

void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
@@ -338,6 +456,8 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
uint32_t Value,
uint32_t Type,
int32_t Addend) {
+ uint32_t *Placeholder = reinterpret_cast<uint32_t*>(Section.ObjAddress +
+ Offset);
uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset);
Value += Addend;
@@ -355,19 +475,30 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section,
llvm_unreachable("Not implemented relocation type!");
break;
case ELF::R_MIPS_32:
- *TargetPtr = Value + (*TargetPtr);
+ *TargetPtr = Value + (*Placeholder);
break;
case ELF::R_MIPS_26:
- *TargetPtr = ((*TargetPtr) & 0xfc000000) | (( Value & 0x0fffffff) >> 2);
+ *TargetPtr = ((*Placeholder) & 0xfc000000) | (( Value & 0x0fffffff) >> 2);
break;
case ELF::R_MIPS_HI16:
// Get the higher 16 bits. Also add 1 if bit 15 is 1.
- Value += ((*TargetPtr) & 0x0000ffff) << 16;
+ Value += ((*Placeholder) & 0x0000ffff) << 16;
+ *TargetPtr = ((*Placeholder) & 0xffff0000) |
+ (((Value + 0x8000) >> 16) & 0xffff);
+ break;
+ case ELF::R_MIPS_LO16:
+ Value += ((*Placeholder) & 0x0000ffff);
+ *TargetPtr = ((*Placeholder) & 0xffff0000) | (Value & 0xffff);
+ break;
+ case ELF::R_MIPS_UNUSED1:
+ // Similar to ELF::R_ARM_PRIVATE_0, R_MIPS_UNUSED1 and R_MIPS_UNUSED2
+ // are used for internal JIT purposes. These relocations are similar to
+ // R_MIPS_HI16 and R_MIPS_LO16, but they do not take any addend into
+ // account.
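The +0x8000 rounding used by the R_MIPS_HI16 case above (and by the reused code just below for R_MIPS_UNUSED1) is the standard %hi/%lo carry fix-up: the low half is sign-extended by the instruction that consumes it, so the high half must be rounded up whenever bit 15 of the final value is set. A standalone sketch of the arithmetic; the helper names are ours, not part of the patch:

    #include <assert.h>
    #include <stdint.h>

    // lo16 is sign-extended by the consuming instruction (e.g. addiu), so
    // hi16 rounds instead of truncating: hi(x) = (x + 0x8000) >> 16.
    static uint32_t hi16(uint32_t X) { return ((X + 0x8000) >> 16) & 0xffff; }
    static uint32_t lo16(uint32_t X) { return X & 0xffff; }

    int main() {
      const uint32_t Tests[] = { 0x12345678, 0x0000ffff, 0xdeadbeef, 0x80008000 };
      for (unsigned i = 0; i != sizeof(Tests) / sizeof(Tests[0]); ++i) {
        uint32_t X = Tests[i];
        // (hi << 16) plus the sign-extended lo reassembles the exact value.
        uint32_t Rebuilt = (hi16(X) << 16) + (uint32_t)(int32_t)(int16_t)lo16(X);
        assert(Rebuilt == X);
      }
      return 0;
    }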
*TargetPtr = ((*TargetPtr) & 0xffff0000) | (((Value + 0x8000) >> 16) & 0xffff); break; - case ELF::R_MIPS_LO16: - Value += ((*TargetPtr) & 0x0000ffff); + case ELF::R_MIPS_UNUSED2: *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff); break; } @@ -412,9 +543,13 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, error_code err; for (section_iterator si = Obj.begin_sections(), se = Obj.end_sections(); si != se; si.increment(err)) { - StringRef SectionName; - check(si->getName(SectionName)); - if (SectionName != ".opd") + section_iterator RelSecI = si->getRelocatedSection(); + if (RelSecI == Obj.end_sections()) + continue; + + StringRef RelSectionName; + check(RelSecI->getName(RelSectionName)); + if (RelSectionName != ".opd") continue; for (relocation_iterator i = si->begin_relocations(), @@ -430,12 +565,11 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, continue; } - SymbolRef TargetSymbol; uint64_t TargetSymbolOffset; - int64_t TargetAdditionalInfo; - check(i->getSymbol(TargetSymbol)); + symbol_iterator TargetSymbol = i->getSymbol(); check(i->getOffset(TargetSymbolOffset)); - check(i->getAdditionalInfo(TargetAdditionalInfo)); + int64_t Addend; + check(getELFRelocationAddend(*i, Addend)); i = i.increment(err); if (i == e) @@ -455,9 +589,9 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, continue; section_iterator tsi(Obj.end_sections()); - check(TargetSymbol.getSection(tsi)); + check(TargetSymbol->getSection(tsi)); Rel.SectionID = findOrEmitSection(Obj, (*tsi), true, LocalSections); - Rel.Addend = (intptr_t)TargetAdditionalInfo; + Rel.Addend = (intptr_t)Addend; return; } } @@ -541,6 +675,11 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, llvm_unreachable("Relocation R_PPC64_REL32 overflow"); writeInt32BE(LocalAddress, delta); } break; + case ELF::R_PPC64_REL64: { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + uint64_t Delta = Value - FinalAddress + Addend; + writeInt64BE(LocalAddress, Delta); + } break; case ELF::R_PPC64_ADDR64 : writeInt64BE(LocalAddress, Value + Addend); break; @@ -560,6 +699,48 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, } } +void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend) { + uint8_t *LocalAddress = Section.Address + Offset; + switch (Type) { + default: + llvm_unreachable("Relocation type not implemented yet!"); + break; + case ELF::R_390_PC16DBL: + case ELF::R_390_PLT16DBL: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int16_t(Delta / 2) * 2 == Delta && "R_390_PC16DBL overflow"); + writeInt16BE(LocalAddress, Delta / 2); + break; + } + case ELF::R_390_PC32DBL: + case ELF::R_390_PLT32DBL: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int32_t(Delta / 2) * 2 == Delta && "R_390_PC32DBL overflow"); + writeInt32BE(LocalAddress, Delta / 2); + break; + } + case ELF::R_390_PC32: { + int64_t Delta = (Value + Addend) - (Section.LoadAddress + Offset); + assert(int32_t(Delta) == Delta && "R_390_PC32 overflow"); + writeInt32BE(LocalAddress, Delta); + break; + } + case ELF::R_390_64: + writeInt64BE(LocalAddress, Value + Addend); + break; + } +} + +void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend); +} + void 
RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
uint64_t Offset,
uint64_t Value,
@@ -574,6 +755,9 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
(uint32_t)(Value & 0xffffffffL), Type,
(uint32_t)(Addend & 0xffffffffL));
break;
+ case Triple::aarch64:
+ resolveAArch64Relocation(Section, Offset, Value, Type, Addend);
+ break;
case Triple::arm: // Fall through.
case Triple::thumb:
resolveARMRelocation(Section, Offset,
@@ -586,45 +770,56 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section,
(uint32_t)(Value & 0xffffffffL), Type,
(uint32_t)(Addend & 0xffffffffL));
break;
- case Triple::ppc64:
+ case Triple::ppc64: // Fall through.
+ case Triple::ppc64le:
resolvePPC64Relocation(Section, Offset, Value, Type, Addend);
break;
+ case Triple::systemz:
+ resolveSystemZRelocation(Section, Offset, Value, Type, Addend);
+ break;
default: llvm_unreachable("Unsupported CPU type!");
}
}

-void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
+void RuntimeDyldELF::processRelocationRef(unsigned SectionID,
+ RelocationRef RelI,
ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID,
const SymbolTableMap &Symbols,
StubMap &Stubs) {
-
- uint32_t RelType = (uint32_t)(Rel.Type & 0xffffffffL);
- intptr_t Addend = (intptr_t)Rel.AdditionalInfo;
- const SymbolRef &Symbol = Rel.Symbol;
+ uint64_t RelType;
+ Check(RelI.getType(RelType));
+ int64_t Addend;
+ Check(getELFRelocationAddend(RelI, Addend));
+ symbol_iterator Symbol = RelI.getSymbol();

// Obtain the symbol name which is referenced in the relocation
StringRef TargetName;
- Symbol.getName(TargetName);
+ if (Symbol != Obj.end_symbols())
+ Symbol->getName(TargetName);
DEBUG(dbgs() << "\t\tRelType: " << RelType
<< " Addend: " << Addend
<< " TargetName: " << TargetName
<< "\n");
RelocationValueRef Value;
// First search for the symbol in the local symbol table
- SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data());
- SymbolRef::Type SymType;
- Symbol.getType(SymType);
+ SymbolTableMap::const_iterator lsi = Symbols.end();
+ SymbolRef::Type SymType = SymbolRef::ST_Unknown;
+ if (Symbol != Obj.end_symbols()) {
+ lsi = Symbols.find(TargetName.data());
+ Symbol->getType(SymType);
+ }
if (lsi != Symbols.end()) {
Value.SectionID = lsi->second.first;
- Value.Addend = lsi->second.second;
+ Value.Addend = lsi->second.second + Addend;
} else {
// Search for the symbol in the global symbol table
- SymbolTableMap::const_iterator gsi =
- GlobalSymbolTable.find(TargetName.data());
+ SymbolTableMap::const_iterator gsi = GlobalSymbolTable.end();
+ if (Symbol != Obj.end_symbols())
+ gsi = GlobalSymbolTable.find(TargetName.data());
if (gsi != GlobalSymbolTable.end()) {
Value.SectionID = gsi->second.first;
- Value.Addend = gsi->second.second;
+ Value.Addend = gsi->second.second + Addend;
} else {
switch (SymType) {
case SymbolRef::ST_Debug: {
@@ -632,7 +827,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
// and can be changed by other developers. Maybe the best way is to add
// a new symbol type ST_Section to SymbolRef and use it.
section_iterator si(Obj.end_sections()); - Symbol.getSection(si); + Symbol->getSection(si); if (si == Obj.end_sections()) llvm_unreachable("Symbol section not found, bad object file format!"); DEBUG(dbgs() << "\t\tThis is section symbol\n"); @@ -657,21 +852,73 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } } - DEBUG(dbgs() << "\t\tRel.SectionID: " << Rel.SectionID - << " Rel.Offset: " << Rel.Offset + uint64_t Offset; + Check(RelI.getOffset(Offset)); + + DEBUG(dbgs() << "\t\tSectionID: " << SectionID + << " Offset: " << Offset << "\n"); - if (Arch == Triple::arm && + if (Arch == Triple::aarch64 && + (RelType == ELF::R_AARCH64_CALL26 || + RelType == ELF::R_AARCH64_JUMP26)) { + // This is an AArch64 branch relocation, need to use a stub function. + DEBUG(dbgs() << "\t\tThis is an AArch64 branch relocation."); + SectionEntry &Section = Sections[SectionID]; + + // Look for an existing stub. + StubMap::const_iterator i = Stubs.find(Value); + if (i != Stubs.end()) { + resolveRelocation(Section, Offset, + (uint64_t)Section.Address + i->second, RelType, 0); + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. + DEBUG(dbgs() << " Create a new stub function\n"); + Stubs[Value] = Section.StubOffset; + uint8_t *StubTargetAddr = createStubFunction(Section.Address + + Section.StubOffset); + + RelocationEntry REmovz_g3(SectionID, + StubTargetAddr - Section.Address, + ELF::R_AARCH64_MOVW_UABS_G3, Value.Addend); + RelocationEntry REmovk_g2(SectionID, + StubTargetAddr - Section.Address + 4, + ELF::R_AARCH64_MOVW_UABS_G2_NC, Value.Addend); + RelocationEntry REmovk_g1(SectionID, + StubTargetAddr - Section.Address + 8, + ELF::R_AARCH64_MOVW_UABS_G1_NC, Value.Addend); + RelocationEntry REmovk_g0(SectionID, + StubTargetAddr - Section.Address + 12, + ELF::R_AARCH64_MOVW_UABS_G0_NC, Value.Addend); + + if (Value.SymbolName) { + addRelocationForSymbol(REmovz_g3, Value.SymbolName); + addRelocationForSymbol(REmovk_g2, Value.SymbolName); + addRelocationForSymbol(REmovk_g1, Value.SymbolName); + addRelocationForSymbol(REmovk_g0, Value.SymbolName); + } else { + addRelocationForSection(REmovz_g3, Value.SectionID); + addRelocationForSection(REmovk_g2, Value.SectionID); + addRelocationForSection(REmovk_g1, Value.SectionID); + addRelocationForSection(REmovk_g0, Value.SectionID); + } + resolveRelocation(Section, Offset, + (uint64_t)Section.Address + Section.StubOffset, + RelType, 0); + Section.StubOffset += getMaxStubSize(); + } + } else if (Arch == Triple::arm && (RelType == ELF::R_ARM_PC24 || RelType == ELF::R_ARM_CALL || RelType == ELF::R_ARM_JUMP24)) { // This is an ARM branch relocation, need to use a stub function. DEBUG(dbgs() << "\t\tThis is an ARM branch relocation."); - SectionEntry &Section = Sections[Rel.SectionID]; + SectionEntry &Section = Sections[SectionID]; // Look for an existing stub. 
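The four RelocationEntry objects created in the AArch64 branch-stub path above target the movz/movk instructions at stub offsets 0, 4, 8 and 12, one 16-bit slice of the final address each, which the G3/G2_NC/G1_NC/G0_NC resolvers then write into bits 20:5 of each instruction. A worked sketch of that slicing; illustrative only, and slice() is our name, not an LLVM API:

    #include <assert.h>
    #include <stdint.h>

    // One 16-bit slice per instruction: movz takes slice 3 (lsl #48) and
    // the three movk's take slices 2, 1 and 0.
    static uint16_t slice(uint64_t Addr, unsigned N) { // N = 0..3
      return (uint16_t)(Addr >> (16 * N));
    }

    int main() {
      const uint64_t Addr = 0x00007fff12345678ULL;
      uint64_t Rebuilt = ((uint64_t)slice(Addr, 3) << 48) |
                         ((uint64_t)slice(Addr, 2) << 32) |
                         ((uint64_t)slice(Addr, 1) << 16) |
                          (uint64_t)slice(Addr, 0);
      assert(Rebuilt == Addr); // the final br ip0 then jumps to Addr
      return 0;
    }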
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + i->second, RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
@@ -680,14 +927,14 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr = createStubFunction(Section.Address +
Section.StubOffset);
- RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
- ELF::R_ARM_ABS32, Value.Addend);
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
+ ELF::R_ARM_PRIVATE_0, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
RelType, 0);
Section.StubOffset += getMaxStubSize();
@@ -696,8 +943,8 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
RelType == ELF::R_MIPS_26) {
// This is a Mips branch relocation, need to use a stub function.
DEBUG(dbgs() << "\t\tThis is a Mips branch relocation.");
- SectionEntry &Section = Sections[Rel.SectionID];
- uint8_t *Target = Section.Address + Rel.Offset;
+ SectionEntry &Section = Sections[SectionID];
+ uint8_t *Target = Section.Address + Offset;
uint32_t *TargetAddress = (uint32_t *)Target;
// Extract the addend from the instruction.
@@ -708,7 +955,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
// Look for an existing stub.
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + i->second, RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
@@ -719,12 +966,12 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Section.StubOffset);
// Creating Hi and Lo relocations for the filled stub instructions.
- RelocationEntry REHi(Rel.SectionID,
+ RelocationEntry REHi(SectionID,
StubTargetAddr - Section.Address,
- ELF::R_MIPS_HI16, Value.Addend);
- RelocationEntry RELo(Rel.SectionID,
+ ELF::R_MIPS_UNUSED1, Value.Addend);
+ RelocationEntry RELo(SectionID,
StubTargetAddr - Section.Address + 4,
- ELF::R_MIPS_LO16, Value.Addend);
+ ELF::R_MIPS_UNUSED2, Value.Addend);
if (Value.SymbolName) {
addRelocationForSymbol(REHi, Value.SymbolName);
@@ -734,18 +981,18 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
addRelocationForSection(RELo, Value.SectionID);
}
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
RelType, 0);
Section.StubOffset += getMaxStubSize();
}
- } else if (Arch == Triple::ppc64) {
+ } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) {
if (RelType == ELF::R_PPC64_REL24) {
// A PPC branch relocation will need a stub function if the target is
// an external symbol (SymbolRef::ST_Unknown) or if the target address
// is not within the signed 24-bit branch range.
- SectionEntry &Section = Sections[Rel.SectionID];
- uint8_t *Target = Section.Address + Rel.Offset;
+ SectionEntry &Section = Sections[SectionID];
+ uint8_t *Target = Section.Address + Offset;
bool RangeOverflow = false;
if (SymType != SymbolRef::ST_Unknown) {
// A function call may point to the .opd entry, so the final symbol value
@@ -755,7 +1002,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
int32_t delta = static_cast<int32_t>(Target - RelocTarget);
// If it is within the 24-bit branch range, just set the branch target
if (SignExtend32<24>(delta) == delta) {
- RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
@@ -770,7 +1017,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
StubMap::const_iterator i = Stubs.find(Value);
if (i != Stubs.end()) {
// Symbol function stub already created, just relocate to it
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + i->second, RelType, 0);
DEBUG(dbgs() << " Stub function found\n");
} else {
@@ -779,21 +1026,21 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr = createStubFunction(Section.Address +
Section.StubOffset);
- RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
ELF::R_PPC64_ADDR64, Value.Addend);
// Generate the 64-bit address loads as exemplified in section
// 4.5.1 of the PPC64 ELF ABI.
- RelocationEntry REhst(Rel.SectionID,
+ RelocationEntry REhst(SectionID,
StubTargetAddr - Section.Address + 2,
ELF::R_PPC64_ADDR16_HIGHEST, Value.Addend);
- RelocationEntry REhr(Rel.SectionID,
+ RelocationEntry REhr(SectionID,
StubTargetAddr - Section.Address + 6,
ELF::R_PPC64_ADDR16_HIGHER, Value.Addend);
- RelocationEntry REh(Rel.SectionID,
+ RelocationEntry REh(SectionID,
StubTargetAddr - Section.Address + 14,
ELF::R_PPC64_ADDR16_HI, Value.Addend);
- RelocationEntry REl(Rel.SectionID,
+ RelocationEntry REl(SectionID,
StubTargetAddr - Section.Address + 18,
ELF::R_PPC64_ADDR16_LO, Value.Addend);
@@ -809,7 +1056,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
addRelocationForSection(REl, Value.SectionID);
}
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
RelType, 0);
if (SymType == SymbolRef::ST_Unknown)
@@ -819,7 +1066,7 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel,
}
}
} else {
- RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend);
// Extra check to avoid relocation against empty symbols (usually
// the R_PPC64_TOC).
if (Value.SymbolName && !TargetName.empty())
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
}
+ } else if (Arch == Triple::systemz &&
+ (RelType == ELF::R_390_PLT32DBL ||
+ RelType == ELF::R_390_GOTENT)) {
+ // Create function stubs for both PLT and GOT references, regardless of
+ // whether the GOT reference is to data or code. The stub contains the
+ // full address of the symbol, as needed by GOT references, and the
+ // executable part only adds an overhead of 8 bytes.
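The SignExtend32<24>(delta) == delta test in the PPC64 path above is the usual sign-extend-and-compare idiom for asking whether a delta fits in a signed 24-bit immediate. A minimal sketch of the same check; this is our helper, not LLVM's MathExtras:

    #include <assert.h>
    #include <stdint.h>

    // Shift the low B bits up to the sign bit and arithmetically back down;
    // if nothing changed, X fits in a signed B-bit field. Like LLVM's
    // SignExtend32, this relies on arithmetic right shift of negatives.
    template <unsigned B> static int32_t signExtend32(uint32_t X) {
      return (int32_t)(X << (32 - B)) >> (32 - B);
    }

    int main() {
      assert(signExtend32<24>(0x007fffff) == 0x007fffff);   // max positive
      assert(signExtend32<24>(0xff800000) == -0x00800000);  // min negative
      assert(signExtend32<24>(0x00800000) != 0x00800000);   // out of range
      return 0;
    }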
+ // + // We could try to conserve space by allocating the code and data + // parts of the stub separately. However, as things stand, we allocate + // a stub for every relocation, so using a GOT in JIT code should be + // no less space efficient than using an explicit constant pool. + DEBUG(dbgs() << "\t\tThis is a SystemZ indirect relocation."); + SectionEntry &Section = Sections[SectionID]; + + // Look for an existing stub. + StubMap::const_iterator i = Stubs.find(Value); + uintptr_t StubAddress; + if (i != Stubs.end()) { + StubAddress = uintptr_t(Section.Address) + i->second; + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function. + DEBUG(dbgs() << " Create a new stub function\n"); + + uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t StubAlignment = getStubAlignment(); + StubAddress = (BaseAddress + Section.StubOffset + + StubAlignment - 1) & -StubAlignment; + unsigned StubOffset = StubAddress - BaseAddress; + + Stubs[Value] = StubOffset; + createStubFunction((uint8_t *)StubAddress); + RelocationEntry RE(SectionID, StubOffset + 8, + ELF::R_390_64, Value.Addend - Addend); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + Section.StubOffset = StubOffset + getMaxStubSize(); + } + + if (RelType == ELF::R_390_GOTENT) + resolveRelocation(Section, Offset, StubAddress + 8, + ELF::R_390_PC32DBL, Addend); + else + resolveRelocation(Section, Offset, StubAddress, RelType, Addend); } else { - RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend); + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -836,13 +1130,6 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, } } -unsigned RuntimeDyldELF::getCommonSymbolAlignment(const SymbolRef &Sym) { - // In ELF, the value of an SHN_COMMON symbol is its alignment requirement. 
- uint64_t Align; - Check(Sym.getValue(Align)); - return Align; -} - bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const { if (Buffer->getBufferSize() < strlen(ELF::ElfMagic)) return false; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 07e704b..794c7ec 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -31,7 +31,12 @@ namespace { } // end anonymous namespace class RuntimeDyldELF : public RuntimeDyldImpl { -protected: + void resolveRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); + void resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, @@ -44,6 +49,12 @@ protected: uint32_t Type, int32_t Addend); + void resolveAArch64Relocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); + void resolveARMRelocation(const SectionEntry &Section, uint64_t Offset, uint32_t Value, @@ -62,21 +73,11 @@ protected: uint32_t Type, int64_t Addend); - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend); - - virtual void processRelocationRef(const ObjRelocationInfo &Rel, - ObjectImage &Obj, - ObjSectionToIDMap &ObjSectionToID, - const SymbolTableMap &Symbols, - StubMap &Stubs); - - unsigned getCommonSymbolAlignment(const SymbolRef &Sym); - - virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); + void resolveSystemZRelocation(const SectionEntry &Section, + uint64_t Offset, + uint64_t Value, + uint32_t Type, + int64_t Addend); uint64_t findPPC64TOC() const; void findOPDEntrySection(ObjectImage &Obj, @@ -84,12 +85,19 @@ protected: RelocationValueRef &Rel); public: - RuntimeDyldELF(RTDyldMemoryManager *mm) - : RuntimeDyldImpl(mm) {} + RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value); + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, + ObjectImage &Obj, + ObjSectionToIDMap &ObjSectionToID, + const SymbolTableMap &Symbols, + StubMap &Stubs); + virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const; + virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); + virtual StringRef getEHFrameSection(); virtual ~RuntimeDyldELF(); - - bool isCompatibleFormat(const ObjectBuffer *Buffer) const; }; } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index f100994..14d945b 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -49,7 +49,7 @@ public: /// Address - address in the linker's memory where the section resides. uint8_t *Address; - /// Size - section size. + /// Size - section size. Doesn't include the stubs. size_t Size; /// LoadAddress - the address of the section in the target process's memory. 
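The SectionEntry changes here encode the new stub layout: Size covers only the section data, the stub buffer follows it, and StubOffset now starts at Size. Combined with the EndAlignment padding added in emitSection and the round-up used for SystemZ stubs, the first stub always lands on a getStubAlignment() boundary. A compressed sketch of that arithmetic, with helper names of our choosing:

    #include <assert.h>
    #include <stdint.h>

    // Lowest set bit of (DataSize | Alignment): the alignment that is
    // guaranteed at the end of the section data (see emitSection above).
    static uint64_t endAlignment(uint64_t DataSize, uint64_t Alignment) {
      uint64_t V = DataSize | Alignment;
      return V & (0 - V); // two's complement isolates the lowest 1 bit
    }

    // Round up to a power-of-two boundary, as the SystemZ stub path does:
    // (X + A - 1) & -A.
    static uint64_t alignUp(uint64_t X, uint64_t A) {
      return (X + A - 1) & (0 - A);
    }

    int main() {
      const uint64_t DataSize = 20, Alignment = 16, StubAlign = 8;
      uint64_t End = endAlignment(DataSize, Alignment);
      assert(End == 4); // a 20-byte, 16-aligned section ends 4-byte aligned
      // emitSection grows the stub buffer by the shortfall...
      uint64_t Pad = StubAlign > End ? StubAlign - End : 0;
      assert(Pad == 4);
      // ...so rounding StubOffset (== Size) up stays inside the allocation.
      assert(alignUp(DataSize, StubAlign) == DataSize + Pad);
      return 0;
    }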
@@ -67,9 +67,9 @@ public: uintptr_t ObjAddress; SectionEntry(StringRef name, uint8_t *address, size_t size, - uintptr_t stubOffset, uintptr_t objAddress) + uintptr_t objAddress) : Name(name), Address(address), Size(size), LoadAddress((uintptr_t)address), - StubOffset(stubOffset), ObjAddress(objAddress) {} + StubOffset(size), ObjAddress(objAddress) {} }; /// RelocationEntry - used to represent relocations internally in the dynamic @@ -89,20 +89,20 @@ public: /// used to make a relocation section relative instead of symbol relative. intptr_t Addend; + /// True if this is a PCRel relocation (MachO specific). + bool IsPCRel; + + /// The size of this relocation (MachO specific). + unsigned Size; + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend) - : SectionID(id), Offset(offset), RelType(type), Addend(addend) {} -}; + : SectionID(id), Offset(offset), RelType(type), Addend(addend), + IsPCRel(false), Size(0) {} -/// ObjRelocationInfo - relocation information as read from the object file. -/// Used to pass around data taken from object::RelocationRef, together with -/// the section to which the relocation points (represented by a SectionID). -class ObjRelocationInfo { -public: - unsigned SectionID; - uint64_t Offset; - SymbolRef Symbol; - uint64_t Type; - int64_t AdditionalInfo; + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend, + bool IsPCRel, unsigned Size) + : SectionID(id), Offset(offset), RelType(type), Addend(addend), + IsPCRel(IsPCRel), Size(Size) {} }; class RelocationValueRef { @@ -166,16 +166,29 @@ protected: Triple::ArchType Arch; inline unsigned getMaxStubSize() { + if (Arch == Triple::aarch64) + return 20; // movz; movk; movk; movk; br if (Arch == Triple::arm || Arch == Triple::thumb) return 8; // 32-bit instruction and 32-bit address else if (Arch == Triple::mipsel || Arch == Triple::mips) return 16; - else if (Arch == Triple::ppc64) + else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) return 44; + else if (Arch == Triple::x86_64) + return 8; // GOT + else if (Arch == Triple::systemz) + return 16; else return 0; } + inline unsigned getStubAlignment() { + if (Arch == Triple::systemz) + return 8; + else + return 1; + } + bool HasError; std::string ErrorStr; @@ -194,22 +207,15 @@ protected: return (uint8_t*)Sections[SectionID].Address; } - // Subclasses can override this method to get the alignment requirement of - // a common symbol. Returns no alignment requirement if not implemented. - virtual unsigned getCommonSymbolAlignment(const SymbolRef &Sym) { - return 0; - } - - void writeInt16BE(uint8_t *Addr, uint16_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 8) & 0xFF; *(Addr+1) = Value & 0xFF; } void writeInt32BE(uint8_t *Addr, uint32_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 24) & 0xFF; *(Addr+1) = (Value >> 16) & 0xFF; @@ -218,7 +224,7 @@ protected: } void writeInt64BE(uint8_t *Addr, uint64_t Value) { - if (sys::isLittleEndianHost()) + if (sys::IsLittleEndianHost) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 56) & 0xFF; *(Addr+1) = (Value >> 48) & 0xFF; @@ -269,24 +275,16 @@ protected: /// \brief Resolves relocations from Relocs list with address from Value. 
void resolveRelocationList(const RelocationList &Relocs, uint64_t Value); - void resolveRelocationEntry(const RelocationEntry &RE, uint64_t Value); /// \brief A object file specific relocation resolver - /// \param Section The section where the relocation is being applied - /// \param Offset The offset into the section for this relocation + /// \param RE The relocation to be resolved /// \param Value Target symbol address to apply the relocation action - /// \param Type object file specific relocation type - /// \param Addend A constant addend used to compute the value to be stored - /// into the relocatable field - virtual void resolveRelocation(const SectionEntry &Section, - uint64_t Offset, - uint64_t Value, - uint32_t Type, - int64_t Addend) = 0; + virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value) = 0; /// \brief Parses the object file relocation and stores it to Relocations /// or SymbolRelocations (this depends on the object file type). - virtual void processRelocationRef(const ObjRelocationInfo &Rel, + virtual void processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, @@ -336,6 +334,8 @@ public: StringRef getErrorString() { return ErrorStr; } virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0; + + virtual StringRef getEHFrameSection(); }; } // end namespace llvm diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index bcc3df1..0384b32 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -21,16 +21,87 @@ using namespace llvm::object; namespace llvm { +static unsigned char *processFDE(unsigned char *P, intptr_t DeltaForText, intptr_t DeltaForEH) { + uint32_t Length = *((uint32_t*)P); + P += 4; + unsigned char *Ret = P + Length; + uint32_t Offset = *((uint32_t*)P); + if (Offset == 0) // is a CIE + return Ret; + + P += 4; + intptr_t FDELocation = *((intptr_t*)P); + intptr_t NewLocation = FDELocation - DeltaForText; + *((intptr_t*)P) = NewLocation; + P += sizeof(intptr_t); + + // Skip the FDE address range + P += sizeof(intptr_t); + + uint8_t Augmentationsize = *P; + P += 1; + if (Augmentationsize != 0) { + intptr_t LSDA = *((intptr_t*)P); + intptr_t NewLSDA = LSDA - DeltaForEH; + *((intptr_t*)P) = NewLSDA; + } + + return Ret; +} + +static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) { + intptr_t ObjDistance = A->ObjAddress - B->ObjAddress; + intptr_t MemDistance = A->LoadAddress - B->LoadAddress; + return ObjDistance - MemDistance; +} + +StringRef RuntimeDyldMachO::getEHFrameSection() { + SectionEntry *Text = NULL; + SectionEntry *EHFrame = NULL; + SectionEntry *ExceptTab = NULL; + for (int i = 0, e = Sections.size(); i != e; ++i) { + if (Sections[i].Name == "__eh_frame") + EHFrame = &Sections[i]; + else if (Sections[i].Name == "__text") + Text = &Sections[i]; + else if (Sections[i].Name == "__gcc_except_tab") + ExceptTab = &Sections[i]; + } + if (Text == NULL || EHFrame == NULL) + return StringRef(); + + intptr_t DeltaForText = computeDelta(Text, EHFrame); + intptr_t DeltaForEH = 0; + if (ExceptTab) + DeltaForEH = computeDelta(ExceptTab, EHFrame); + + unsigned char *P = EHFrame->Address; + unsigned char *End = P + EHFrame->Size; + do { + P = processFDE(P, DeltaForText, DeltaForEH); + } while(P != End); + + return StringRef((char*)EHFrame->Address, EHFrame->Size); +} + +void 
RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE, + uint64_t Value) { + const SectionEntry &Section = Sections[RE.SectionID]; + return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, + RE.IsPCRel, RE.Size); +} + void RuntimeDyldMachO::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, - int64_t Addend) { + int64_t Addend, + bool isPCRel, + unsigned LogSize) { uint8_t *LocalAddress = Section.Address + Offset; uint64_t FinalAddress = Section.LoadAddress + Offset; - bool isPCRel = (Type >> 24) & 1; - unsigned MachoType = (Type >> 28) & 0xf; - unsigned Size = 1 << ((Type >> 25) & 3); + unsigned MachoType = Type; + unsigned Size = 1 << LogSize; DEBUG(dbgs() << "resolveRelocation LocalAddress: " << format("%p", LocalAddress) @@ -205,89 +276,110 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, return false; } -void RuntimeDyldMachO::processRelocationRef(const ObjRelocationInfo &Rel, +void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, + RelocationRef RelI, ObjectImage &Obj, ObjSectionToIDMap &ObjSectionToID, const SymbolTableMap &Symbols, StubMap &Stubs) { + const ObjectFile *OF = Obj.getObjectFile(); + const MachOObjectFile *MachO = static_cast<const MachOObjectFile*>(OF); + macho::RelocationEntry RE = MachO->getRelocation(RelI.getRawDataRefImpl()); - uint32_t RelType = (uint32_t) (Rel.Type & 0xffffffffL); + uint32_t RelType = MachO->getAnyRelocationType(RE); RelocationValueRef Value; - SectionEntry &Section = Sections[Rel.SectionID]; + SectionEntry &Section = Sections[SectionID]; + + bool isExtern = MachO->getPlainRelocationExternal(RE); + bool IsPCRel = MachO->getAnyRelocationPCRel(RE); + unsigned Size = MachO->getAnyRelocationLength(RE); + uint64_t Offset; + RelI.getOffset(Offset); + uint8_t *LocalAddress = Section.Address + Offset; + unsigned NumBytes = 1 << Size; + uint64_t Addend = 0; + memcpy(&Addend, LocalAddress, NumBytes); - bool isExtern = (RelType >> 27) & 1; if (isExtern) { // Obtain the symbol name which is referenced in the relocation + symbol_iterator Symbol = RelI.getSymbol(); StringRef TargetName; - const SymbolRef &Symbol = Rel.Symbol; - Symbol.getName(TargetName); + Symbol->getName(TargetName); // First search for the symbol in the local symbol table SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; - Value.Addend = lsi->second.second; + Value.Addend = lsi->second.second + Addend; } else { // Search for the symbol in the global symbol table SymbolTableMap::const_iterator gsi = GlobalSymbolTable.find(TargetName.data()); if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; - Value.Addend = gsi->second.second; - } else + Value.Addend = gsi->second.second + Addend; + } else { Value.SymbolName = TargetName.data(); + Value.Addend = Addend; + } } } else { - error_code err; - uint8_t sectionIndex = static_cast<uint8_t>(RelType & 0xFF); - section_iterator si = Obj.begin_sections(), - se = Obj.end_sections(); - for (uint8_t i = 1; i < sectionIndex; i++) { - error_code err; - si.increment(err); - if (si == se) - break; - } - assert(si != se && "No section containing relocation!"); - Value.SectionID = findOrEmitSection(Obj, *si, true, ObjSectionToID); - Value.Addend = 0; - // FIXME: The size and type of the relocation determines if we can - // encode an Addend in the target location itself, and if so, how many - // bytes we should read in order to get it. 
We don't yet support doing - // that, and just assuming it's sizeof(intptr_t) is blatantly wrong. - //Value.Addend = *(const intptr_t *)Target; - if (Value.Addend) { - // The MachO addend is an offset from the current section. We need it - // to be an offset from the destination section - Value.Addend += Section.ObjAddress - Sections[Value.SectionID].ObjAddress; - } + SectionRef Sec = MachO->getRelocationSection(RE); + Value.SectionID = findOrEmitSection(Obj, Sec, true, ObjSectionToID); + uint64_t Addr; + Sec.getAddress(Addr); + Value.Addend = Addend - Addr; } - if (Arch == Triple::arm && (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) { + if (Arch == Triple::x86_64 && RelType == macho::RIT_X86_64_GOT) { + assert(IsPCRel); + assert(Size == 2); + StubMap::const_iterator i = Stubs.find(Value); + uint8_t *Addr; + if (i != Stubs.end()) { + Addr = Section.Address + i->second; + } else { + Stubs[Value] = Section.StubOffset; + uint8_t *GOTEntry = Section.Address + Section.StubOffset; + RelocationEntry RE(SectionID, Section.StubOffset, + macho::RIT_X86_64_Unsigned, Value.Addend - 4, false, + 3); + if (Value.SymbolName) + addRelocationForSymbol(RE, Value.SymbolName); + else + addRelocationForSection(RE, Value.SectionID); + Section.StubOffset += 8; + Addr = GOTEntry; + } + resolveRelocation(Section, Offset, (uint64_t)Addr, + macho::RIT_X86_64_Unsigned, 4, true, 2); + } else if (Arch == Triple::arm && + (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) { // This is an ARM branch relocation, need to use a stub function. // Look up for existing stub. StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) - resolveRelocation(Section, Rel.Offset, + resolveRelocation(Section, Offset, (uint64_t)Section.Address + i->second, - RelType, 0); + RelType, 0, IsPCRel, Size); else { // Create a new stub function. 
Stubs[Value] = Section.StubOffset;
uint8_t *StubTargetAddr = createStubFunction(Section.Address +
Section.StubOffset);
- RelocationEntry RE(Rel.SectionID, StubTargetAddr - Section.Address,
+ RelocationEntry RE(SectionID, StubTargetAddr - Section.Address,
macho::RIT_Vanilla, Value.Addend);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
addRelocationForSection(RE, Value.SectionID);
- resolveRelocation(Section, Rel.Offset,
+ resolveRelocation(Section, Offset,
(uint64_t)Section.Address + Section.StubOffset,
- RelType, 0);
+ RelType, 0, IsPCRel, Size);
Section.StubOffset += getMaxStubSize();
}
} else {
- RelocationEntry RE(Rel.SectionID, Rel.Offset, RelType, Value.Addend);
+ RelocationEntry RE(SectionID, Offset, RelType, Value.Addend,
+ IsPCRel, Size);
if (Value.SymbolName)
addRelocationForSymbol(RE, Value.SymbolName);
else
diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
index 62d8487..df8d3bb 100644
--- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
+++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h
@@ -16,7 +16,7 @@
#include "RuntimeDyldImpl.h"
#include "llvm/ADT/IndexedMap.h"
-#include "llvm/Object/MachOObject.h"
+#include "llvm/Object/MachO.h"
#include "llvm/Support/Format.h"
using namespace llvm;
@@ -25,7 +25,6 @@ using namespace llvm::object;
namespace llvm {
class RuntimeDyldMachO : public RuntimeDyldImpl {
-protected:
bool resolveI386Relocation(uint8_t *LocalAddress,
uint64_t FinalAddress,
uint64_t Value,
@@ -48,22 +47,25 @@ protected:
unsigned Size,
int64_t Addend);
- virtual void processRelocationRef(const ObjRelocationInfo &Rel,
+ void resolveRelocation(const SectionEntry &Section,
+ uint64_t Offset,
+ uint64_t Value,
+ uint32_t Type,
+ int64_t Addend,
+ bool isPCRel,
+ unsigned Size);
+public:
+ RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
+
+ virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value);
+ virtual void processRelocationRef(unsigned SectionID,
+ RelocationRef RelI,
ObjectImage &Obj,
ObjSectionToIDMap &ObjSectionToID,
const SymbolTableMap &Symbols,
StubMap &Stubs);
-
-public:
- virtual void resolveRelocation(const SectionEntry &Section,
- uint64_t Offset,
- uint64_t Value,
- uint32_t Type,
- int64_t Addend);
-
- RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {}
-
- bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+ virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const;
+ virtual StringRef getEHFrameSection();
};
} // end namespace llvm
diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp
index ca4330f..558d8b3 100644
--- a/lib/ExecutionEngine/TargetSelect.cpp
+++ b/lib/ExecutionEngine/TargetSelect.cpp
@@ -88,6 +88,14 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple,
FeaturesStr = Features.getString();
}
+ // FIXME: non-iOS ARM FastISel is broken with MCJIT.
+ if (UseMCJIT &&
+ TheTriple.getArch() == Triple::arm &&
+ TheTriple.getOS() != Triple::IOS &&
+ OptLevel == CodeGenOpt::None) {
+ OptLevel = CodeGenOpt::Less;
+ }
+
// Allocate a target...
TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(),
MCPU, FeaturesStr,
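A note on the Mach-O getEHFrameSection()/processFDE() code earlier in this patch: .eh_frame is a stream of length-prefixed records, and a zero in the 4-byte field after the length marks a CIE, while anything else marks an FDE whose pointers get rebased by the computed deltas. A parse-only sketch of that framing; it assumes 32-bit DWARF length fields and a little-endian host, and does no rewriting:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    // Walk the framing processFDE() relies on: a 4-byte length (excluding
    // itself) followed by a 4-byte field that is 0 for a CIE and a CIE
    // back-reference for an FDE.
    static void walkEHFrame(const uint8_t *P, const uint8_t *End) {
      while (P != End) {
        uint32_t Length, CIEPtr;
        memcpy(&Length, P, 4);      // assumes a little-endian host
        memcpy(&CIEPtr, P + 4, 4);
        printf("%s, %u byte body\n", CIEPtr == 0 ? "CIE" : "FDE",
               (unsigned)Length);
        P += 4 + Length;            // records are contiguous, like the
      }                             // do/while over EHFrame->Size above
    }

    int main() {
      // Two fake 8-byte records: a "CIE" (zero field) and an "FDE".
      const uint8_t Blob[16] = { 4, 0, 0, 0,  0, 0, 0, 0,
                                 4, 0, 0, 0,  8, 0, 0, 0 };
      walkEHFrame(Blob, Blob + sizeof(Blob));
      return 0;
    }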

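Finally, on the identify_magic() dispatch in RuntimeDyld::loadObject() above: the choice between RuntimeDyldELF and RuntimeDyldMachO comes down to the leading magic bytes of the buffer. A toy sniffer covering just the two families this file handles; this is our code and is far less complete than llvm::sys::fs::identify_magic():

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    enum Format { FMT_ELF, FMT_MACHO, FMT_UNKNOWN };

    // ELF starts with "\x7fELF"; Mach-O with one of the MH_MAGIC/MH_CIGAM
    // constants (32- or 64-bit, either byte order).
    static Format identifyFormat(const uint8_t *Buf, size_t Len) {
      if (Len < 4)
        return FMT_UNKNOWN;
      if (memcmp(Buf, "\177ELF", 4) == 0)
        return FMT_ELF;
      uint32_t Magic;
      memcpy(&Magic, Buf, 4); // read in host byte order, hence both CIGAMs
      if (Magic == 0xfeedfaceU || Magic == 0xcefaedfeU ||
          Magic == 0xfeedfacfU || Magic == 0xcffaedfeU)
        return FMT_MACHO;
      return FMT_UNKNOWN;
    }

    int main() {
      const uint8_t Elf[4] = { 0x7f, 'E', 'L', 'F' };
      const uint8_t Macho64[4] = { 0xcf, 0xfa, 0xed, 0xfe }; // MH_MAGIC_64, LE
      assert(identifyFormat(Elf, 4) == FMT_ELF);
      assert(identifyFormat(Macho64, 4) == FMT_MACHO);
      return 0;
    }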