diff options
author | Chris Lattner <sabre@nondot.org> | 2004-04-05 01:27:26 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2004-04-05 01:27:26 +0000 |
commit | 5fa428fda9eb0f333311eca20b9f08fef975a8c0 (patch) | |
tree | ccab09d06a0ebfcecfaa658b31360d9fb63903b0 | |
parent | 68056127bb76a28713f829d92b309c2b1960ffc2 (diff) | |
download | external_llvm-5fa428fda9eb0f333311eca20b9f08fef975a8c0.zip external_llvm-5fa428fda9eb0f333311eca20b9f08fef975a8c0.tar.gz external_llvm-5fa428fda9eb0f333311eca20b9f08fef975a8c0.tar.bz2 |
Implement support for a new LLVM 1.3 bytecode format, which uses uints
to index into structure types and allows arbitrary 32- and 64-bit integer
types to index into sequential types.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@12651 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | lib/Bytecode/Reader/ConstantReader.cpp | 15 | ||||
-rw-r--r-- | lib/Bytecode/Reader/InstructionReader.cpp | 33 | ||||
-rw-r--r-- | lib/Bytecode/Reader/Reader.cpp | 11 | ||||
-rw-r--r-- | lib/Bytecode/Reader/ReaderInternals.h | 7 | ||||
-rw-r--r-- | lib/Bytecode/Writer/InstructionWriter.cpp | 206 | ||||
-rw-r--r-- | lib/Bytecode/Writer/Writer.cpp | 6 |
6 files changed, 191 insertions, 87 deletions
diff --git a/lib/Bytecode/Reader/ConstantReader.cpp b/lib/Bytecode/Reader/ConstantReader.cpp index b4a219d..8691b26 100644 --- a/lib/Bytecode/Reader/ConstantReader.cpp +++ b/lib/Bytecode/Reader/ConstantReader.cpp @@ -15,6 +15,7 @@ #include "ReaderInternals.h" #include "llvm/Module.h" #include "llvm/Constants.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include <algorithm> using namespace llvm; @@ -164,6 +165,20 @@ Constant *BytecodeParser::parseConstantValue(const unsigned char *&Buf, return ConstantExpr::getCast(ArgVec[0], getType(TypeID)); } else if (Opcode == Instruction::GetElementPtr) { // GetElementPtr std::vector<Constant*> IdxList(ArgVec.begin()+1, ArgVec.end()); + + if (hasRestrictedGEPTypes) { + const Type *BaseTy = ArgVec[0]->getType(); + generic_gep_type_iterator<std::vector<Constant*>::iterator> + GTI = gep_type_begin(BaseTy, IdxList.begin(), IdxList.end()), + E = gep_type_end(BaseTy, IdxList.begin(), IdxList.end()); + for (unsigned i = 0; GTI != E; ++GTI, ++i) + if (isa<StructType>(*GTI)) { + if (IdxList[i]->getType() != Type::UByteTy) + throw std::string("Invalid index for getelementptr!"); + IdxList[i] = ConstantExpr::getCast(IdxList[i], Type::UIntTy); + } + } + return ConstantExpr::getGetElementPtr(ArgVec[0], IdxList); } else if (Opcode == Instruction::Select) { assert(ArgVec.size() == 3); diff --git a/lib/Bytecode/Reader/InstructionReader.cpp b/lib/Bytecode/Reader/InstructionReader.cpp index 90be8cd..d66b12c 100644 --- a/lib/Bytecode/Reader/InstructionReader.cpp +++ b/lib/Bytecode/Reader/InstructionReader.cpp @@ -308,10 +308,35 @@ void BytecodeParser::ParseInstruction(const unsigned char *&Buf, for (unsigned i = 1, e = Args.size(); i != e; ++i) { const CompositeType *TopTy = dyn_cast_or_null<CompositeType>(NextTy); if (!TopTy) throw std::string("Invalid getelementptr instruction!"); - // FIXME: when PR82 is resolved. - unsigned IdxTy = isa<StructType>(TopTy) ? 
Type::UByteTyID :Type::LongTyID; - - Idx.push_back(getValue(IdxTy, Args[i])); + + unsigned ValIdx = Args[i]; + unsigned IdxTy; + if (!hasRestrictedGEPTypes) { + // Struct indices are always uints, sequential type indices can be any + // of the 32 or 64-bit integer types. The actual choice of type is + // encoded in the low two bits of the slot number. + if (isa<StructType>(TopTy)) + IdxTy = Type::UIntTyID; + else { + switch (ValIdx & 3) { + case 0: IdxTy = Type::UIntTyID; break; + case 1: IdxTy = Type::IntTyID; break; + case 2: IdxTy = Type::ULongTyID; break; + case 3: IdxTy = Type::LongTyID; break; + } + ValIdx >>= 2; + } + } else { + IdxTy = isa<StructType>(TopTy) ? Type::UByteTyID : Type::LongTyID; + } + + Idx.push_back(getValue(IdxTy, ValIdx)); + + // Convert ubyte struct indices into uint struct indices. + if (isa<StructType>(TopTy) && hasRestrictedGEPTypes) + if (ConstantUInt *C = dyn_cast<ConstantUInt>(Idx.back())) + Idx[Idx.size()-1] = ConstantExpr::getCast(C, Type::UIntTy); + NextTy = GetElementPtrInst::getIndexedType(InstTy, Idx, true); } diff --git a/lib/Bytecode/Reader/Reader.cpp b/lib/Bytecode/Reader/Reader.cpp index 54c9181..2f0879b 100644 --- a/lib/Bytecode/Reader/Reader.cpp +++ b/lib/Bytecode/Reader/Reader.cpp @@ -647,12 +647,10 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf, // Default values for the current bytecode version hasInconsistentModuleGlobalInfo = false; hasExplicitPrimitiveZeros = false; + hasRestrictedGEPTypes = false; switch (RevisionNum) { case 0: // LLVM 1.0, 1.1 release version - // Compared to rev #2, we added support for weak linkage, a more dense - // encoding, and better varargs support. - // Base LLVM 1.0 bytecode format. 
hasInconsistentModuleGlobalInfo = true; hasExplicitPrimitiveZeros = true; @@ -663,6 +661,13 @@ void BytecodeParser::ParseVersionInfo(const unsigned char *&Buf, // Also, it fixed the problem where the size of the ModuleGlobalInfo block // included the size for the alignment at the end, where the rest of the // blocks did not. + + // LLVM 1.2 and before required that GEP indices be ubyte constants for + // structures and longs for sequential types. + hasRestrictedGEPTypes = true; + + // FALL THROUGH + case 2: // LLVM 1.3 release version break; default: diff --git a/lib/Bytecode/Reader/ReaderInternals.h b/lib/Bytecode/Reader/ReaderInternals.h index 86bf800..9e0ffc2 100644 --- a/lib/Bytecode/Reader/ReaderInternals.h +++ b/lib/Bytecode/Reader/ReaderInternals.h @@ -108,6 +108,13 @@ private: // int/sbyte/etc. bool hasExplicitPrimitiveZeros; + // Flags to control features specific the LLVM 1.2 and before (revision #1) + + // LLVM 1.2 and earlier required that getelementptr structure indices were + // ubyte constants and that sequential type indices were longs. 
+ bool hasRestrictedGEPTypes; + + typedef std::vector<ValueList*> ValueTable; ValueTable Values; ValueTable ModuleValues; diff --git a/lib/Bytecode/Writer/InstructionWriter.cpp b/lib/Bytecode/Writer/InstructionWriter.cpp index e86b027..9e06351 100644 --- a/lib/Bytecode/Writer/InstructionWriter.cpp +++ b/lib/Bytecode/Writer/InstructionWriter.cpp @@ -16,6 +16,7 @@ #include "llvm/Module.h" #include "llvm/DerivedTypes.h" #include "llvm/Instructions.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "Support/Statistic.h" #include <algorithm> using namespace llvm; @@ -38,20 +39,48 @@ static void outputInstructionFormat0(const Instruction *I, unsigned Opcode, output_vbr(NumArgs + (isa<CastInst>(I) || isa<VANextInst>(I) || isa<VAArgInst>(I)), Out); - for (unsigned i = 0; i < NumArgs; ++i) { - int Slot = Table.getSlot(I->getOperand(i)); - assert(Slot >= 0 && "No slot number for value!?!?"); - output_vbr((unsigned)Slot, Out); - } + if (!isa<GetElementPtrInst>(&I)) { + for (unsigned i = 0; i < NumArgs; ++i) { + int Slot = Table.getSlot(I->getOperand(i)); + assert(Slot >= 0 && "No slot number for value!?!?"); + output_vbr((unsigned)Slot, Out); + } - if (isa<CastInst>(I) || isa<VAArgInst>(I)) { - int Slot = Table.getSlot(I->getType()); - assert(Slot != -1 && "Cast return type unknown?"); - output_vbr((unsigned)Slot, Out); - } else if (const VANextInst *VAI = dyn_cast<VANextInst>(I)) { - int Slot = Table.getSlot(VAI->getArgType()); - assert(Slot != -1 && "VarArg argument type unknown?"); - output_vbr((unsigned)Slot, Out); + if (isa<CastInst>(I) || isa<VAArgInst>(I)) { + int Slot = Table.getSlot(I->getType()); + assert(Slot != -1 && "Cast return type unknown?"); + output_vbr((unsigned)Slot, Out); + } else if (const VANextInst *VAI = dyn_cast<VANextInst>(I)) { + int Slot = Table.getSlot(VAI->getArgType()); + assert(Slot != -1 && "VarArg argument type unknown?"); + output_vbr((unsigned)Slot, Out); + } + + } else { + int Slot = Table.getSlot(I->getOperand(0)); + 
assert(Slot >= 0 && "No slot number for value!?!?"); + output_vbr(unsigned(Slot), Out); + + // We need to encode the type of sequential type indices into their slot # + unsigned Idx = 1; + for (gep_type_iterator TI = gep_type_begin(I), E = gep_type_end(I); + Idx != NumArgs; ++TI, ++Idx) { + Slot = Table.getSlot(I->getOperand(Idx)); + assert(Slot >= 0 && "No slot number for value!?!?"); + + if (isa<SequentialType>(*TI)) { + unsigned IdxId; + switch (I->getOperand(Idx)->getType()->getPrimitiveID()) { + default: assert(0 && "Unknown index type!"); + case Type::UIntTyID: IdxId = 0; break; + case Type::IntTyID: IdxId = 1; break; + case Type::ULongTyID: IdxId = 2; break; + case Type::LongTyID: IdxId = 3; break; + } + Slot = (Slot << 2) | IdxId; + } + output_vbr(unsigned(Slot), Out); + } } align32(Out); // We must maintain correct alignment! @@ -119,8 +148,9 @@ static void outputInstrVarArgsCall(const Instruction *I, unsigned Opcode, // operand index is >= 2^12. // static void outputInstructionFormat1(const Instruction *I, unsigned Opcode, - const SlotCalculator &Table, int *Slots, - unsigned Type, std::deque<uchar> &Out) { + const SlotCalculator &Table, + unsigned *Slots, unsigned Type, + std::deque<uchar> &Out) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 1. @@ -138,8 +168,9 @@ static void outputInstructionFormat1(const Instruction *I, unsigned Opcode, // operand index is >= 2^8. // static void outputInstructionFormat2(const Instruction *I, unsigned Opcode, - const SlotCalculator &Table, int *Slots, - unsigned Type, std::deque<uchar> &Out) { + const SlotCalculator &Table, + unsigned *Slots, unsigned Type, + std::deque<uchar> &Out) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 2. @@ -160,8 +191,9 @@ static void outputInstructionFormat2(const Instruction *I, unsigned Opcode, // operand index is >= 2^6. 
// static void outputInstructionFormat3(const Instruction *I, unsigned Opcode, - const SlotCalculator &Table, int *Slots, - unsigned Type, std::deque<uchar> &Out) { + const SlotCalculator &Table, + unsigned *Slots, unsigned Type, + std::deque<uchar> &Out) { // bits Instruction format: // -------------------------- // 01-00: Opcode type, fixed to 3. @@ -181,6 +213,7 @@ static void outputInstructionFormat3(const Instruction *I, unsigned Opcode, void BytecodeWriter::outputInstruction(const Instruction &I) { assert(I.getOpcode() < 62 && "Opcode too big???"); unsigned Opcode = I.getOpcode(); + unsigned NumOperands = I.getNumOperands(); // Encode 'volatile load' as 62 and 'volatile store' as 63. if (isa<LoadInst>(I) && cast<LoadInst>(I).isVolatile()) @@ -188,17 +221,6 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { if (isa<StoreInst>(I) && cast<StoreInst>(I).isVolatile()) Opcode = 63; - unsigned NumOperands = I.getNumOperands(); - int MaxOpSlot = 0; - int Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands - - for (unsigned i = 0; i != NumOperands; ++i) { - int slot = Table.getSlot(I.getOperand(i)); - assert(slot != -1 && "Broken bytecode!"); - if (slot > MaxOpSlot) MaxOpSlot = slot; - if (i < 3) Slots[i] = slot; - } - // Figure out which type to encode with the instruction. Typically we want // the type of the first parameter, as opposed to the type of the instruction // (for example, with setcc, we always know it returns bool, but the type of @@ -226,71 +248,101 @@ void BytecodeWriter::outputInstruction(const Instruction &I) { assert(Slot != -1 && "Type not available!!?!"); Type = (unsigned)Slot; - // Make sure that we take the type number into consideration. We don't want - // to overflow the field size for the instruction format we select. - // - if (Slot > MaxOpSlot) MaxOpSlot = Slot; - - // Handle the special case for cast... 
- if (isa<CastInst>(I) || isa<VAArgInst>(I)) { - // Cast has to encode the destination type as the second argument in the - // packet, or else we won't know what type to cast to! - Slots[1] = Table.getSlot(I.getType()); - assert(Slots[1] != -1 && "Cast return type unknown?"); - if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; - NumOperands++; - } else if (const VANextInst *VANI = dyn_cast<VANextInst>(&I)) { - Slots[1] = Table.getSlot(VANI->getArgType()); - assert(Slots[1] != -1 && "va_next return type unknown?"); - if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; - NumOperands++; - } else if (const CallInst *CI = dyn_cast<CallInst>(&I)){// Handle VarArg calls - const PointerType *Ty = cast<PointerType>(CI->getCalledValue()->getType()); + // Varargs calls and invokes are encoded entirely different from any other + // instructions. + if (const CallInst *CI = dyn_cast<CallInst>(&I)){ + const PointerType *Ty =cast<PointerType>(CI->getCalledValue()->getType()); if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { outputInstrVarArgsCall(CI, Opcode, Table, Type, Out); return; } - } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) {// ... & Invokes - const PointerType *Ty = cast<PointerType>(II->getCalledValue()->getType()); + } else if (const InvokeInst *II = dyn_cast<InvokeInst>(&I)) { + const PointerType *Ty =cast<PointerType>(II->getCalledValue()->getType()); if (cast<FunctionType>(Ty->getElementType())->isVarArg()) { outputInstrVarArgsCall(II, Opcode, Table, Type, Out); return; } } - // Decide which instruction encoding to use. This is determined primarily by - // the number of operands, and secondarily by whether or not the max operand - // will fit into the instruction encoding. More operands == fewer bits per - // operand. 
- // - switch (NumOperands) { - case 0: - case 1: - if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops - outputInstructionFormat1(&I, Opcode, Table, Slots, Type, Out); - return; + if (NumOperands <= 3) { + // Make sure that we take the type number into consideration. We don't want + // to overflow the field size for the instruction format we select. + // + unsigned MaxOpSlot = Type; + unsigned Slots[3]; Slots[0] = (1 << 12)-1; // Marker to signify 0 operands + + for (unsigned i = 0; i != NumOperands; ++i) { + int slot = Table.getSlot(I.getOperand(i)); + assert(slot != -1 && "Broken bytecode!"); + if (unsigned(slot) > MaxOpSlot) MaxOpSlot = unsigned(slot); + Slots[i] = unsigned(slot); } - break; - case 2: - if (MaxOpSlot < (1 << 8)) { - outputInstructionFormat2(&I, Opcode, Table, Slots, Type, Out); - return; + // Handle the special cases for various instructions... + if (isa<CastInst>(I) || isa<VAArgInst>(I)) { + // Cast has to encode the destination type as the second argument in the + // packet, or else we won't know what type to cast to! 
+ Slots[1] = Table.getSlot(I.getType()); + assert(Slots[1] != ~0U && "Cast return type unknown?"); + if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; + NumOperands++; + } else if (const VANextInst *VANI = dyn_cast<VANextInst>(&I)) { + Slots[1] = Table.getSlot(VANI->getArgType()); + assert(Slots[1] != ~0U && "va_next return type unknown?"); + if (Slots[1] > MaxOpSlot) MaxOpSlot = Slots[1]; + NumOperands++; + } else if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(&I)) { + // We need to encode the type of sequential type indices into their slot # + unsigned Idx = 1; + for (gep_type_iterator I = gep_type_begin(GEP), E = gep_type_end(GEP); + I != E; ++I, ++Idx) + if (isa<SequentialType>(*I)) { + unsigned IdxId; + switch (GEP->getOperand(Idx)->getType()->getPrimitiveID()) { + default: assert(0 && "Unknown index type!"); + case Type::UIntTyID: IdxId = 0; break; + case Type::IntTyID: IdxId = 1; break; + case Type::ULongTyID: IdxId = 2; break; + case Type::LongTyID: IdxId = 3; break; + } + Slots[Idx] = (Slots[Idx] << 2) | IdxId; + if (Slots[Idx] > MaxOpSlot) MaxOpSlot = Slots[Idx]; + } } - break; - case 3: - if (MaxOpSlot < (1 << 6)) { - outputInstructionFormat3(&I, Opcode, Table, Slots, Type, Out); - return; + // Decide which instruction encoding to use. This is determined primarily + // by the number of operands, and secondarily by whether or not the max + // operand will fit into the instruction encoding. More operands == fewer + // bits per operand. 
+ // + switch (NumOperands) { + case 0: + case 1: + if (MaxOpSlot < (1 << 12)-1) { // -1 because we use 4095 to indicate 0 ops + outputInstructionFormat1(&I, Opcode, Table, Slots, Type, Out); + return; + } + break; + + case 2: + if (MaxOpSlot < (1 << 8)) { + outputInstructionFormat2(&I, Opcode, Table, Slots, Type, Out); + return; + } + break; + + case 3: + if (MaxOpSlot < (1 << 6)) { + outputInstructionFormat3(&I, Opcode, Table, Slots, Type, Out); + return; + } + break; + default: + break; } - break; - default: - break; } // If we weren't handled before here, we either have a large number of // operands or a large operand index that we are referring to. outputInstructionFormat0(&I, Opcode, Table, Type, Out); } - diff --git a/lib/Bytecode/Writer/Writer.cpp b/lib/Bytecode/Writer/Writer.cpp index 432a39f..82fe40d 100644 --- a/lib/Bytecode/Writer/Writer.cpp +++ b/lib/Bytecode/Writer/Writer.cpp @@ -54,9 +54,9 @@ BytecodeWriter::BytecodeWriter(std::deque<unsigned char> &o, const Module *M) bool hasNoEndianness = M->getEndianness() == Module::AnyEndianness; bool hasNoPointerSize = M->getPointerSize() == Module::AnyPointerSize; - // Output the version identifier... we are currently on bytecode version #1, - // which corresponds to LLVM v1.2. - unsigned Version = (1 << 4) | isBigEndian | (hasLongPointers << 1) | + // Output the version identifier... we are currently on bytecode version #2, + // which corresponds to LLVM v1.3. + unsigned Version = (2 << 4) | isBigEndian | (hasLongPointers << 1) | (hasNoEndianness << 2) | (hasNoPointerSize << 3); output_vbr(Version, Out); align32(Out); |