diff options
Diffstat (limited to 'lib/AsmParser/LLLexer.cpp')
-rw-r--r-- | lib/AsmParser/LLLexer.cpp | 168 |
1 files changed, 97 insertions, 71 deletions
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 6523bce..3bf090a 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -78,13 +78,15 @@ uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { void LLLexer::HexToIntPair(const char *Buffer, const char *End, uint64_t Pair[2]) { Pair[0] = 0; - for (int i=0; i<16; i++, Buffer++) { - assert(Buffer != End); - Pair[0] *= 16; - Pair[0] += hexDigitValue(*Buffer); + if (End - Buffer >= 16) { + for (int i = 0; i < 16; i++, Buffer++) { + assert(Buffer != End); + Pair[0] *= 16; + Pair[0] += hexDigitValue(*Buffer); + } } Pair[1] = 0; - for (int i=0; i<16 && Buffer != End; i++, Buffer++) { + for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) { Pair[1] *= 16; Pair[1] += hexDigitValue(*Buffer); } @@ -239,7 +241,7 @@ lltok::Kind LLLexer::LexToken() { case ')': return lltok::rparen; case ',': return lltok::comma; case '*': return lltok::star; - case '\\': return lltok::backslash; + case '|': return lltok::bar; } } @@ -255,46 +257,7 @@ void LLLexer::SkipLineComment() { /// GlobalVar @[-a-zA-Z$._][-a-zA-Z$._0-9]* /// GlobalVarID @[0-9]+ lltok::Kind LLLexer::LexAt() { - // Handle AtStringConstant: @\"[^\"]*\" - if (CurPtr[0] == '"') { - ++CurPtr; - - while (1) { - int CurChar = getNextChar(); - - if (CurChar == EOF) { - Error("end of file in global variable name"); - return lltok::Error; - } - if (CurChar == '"') { - StrVal.assign(TokStart+2, CurPtr-1); - UnEscapeLexed(StrVal); - if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { - Error("Null bytes are not allowed in names"); - return lltok::Error; - } - return lltok::GlobalVar; - } - } - } - - // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]* - if (ReadVarName()) - return lltok::GlobalVar; - - // Handle GlobalVarID: @[0-9]+ - if (isdigit(static_cast<unsigned char>(CurPtr[0]))) { - for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) - /*empty*/; - - uint64_t Val = atoull(TokStart+1, CurPtr); - if ((unsigned)Val != Val) - Error("invalid value number (too large)!"); - UIntVal = unsigned(Val); - return lltok::GlobalID; - } - - return lltok::Error; + return LexVar(lltok::GlobalVar, lltok::GlobalID); } lltok::Kind LLLexer::LexDollar() { @@ -370,22 +333,35 @@ bool LLLexer::ReadVarName() { return false; } -/// LexPercent - Lex all tokens that start with a % character: -/// LocalVar ::= %\"[^\"]*\" -/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* -/// LocalVarID ::= %[0-9]+ -lltok::Kind LLLexer::LexPercent() { - // Handle LocalVarName: %\"[^\"]*\" +lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) { + // Handle StringConstant: \"[^\"]*\" if (CurPtr[0] == '"') { ++CurPtr; - return ReadString(lltok::LocalVar); + + while (1) { + int CurChar = getNextChar(); + + if (CurChar == EOF) { + Error("end of file in global variable name"); + return lltok::Error; + } + if (CurChar == '"') { + StrVal.assign(TokStart+2, CurPtr-1); + UnEscapeLexed(StrVal); + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + return lltok::Error; + } + return Var; + } + } } - // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]* + // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]* if (ReadVarName()) - return lltok::LocalVar; + return Var; - // Handle LocalVarID: %[0-9]+ + // Handle VarID: [0-9]+ if (isdigit(static_cast<unsigned char>(CurPtr[0]))) { for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr) /*empty*/; @@ -394,12 +370,19 @@ lltok::Kind LLLexer::LexPercent() { if ((unsigned)Val != Val) Error("invalid value number (too large)!"); UIntVal = unsigned(Val); - return lltok::LocalVarID; + return VarID; } - return lltok::Error; } +/// LexPercent - Lex all tokens that start with a % character: +/// LocalVar ::= %\"[^\"]*\" +/// LocalVar ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]* +/// LocalVarID ::= %[0-9]+ +lltok::Kind LLLexer::LexPercent() { + return LexVar(lltok::LocalVar, lltok::LocalVarID); +} + /// LexQuote - Lex all tokens that start with a " character: /// QuoteLabel "[^"]+": /// StringConstant "[^"]*" @@ -410,7 +393,12 @@ lltok::Kind LLLexer::LexQuote() { if (CurPtr[0] == ':') { ++CurPtr; - kind = lltok::LabelStr; + if (StringRef(StrVal).find_first_of(0) != StringRef::npos) { + Error("Null bytes are not allowed in names"); + kind = lltok::Error; + } else { + kind = lltok::LabelStr; + } } return kind; @@ -499,11 +487,11 @@ lltok::Kind LLLexer::LexIdentifier() { if (!KeywordEnd) KeywordEnd = CurPtr; CurPtr = KeywordEnd; --StartChar; - unsigned Len = CurPtr-StartChar; -#define KEYWORD(STR) \ - do { \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ - return lltok::kw_##STR; \ + StringRef Keyword(StartChar, CurPtr - StartChar); +#define KEYWORD(STR) \ + do { \ + if (Keyword == #STR) \ + return lltok::kw_##STR; \ } while (0) KEYWORD(true); KEYWORD(false); @@ -573,6 +561,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(inteldialect); KEYWORD(gc); KEYWORD(prefix); + KEYWORD(prologue); KEYWORD(ccc); KEYWORD(fastcc); @@ -596,6 +585,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(anyregcc); KEYWORD(preserve_mostcc); KEYWORD(preserve_allcc); + KEYWORD(ghccc); KEYWORD(cc); KEYWORD(c); @@ -665,6 +655,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(x); KEYWORD(blockaddress); + // Metadata types. + KEYWORD(distinct); + // Use-list order directives. KEYWORD(uselistorder); KEYWORD(uselistorder_bb); @@ -676,9 +669,13 @@ lltok::Kind LLLexer::LexIdentifier() { #undef KEYWORD // Keywords for types. -#define TYPEKEYWORD(STR, LLVMTY) \ - if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \ - TyVal = LLVMTY; return lltok::Type; } +#define TYPEKEYWORD(STR, LLVMTY) \ + do { \ + if (Keyword == STR) { \ + TyVal = LLVMTY; \ + return lltok::Type; \ + } \ + } while (false) TYPEKEYWORD("void", Type::getVoidTy(Context)); TYPEKEYWORD("half", Type::getHalfTy(Context)); TYPEKEYWORD("float", Type::getFloatTy(Context)); @@ -692,9 +689,13 @@ lltok::Kind LLLexer::LexIdentifier() { #undef TYPEKEYWORD // Keywords for instructions. -#define INSTKEYWORD(STR, Enum) \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \ - UIntVal = Instruction::Enum; return lltok::kw_##STR; } +#define INSTKEYWORD(STR, Enum) \ + do { \ + if (Keyword == #STR) { \ + UIntVal = Instruction::Enum; \ + return lltok::kw_##STR; \ + } \ + } while (false) INSTKEYWORD(add, Add); INSTKEYWORD(fadd, FAdd); INSTKEYWORD(sub, Sub); INSTKEYWORD(fsub, FSub); @@ -746,6 +747,25 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(landingpad, LandingPad); #undef INSTKEYWORD +#define DWKEYWORD(TYPE, TOKEN) \ + do { \ + if (Keyword.startswith("DW_" #TYPE "_")) { \ + StrVal.assign(Keyword.begin(), Keyword.end()); \ + return lltok::TOKEN; \ + } \ + } while (false) + DWKEYWORD(TAG, DwarfTag); + DWKEYWORD(ATE, DwarfAttEncoding); + DWKEYWORD(VIRTUALITY, DwarfVirtuality); + DWKEYWORD(LANG, DwarfLang); + DWKEYWORD(OP, DwarfOp); +#undef DWKEYWORD + + if (Keyword.startswith("DIFlag")) { + StrVal.assign(Keyword.begin(), Keyword.end()); + return lltok::DIFlag; + } + // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by // the CFE to avoid forcing it to deal with 64-bit numbers. if ((TokStart[0] == 'u' || TokStart[0] == 's') && @@ -753,7 +773,13 @@ lltok::Kind LLLexer::LexIdentifier() { isxdigit(static_cast<unsigned char>(TokStart[3]))) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; - APInt Tmp(bits, StringRef(TokStart+3, len), 16); + StringRef HexStr(TokStart + 3, len); + if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) { + // Bad token, return it as an error. + CurPtr = TokStart+3; + return lltok::Error; + } + APInt Tmp(bits, HexStr, 16); uint32_t activeBits = Tmp.getActiveBits(); if (activeBits > 0 && activeBits < bits) Tmp = Tmp.trunc(activeBits); |