From 7c788888872233748da10a8177a9a1eb176c1bc8 Mon Sep 17 00:00:00 2001 From: Peter Collingbourne Date: Sat, 1 Oct 2011 16:41:13 +0000 Subject: Move TableGen's parser and entry point into a library This is the first step towards splitting LLVM and Clang's tblgen executables. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140951 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 435 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 435 insertions(+) create mode 100644 lib/TableGen/TGLexer.cpp (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp new file mode 100644 index 0000000..0dc1c70 --- /dev/null +++ b/lib/TableGen/TGLexer.cpp @@ -0,0 +1,435 @@ +//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implement the Lexer for TableGen. +// +//===----------------------------------------------------------------------===// + +#include "TGLexer.h" +#include "llvm/TableGen/Error.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Config/config.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/ADT/Twine.h" +#include +#include +#include +#include +#include +using namespace llvm; + +TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) { + CurBuffer = 0; + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + TokStart = 0; +} + +SMLoc TGLexer::getLoc() const { + return SMLoc::getFromPointer(TokStart); +} + +/// ReturnError - Set the error to the specified string at the specified +/// location. This is defined to always return tgtok::Error. +tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) { + PrintError(Loc, Msg); + return tgtok::Error; +} + +int TGLexer::getNextChar() { + char CurChar = *CurPtr++; + switch (CurChar) { + default: + return (unsigned char)CurChar; + case 0: { + // A nul character in the stream is either the end of the current buffer or + // a random nul in the file. Disambiguate that here. + if (CurPtr-1 != CurBuf->getBufferEnd()) + return 0; // Just whitespace. + + // If this is the end of an included file, pop the parent file off the + // include stack. + SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); + if (ParentIncludeLoc != SMLoc()) { + CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc); + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = ParentIncludeLoc.getPointer(); + return getNextChar(); + } + + // Otherwise, return end of file. + --CurPtr; // Another call to lex will return EOF again. + return EOF; + } + case '\n': + case '\r': + // Handle the newline character by ignoring it and incrementing the line + // count. However, be careful about 'dos style' files with \n\r in them. + // Only treat a \n\r or \r\n as a single line. + if ((*CurPtr == '\n' || (*CurPtr == '\r')) && + *CurPtr != CurChar) + ++CurPtr; // Eat the two char newline sequence. + return '\n'; + } +} + +tgtok::TokKind TGLexer::LexToken() { + TokStart = CurPtr; + // This always consumes at least one character. + int CurChar = getNextChar(); + + switch (CurChar) { + default: + // Handle letters: [a-zA-Z_#] + if (isalpha(CurChar) || CurChar == '_' || CurChar == '#') + return LexIdentifier(); + + // Unknown character, emit an error. + return ReturnError(TokStart, "Unexpected character"); + case EOF: return tgtok::Eof; + case ':': return tgtok::colon; + case ';': return tgtok::semi; + case '.': return tgtok::period; + case ',': return tgtok::comma; + case '<': return tgtok::less; + case '>': return tgtok::greater; + case ']': return tgtok::r_square; + case '{': return tgtok::l_brace; + case '}': return tgtok::r_brace; + case '(': return tgtok::l_paren; + case ')': return tgtok::r_paren; + case '=': return tgtok::equal; + case '?': return tgtok::question; + + case 0: + case ' ': + case '\t': + case '\n': + case '\r': + // Ignore whitespace. + return LexToken(); + case '/': + // If this is the start of a // comment, skip until the end of the line or + // the end of the buffer. + if (*CurPtr == '/') + SkipBCPLComment(); + else if (*CurPtr == '*') { + if (SkipCComment()) + return tgtok::Error; + } else // Otherwise, this is an error. + return ReturnError(TokStart, "Unexpected character"); + return LexToken(); + case '-': case '+': + case '0': case '1': case '2': case '3': case '4': case '5': case '6': + case '7': case '8': case '9': + return LexNumber(); + case '"': return LexString(); + case '$': return LexVarName(); + case '[': return LexBracket(); + case '!': return LexExclaim(); + } +} + +/// LexString - Lex "[^"]*" +tgtok::TokKind TGLexer::LexString() { + const char *StrStart = CurPtr; + + CurStrVal = ""; + + while (*CurPtr != '"') { + // If we hit the end of the buffer, report an error. + if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd()) + return ReturnError(StrStart, "End of file in string literal"); + + if (*CurPtr == '\n' || *CurPtr == '\r') + return ReturnError(StrStart, "End of line in string literal"); + + if (*CurPtr != '\\') { + CurStrVal += *CurPtr++; + continue; + } + + ++CurPtr; + + switch (*CurPtr) { + case '\\': case '\'': case '"': + // These turn into their literal character. + CurStrVal += *CurPtr++; + break; + case 't': + CurStrVal += '\t'; + ++CurPtr; + break; + case 'n': + CurStrVal += '\n'; + ++CurPtr; + break; + + case '\n': + case '\r': + return ReturnError(CurPtr, "escaped newlines not supported in tblgen"); + + // If we hit the end of the buffer, report an error. + case '\0': + if (CurPtr == CurBuf->getBufferEnd()) + return ReturnError(StrStart, "End of file in string literal"); + // FALL THROUGH + default: + return ReturnError(CurPtr, "invalid escape in string literal"); + } + } + + ++CurPtr; + return tgtok::StrVal; +} + +tgtok::TokKind TGLexer::LexVarName() { + if (!isalpha(CurPtr[0]) && CurPtr[0] != '_') + return ReturnError(TokStart, "Invalid variable name"); + + // Otherwise, we're ok, consume the rest of the characters. + const char *VarNameStart = CurPtr++; + + while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') + ++CurPtr; + + CurStrVal.assign(VarNameStart, CurPtr); + return tgtok::VarName; +} + + +tgtok::TokKind TGLexer::LexIdentifier() { + // The first letter is [a-zA-Z_#]. + const char *IdentStart = TokStart; + + // Match the rest of the identifier regex: [0-9a-zA-Z_#]* + while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' || + *CurPtr == '#') + ++CurPtr; + + + // Check to see if this identifier is a keyword. + unsigned Len = CurPtr-IdentStart; + + if (Len == 3 && !memcmp(IdentStart, "int", 3)) return tgtok::Int; + if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return tgtok::Bit; + if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return tgtok::Bits; + if (Len == 6 && !memcmp(IdentStart, "string", 6)) return tgtok::String; + if (Len == 4 && !memcmp(IdentStart, "list", 4)) return tgtok::List; + if (Len == 4 && !memcmp(IdentStart, "code", 4)) return tgtok::Code; + if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return tgtok::Dag; + + if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class; + if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def; + if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm; + if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) + return tgtok::MultiClass; + if (Len == 5 && !memcmp(IdentStart, "field", 5)) return tgtok::Field; + if (Len == 3 && !memcmp(IdentStart, "let", 3)) return tgtok::Let; + if (Len == 2 && !memcmp(IdentStart, "in", 2)) return tgtok::In; + + if (Len == 7 && !memcmp(IdentStart, "include", 7)) { + if (LexInclude()) return tgtok::Error; + return Lex(); + } + + CurStrVal.assign(IdentStart, CurPtr); + return tgtok::Id; +} + +/// LexInclude - We just read the "include" token. Get the string token that +/// comes next and enter the include. +bool TGLexer::LexInclude() { + // The token after the include must be a string. + tgtok::TokKind Tok = LexToken(); + if (Tok == tgtok::Error) return true; + if (Tok != tgtok::StrVal) { + PrintError(getLoc(), "Expected filename after include"); + return true; + } + + // Get the string. + std::string Filename = CurStrVal; + std::string IncludedFile; + + + CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr), + IncludedFile); + if (CurBuffer == -1) { + PrintError(getLoc(), "Could not find include file '" + Filename + "'"); + return true; + } + + Dependencies.push_back(IncludedFile); + // Save the line number and lex buffer of the includer. + CurBuf = SrcMgr.getMemoryBuffer(CurBuffer); + CurPtr = CurBuf->getBufferStart(); + return false; +} + +void TGLexer::SkipBCPLComment() { + ++CurPtr; // skip the second slash. + while (1) { + switch (*CurPtr) { + case '\n': + case '\r': + return; // Newline is end of comment. + case 0: + // If this is the end of the buffer, end the comment. + if (CurPtr == CurBuf->getBufferEnd()) + return; + break; + } + // Otherwise, skip the character. + ++CurPtr; + } +} + +/// SkipCComment - This skips C-style /**/ comments. The only difference from C +/// is that we allow nesting. +bool TGLexer::SkipCComment() { + ++CurPtr; // skip the star. + unsigned CommentDepth = 1; + + while (1) { + int CurChar = getNextChar(); + switch (CurChar) { + case EOF: + PrintError(TokStart, "Unterminated comment!"); + return true; + case '*': + // End of the comment? + if (CurPtr[0] != '/') break; + + ++CurPtr; // End the */. + if (--CommentDepth == 0) + return false; + break; + case '/': + // Start of a nested comment? + if (CurPtr[0] != '*') break; + ++CurPtr; + ++CommentDepth; + break; + } + } +} + +/// LexNumber - Lex: +/// [-+]?[0-9]+ +/// 0x[0-9a-fA-F]+ +/// 0b[01]+ +tgtok::TokKind TGLexer::LexNumber() { + if (CurPtr[-1] == '0') { + if (CurPtr[0] == 'x') { + ++CurPtr; + const char *NumStart = CurPtr; + while (isxdigit(CurPtr[0])) + ++CurPtr; + + // Requires at least one hex digit. + if (CurPtr == NumStart) + return ReturnError(TokStart, "Invalid hexadecimal number"); + + errno = 0; + CurIntVal = strtoll(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(TokStart, "Invalid hexadecimal number"); + if (errno == ERANGE) { + errno = 0; + CurIntVal = (int64_t)strtoull(NumStart, 0, 16); + if (errno == EINVAL) + return ReturnError(TokStart, "Invalid hexadecimal number"); + if (errno == ERANGE) + return ReturnError(TokStart, "Hexadecimal number out of range"); + } + return tgtok::IntVal; + } else if (CurPtr[0] == 'b') { + ++CurPtr; + const char *NumStart = CurPtr; + while (CurPtr[0] == '0' || CurPtr[0] == '1') + ++CurPtr; + + // Requires at least one binary digit. + if (CurPtr == NumStart) + return ReturnError(CurPtr-2, "Invalid binary number"); + CurIntVal = strtoll(NumStart, 0, 2); + return tgtok::IntVal; + } + } + + // Check for a sign without a digit. + if (!isdigit(CurPtr[0])) { + if (CurPtr[-1] == '-') + return tgtok::minus; + else if (CurPtr[-1] == '+') + return tgtok::plus; + } + + while (isdigit(CurPtr[0])) + ++CurPtr; + CurIntVal = strtoll(TokStart, 0, 10); + return tgtok::IntVal; +} + +/// LexBracket - We just read '['. If this is a code block, return it, +/// otherwise return the bracket. Match: '[' and '[{ ( [^}]+ | }[^]] )* }]' +tgtok::TokKind TGLexer::LexBracket() { + if (CurPtr[0] != '{') + return tgtok::l_square; + ++CurPtr; + const char *CodeStart = CurPtr; + while (1) { + int Char = getNextChar(); + if (Char == EOF) break; + + if (Char != '}') continue; + + Char = getNextChar(); + if (Char == EOF) break; + if (Char == ']') { + CurStrVal.assign(CodeStart, CurPtr-2); + return tgtok::CodeFragment; + } + } + + return ReturnError(CodeStart-2, "Unterminated Code Block"); +} + +/// LexExclaim - Lex '!' and '![a-zA-Z]+'. +tgtok::TokKind TGLexer::LexExclaim() { + if (!isalpha(*CurPtr)) + return ReturnError(CurPtr - 1, "Invalid \"!operator\""); + + const char *Start = CurPtr++; + while (isalpha(*CurPtr)) + ++CurPtr; + + // Check to see which operator this is. + tgtok::TokKind Kind = + StringSwitch(StringRef(Start, CurPtr - Start)) + .Case("eq", tgtok::XEq) + .Case("if", tgtok::XIf) + .Case("head", tgtok::XHead) + .Case("tail", tgtok::XTail) + .Case("con", tgtok::XConcat) + .Case("shl", tgtok::XSHL) + .Case("sra", tgtok::XSRA) + .Case("srl", tgtok::XSRL) + .Case("cast", tgtok::XCast) + .Case("empty", tgtok::XEmpty) + .Case("subst", tgtok::XSubst) + .Case("foreach", tgtok::XForEach) + .Case("strconcat", tgtok::XStrConcat) + .Default(tgtok::Error); + + return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator"); +} + -- cgit v1.1 From b9c29eaa3d2c39ce549c4bb2849a4d901acc8368 Mon Sep 17 00:00:00 2001 From: David Greene Date: Wed, 5 Oct 2011 22:42:35 +0000 Subject: Lexer Support for Multidefs Add keyword support for multidefs. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141231 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 1 + 1 file changed, 1 insertion(+) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index 0dc1c70..a993cfd 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -228,6 +228,7 @@ tgtok::TokKind TGLexer::LexIdentifier() { if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class; if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def; + if (Len == 8 && !memcmp(IdentStart, "multidef", 3)) return tgtok::MultiDef; if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm; if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return tgtok::MultiClass; -- cgit v1.1 From c2d18f8929770cdcb8329e0e14e3a37ba068059a Mon Sep 17 00:00:00 2001 From: David Greene Date: Thu, 6 Oct 2011 14:37:47 +0000 Subject: Fix Typo Compare the entire keyword string. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141295 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index a993cfd..fec30d7 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -228,7 +228,7 @@ tgtok::TokKind TGLexer::LexIdentifier() { if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class; if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def; - if (Len == 8 && !memcmp(IdentStart, "multidef", 3)) return tgtok::MultiDef; + if (Len == 8 && !memcmp(IdentStart, "multidef", 8)) return tgtok::MultiDef; if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm; if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) return tgtok::MultiClass; -- cgit v1.1 From 37d42af584f07c78d990a8f1bb128046aab2182d Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 6 Oct 2011 18:23:56 +0000 Subject: Simplify code. No functionality change. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141299 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 50 +++++++++++++++++++++++------------------------- 1 file changed, 24 insertions(+), 26 deletions(-) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index fec30d7..55bf522 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -208,40 +208,38 @@ tgtok::TokKind TGLexer::LexVarName() { tgtok::TokKind TGLexer::LexIdentifier() { // The first letter is [a-zA-Z_#]. const char *IdentStart = TokStart; - + // Match the rest of the identifier regex: [0-9a-zA-Z_#]* while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' || *CurPtr == '#') ++CurPtr; - - + // Check to see if this identifier is a keyword. - unsigned Len = CurPtr-IdentStart; - - if (Len == 3 && !memcmp(IdentStart, "int", 3)) return tgtok::Int; - if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return tgtok::Bit; - if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return tgtok::Bits; - if (Len == 6 && !memcmp(IdentStart, "string", 6)) return tgtok::String; - if (Len == 4 && !memcmp(IdentStart, "list", 4)) return tgtok::List; - if (Len == 4 && !memcmp(IdentStart, "code", 4)) return tgtok::Code; - if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return tgtok::Dag; - - if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class; - if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def; - if (Len == 8 && !memcmp(IdentStart, "multidef", 8)) return tgtok::MultiDef; - if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm; - if (Len == 10 && !memcmp(IdentStart, "multiclass", 10)) - return tgtok::MultiClass; - if (Len == 5 && !memcmp(IdentStart, "field", 5)) return tgtok::Field; - if (Len == 3 && !memcmp(IdentStart, "let", 3)) return tgtok::Let; - if (Len == 2 && !memcmp(IdentStart, "in", 2)) return tgtok::In; - - if (Len == 7 && !memcmp(IdentStart, "include", 7)) { + StringRef Str(IdentStart, CurPtr-IdentStart); + + if (Str == "int") return tgtok::Int; + if (Str == "bit") return tgtok::Bit; + if (Str == "bits") return tgtok::Bits; + if (Str == "string") return tgtok::String; + if (Str == "list") return tgtok::List; + if (Str == "code") return tgtok::Code; + if (Str == "dag") return tgtok::Dag; + + if (Str == "class") return tgtok::Class; + if (Str == "def") return tgtok::Def; + if (Str == "multidef") return tgtok::MultiDef; + if (Str == "defm") return tgtok::Defm; + if (Str == "multiclass") return tgtok::MultiClass; + if (Str == "field") return tgtok::Field; + if (Str == "let") return tgtok::Let; + if (Str == "in") return tgtok::In; + + if (Str == "include") { if (LexInclude()) return tgtok::Error; return Lex(); } - - CurStrVal.assign(IdentStart, CurPtr); + + CurStrVal.assign(Str.begin(), Str.end()); return tgtok::Id; } -- cgit v1.1 From ee573189c653c3261102ccd627bb571ab7535034 Mon Sep 17 00:00:00 2001 From: Benjamin Kramer Date: Thu, 6 Oct 2011 18:53:43 +0000 Subject: Use StringSwitch. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141305 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 40 +++++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 19 deletions(-) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index 55bf522..5a6c8aa 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -217,30 +217,32 @@ tgtok::TokKind TGLexer::LexIdentifier() { // Check to see if this identifier is a keyword. StringRef Str(IdentStart, CurPtr-IdentStart); - if (Str == "int") return tgtok::Int; - if (Str == "bit") return tgtok::Bit; - if (Str == "bits") return tgtok::Bits; - if (Str == "string") return tgtok::String; - if (Str == "list") return tgtok::List; - if (Str == "code") return tgtok::Code; - if (Str == "dag") return tgtok::Dag; - - if (Str == "class") return tgtok::Class; - if (Str == "def") return tgtok::Def; - if (Str == "multidef") return tgtok::MultiDef; - if (Str == "defm") return tgtok::Defm; - if (Str == "multiclass") return tgtok::MultiClass; - if (Str == "field") return tgtok::Field; - if (Str == "let") return tgtok::Let; - if (Str == "in") return tgtok::In; - if (Str == "include") { if (LexInclude()) return tgtok::Error; return Lex(); } - CurStrVal.assign(Str.begin(), Str.end()); - return tgtok::Id; + tgtok::TokKind Kind = StringSwitch(Str) + .Case("int", tgtok::Int) + .Case("bit", tgtok::Bit) + .Case("bits", tgtok::Bits) + .Case("string", tgtok::String) + .Case("list", tgtok::List) + .Case("code", tgtok::Code) + .Case("dag", tgtok::Dag) + .Case("class", tgtok::Class) + .Case("def", tgtok::Def) + .Case("multidef", tgtok::MultiDef) + .Case("defm", tgtok::Defm) + .Case("multiclass", tgtok::MultiClass) + .Case("field", tgtok::Field) + .Case("let", tgtok::Let) + .Case("in", tgtok::In) + .Default(tgtok::Id); + + if (Kind == tgtok::Id) + CurStrVal.assign(Str.begin(), Str.end()); + return Kind; } /// LexInclude - We just read the "include" token. Get the string token that -- cgit v1.1 From a1b1b79be15c4b79a4282f148085ebad1cf877ca Mon Sep 17 00:00:00 2001 From: David Greene Date: Fri, 7 Oct 2011 18:25:05 +0000 Subject: Remove Multidefs Multidefs are a bit unwieldy and incomplete. Remove them in favor of another mechanism, probably for loops. Revert "Make Test More Thorough" Revert "Fix a typo." Revert "Vim Support for Multidefs" Revert "Emacs Support for Multidefs" Revert "Document Multidefs" Revert "Add a Multidef Test" Revert "Update Test for Multidefs" Revert "Process Multidefs" Revert "Parser Multidef Support" Revert "Lexer Support for Multidefs" Revert "Add Multidef Data Structures" git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141378 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index 5a6c8aa..8c1b429 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -232,7 +232,6 @@ tgtok::TokKind TGLexer::LexIdentifier() { .Case("dag", tgtok::Dag) .Case("class", tgtok::Class) .Case("def", tgtok::Def) - .Case("multidef", tgtok::MultiDef) .Case("defm", tgtok::Defm) .Case("multiclass", tgtok::MultiClass) .Case("field", tgtok::Field) -- cgit v1.1 From a761f92cd38572dd65cc995c5f59b9c2c0f51068 Mon Sep 17 00:00:00 2001 From: David Greene Date: Wed, 19 Oct 2011 13:03:35 +0000 Subject: Add Peek Add a peek function to let the Lexer look at a character arbitrarily far ahead in the stream without consuming anything. We need this to disambiguate numbers and operands of a paste operation. For example: def foo#8i Without lookahead the lexer will treat '8' as a number rather than as part of a string to be pasted to form an identifier. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142512 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index 8c1b429..c1b00b6 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -80,6 +80,10 @@ int TGLexer::getNextChar() { } } +int TGLexer::peekNextChar(int Index) { + return *(CurPtr + Index); +} + tgtok::TokKind TGLexer::LexToken() { TokStart = CurPtr; // This always consumes at least one character. -- cgit v1.1 From 7efe93625183a52733c23adc02c5c9c4337a7970 Mon Sep 17 00:00:00 2001 From: David Greene Date: Wed, 19 Oct 2011 13:03:39 +0000 Subject: Disambiguate Numbers and Identifiers Use lookahead to determine whether a number is really a number or is part of something forming an identifier. This won't come into play until the paste operator is recognized as a unique token. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142513 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 38 +++++++++++++++++++++++++++++++++++++- 1 file changed, 37 insertions(+), 1 deletion(-) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index c1b00b6..3262121 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -132,8 +132,44 @@ tgtok::TokKind TGLexer::LexToken() { return LexToken(); case '-': case '+': case '0': case '1': case '2': case '3': case '4': case '5': case '6': - case '7': case '8': case '9': + case '7': case '8': case '9': { + int NextChar = 0; + if (isdigit(CurChar)) { + // Allow identifiers to start with a number if it is followed by + // an identifier. This can happen with paste operations like + // foo#8i. + int i = 0; + do { + NextChar = peekNextChar(i++); + } while (isdigit(NextChar)); + + if (NextChar == 'x' || NextChar == 'b') { + // If this is [0-9]b[01] or [0-9]x[0-9A-fa-f] this is most + // likely a number. + int NextNextChar = peekNextChar(i); + switch (NextNextChar) { + default: + break; + case '0': case '1': + if (NextChar == 'b') + return LexNumber(); + // Fallthrough + case '2': case '3': case '4': case '5': + case '6': case '7': case '8': case '9': + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + if (NextChar == 'x') + return LexNumber(); + break; + } + } + } + + if (isalpha(NextChar) || NextChar == '_') + return LexIdentifier(); + return LexNumber(); + } case '"': return LexString(); case '$': return LexVarName(); case '[': return LexBracket(); -- cgit v1.1 From d3d1cad535d1c88e13e8e082c136260ee624967f Mon Sep 17 00:00:00 2001 From: David Greene Date: Wed, 19 Oct 2011 13:04:43 +0000 Subject: Implement Paste Add a paste operator '#' to take two identifier-like strings and joint them. Internally paste gets represented as a !strconcat() with any necessary casts to string added. This will be used to implement basic for loop functionality as in: for i = [0, 1, 2, 3, 4, 5, 6, 7] { def R#i : Register<...> } git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142525 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/TableGen/TGLexer.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'lib/TableGen/TGLexer.cpp') diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp index 3262121..45d0b1e 100644 --- a/lib/TableGen/TGLexer.cpp +++ b/lib/TableGen/TGLexer.cpp @@ -91,10 +91,10 @@ tgtok::TokKind TGLexer::LexToken() { switch (CurChar) { default: - // Handle letters: [a-zA-Z_#] - if (isalpha(CurChar) || CurChar == '_' || CurChar == '#') + // Handle letters: [a-zA-Z_] + if (isalpha(CurChar) || CurChar == '_') return LexIdentifier(); - + // Unknown character, emit an error. return ReturnError(TokStart, "Unexpected character"); case EOF: return tgtok::Eof; @@ -111,6 +111,7 @@ tgtok::TokKind TGLexer::LexToken() { case ')': return tgtok::r_paren; case '=': return tgtok::equal; case '?': return tgtok::question; + case '#': return tgtok::paste; case 0: case ' ': @@ -250,8 +251,7 @@ tgtok::TokKind TGLexer::LexIdentifier() { const char *IdentStart = TokStart; // Match the rest of the identifier regex: [0-9a-zA-Z_#]* - while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' || - *CurPtr == '#') + while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_') ++CurPtr; // Check to see if this identifier is a keyword. -- cgit v1.1