From 7c788888872233748da10a8177a9a1eb176c1bc8 Mon Sep 17 00:00:00 2001
From: Peter Collingbourne <peter@pcc.me.uk>
Date: Sat, 1 Oct 2011 16:41:13 +0000
Subject: Move TableGen's parser and entry point into a library

This is the first step towards splitting LLVM and Clang's tblgen executables.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@140951 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 435 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 435 insertions(+)
 create mode 100644 lib/TableGen/TGLexer.cpp

(limited to 'lib/TableGen/TGLexer.cpp')
diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
new file mode 100644
index 0000000..0dc1c70
--- /dev/null
+++ b/lib/TableGen/TGLexer.cpp
@@ -0,0 +1,435 @@
+//===- TGLexer.cpp - Lexer for TableGen -----------------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implement the Lexer for TableGen.
+//
+//===----------------------------------------------------------------------===//
+
+#include "TGLexer.h"
+#include "llvm/TableGen/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Config/config.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/ADT/Twine.h"
+#include <cctype>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <cerrno>
+using namespace llvm;
+
+TGLexer::TGLexer(SourceMgr &SM) : SrcMgr(SM) {
+  CurBuffer = 0;
+  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+  CurPtr = CurBuf->getBufferStart();
+  TokStart = 0;
+}
+
+SMLoc TGLexer::getLoc() const {
+  return SMLoc::getFromPointer(TokStart);
+}
+
+/// ReturnError - Pass the specified message and location to PrintError.
+/// This is defined to always return tgtok::Error.
+tgtok::TokKind TGLexer::ReturnError(const char *Loc, const Twine &Msg) {
+  PrintError(Loc, Msg);
+  return tgtok::Error;
+}
+
+int TGLexer::getNextChar() {
+  char CurChar = *CurPtr++;
+  switch (CurChar) {
+  default:
+    return (unsigned char)CurChar;
+  case 0: {
+    // A nul character in the stream is either the end of the current buffer or
+    // a random nul in the file.  Disambiguate that here.
+    if (CurPtr-1 != CurBuf->getBufferEnd())
+      return 0;  // Just whitespace.
+    
+    // If this is the end of an included file, pop the parent file off the
+    // include stack.
+    SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer);
+    if (ParentIncludeLoc != SMLoc()) {
+      CurBuffer = SrcMgr.FindBufferContainingLoc(ParentIncludeLoc);
+      CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+      CurPtr = ParentIncludeLoc.getPointer();
+      return getNextChar();
+    }
+    
+    // Otherwise, return end of file.
+    --CurPtr;  // Another call to lex will return EOF again.  
+    return EOF;
+  }
+  case '\n':
+  case '\r':
+    // Return every line ending as a single '\n'.  Be careful about 'dos
+    // style' files with \r\n in them: a \r\n or \n\r pair is consumed as one
+    // newline, while \n\n or \r\r still count as two separate newlines.
+    if ((*CurPtr == '\n' || (*CurPtr == '\r')) &&
+        *CurPtr != CurChar)
+      ++CurPtr;  // Eat the two char newline sequence.
+    return '\n';
+  }  
+}
+
+tgtok::TokKind TGLexer::LexToken() {
+  TokStart = CurPtr;
+  // This always consumes at least one character.
+  int CurChar = getNextChar();
+
+  switch (CurChar) {
+  default:
+    // Handle letters: [a-zA-Z_#]
+    if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+      return LexIdentifier();
+      
+    // Unknown character, emit an error.
+    return ReturnError(TokStart, "Unexpected character");
+  case EOF: return tgtok::Eof;
+  case ':': return tgtok::colon;
+  case ';': return tgtok::semi;
+  case '.': return tgtok::period;
+  case ',': return tgtok::comma;
+  case '<': return tgtok::less;
+  case '>': return tgtok::greater;
+  case ']': return tgtok::r_square;
+  case '{': return tgtok::l_brace;
+  case '}': return tgtok::r_brace;
+  case '(': return tgtok::l_paren;
+  case ')': return tgtok::r_paren;
+  case '=': return tgtok::equal;
+  case '?': return tgtok::question;
+      
+  case 0:
+  case ' ':
+  case '\t':
+  case '\n':
+  case '\r':
+    // Ignore whitespace.
+    return LexToken();
+  case '/':
+    // If this is the start of a // comment, skip until the end of the line or
+    // the end of the buffer.
+    if (*CurPtr == '/')
+      SkipBCPLComment();
+    else if (*CurPtr == '*') {
+      if (SkipCComment())
+        return tgtok::Error;
+    } else // Otherwise, this is an error.
+      return ReturnError(TokStart, "Unexpected character");
+    return LexToken();
+  case '-': case '+':
+  case '0': case '1': case '2': case '3': case '4': case '5': case '6':
+  case '7': case '8': case '9':  
+    return LexNumber();
+  case '"': return LexString();
+  case '$': return LexVarName();
+  case '[': return LexBracket();
+  case '!': return LexExclaim();
+  }
+}
+
+/// LexString - Lex a "..." literal, decoding the \\ \' \" \t \n escapes.
+tgtok::TokKind TGLexer::LexString() {
+  const char *StrStart = CurPtr;
+  
+  CurStrVal = "";
+  
+  while (*CurPtr != '"') {
+    // If we hit the end of the buffer, report an error.
+    if (*CurPtr == 0 && CurPtr == CurBuf->getBufferEnd())
+      return ReturnError(StrStart, "End of file in string literal");
+    
+    if (*CurPtr == '\n' || *CurPtr == '\r')
+      return ReturnError(StrStart, "End of line in string literal");
+    
+    if (*CurPtr != '\\') {
+      CurStrVal += *CurPtr++;
+      continue;
+    }
+
+    ++CurPtr;
+    
+    switch (*CurPtr) {
+    case '\\': case '\'': case '"':
+      // These turn into their literal character.
+      CurStrVal += *CurPtr++;
+      break;
+    case 't':
+      CurStrVal += '\t';
+      ++CurPtr;
+      break;
+    case 'n':
+      CurStrVal += '\n';
+      ++CurPtr;
+      break;
+        
+    case '\n':
+    case '\r':
+      return ReturnError(CurPtr, "escaped newlines not supported in tblgen");
+
+    // If we hit the end of the buffer, report an error.
+    case '\0':
+      if (CurPtr == CurBuf->getBufferEnd())
+        return ReturnError(StrStart, "End of file in string literal");
+      // FALL THROUGH
+    default:
+      return ReturnError(CurPtr, "invalid escape in string literal");
+    }
+  }
+  
+  ++CurPtr;
+  return tgtok::StrVal;
+}
+
+tgtok::TokKind TGLexer::LexVarName() {
+  if (!isalpha(CurPtr[0]) && CurPtr[0] != '_')
+    return ReturnError(TokStart, "Invalid variable name");
+  
+  // Otherwise, we're ok, consume the rest of the characters.
+  const char *VarNameStart = CurPtr++;
+  
+  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
+    ++CurPtr;
+
+  CurStrVal.assign(VarNameStart, CurPtr);
+  return tgtok::VarName;
+}
+
+
+tgtok::TokKind TGLexer::LexIdentifier() {
+  // The first letter is [a-zA-Z_#].
+  const char *IdentStart = TokStart;
+  
+  // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
+  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
+         *CurPtr == '#')
+    ++CurPtr;
+  
+  
+  // Check to see if this identifier is a keyword.
+  unsigned Len = CurPtr-IdentStart;
+  
+  if (Len == 3 && !memcmp(IdentStart, "int", 3)) return tgtok::Int;
+  if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return tgtok::Bit;
+  if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return tgtok::Bits;
+  if (Len == 6 && !memcmp(IdentStart, "string", 6)) return tgtok::String;
+  if (Len == 4 && !memcmp(IdentStart, "list", 4)) return tgtok::List;
+  if (Len == 4 && !memcmp(IdentStart, "code", 4)) return tgtok::Code;
+  if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return tgtok::Dag;
+  
+  if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class;
+  if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def;
+  if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm;
+  if (Len == 10 && !memcmp(IdentStart, "multiclass", 10))
+    return tgtok::MultiClass;
+  if (Len == 5 && !memcmp(IdentStart, "field", 5)) return tgtok::Field;
+  if (Len == 3 && !memcmp(IdentStart, "let", 3)) return tgtok::Let;
+  if (Len == 2 && !memcmp(IdentStart, "in", 2)) return tgtok::In;
+  
+  if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
+    if (LexInclude()) return tgtok::Error;
+    return Lex();
+  }
+    
+  CurStrVal.assign(IdentStart, CurPtr);
+  return tgtok::Id;
+}
+
+/// LexInclude - We just read the "include" token.  Get the string token that
+/// comes next and enter the include.
+bool TGLexer::LexInclude() {
+  // The token after the include must be a string.
+  tgtok::TokKind Tok = LexToken();
+  if (Tok == tgtok::Error) return true;
+  if (Tok != tgtok::StrVal) {
+    PrintError(getLoc(), "Expected filename after include");
+    return true;
+  }
+
+  // Get the string.
+  std::string Filename = CurStrVal;
+  std::string IncludedFile;
+
+  
+  CurBuffer = SrcMgr.AddIncludeFile(Filename, SMLoc::getFromPointer(CurPtr),
+                                    IncludedFile);
+  if (CurBuffer == -1) {
+    PrintError(getLoc(), "Could not find include file '" + Filename + "'");
+    return true;
+  }
+  
+  Dependencies.push_back(IncludedFile);
+  // Switch the lexer to the start of the included file's buffer.
+  CurBuf = SrcMgr.getMemoryBuffer(CurBuffer);
+  CurPtr = CurBuf->getBufferStart();
+  return false;
+}
+
+void TGLexer::SkipBCPLComment() {
+  ++CurPtr;  // skip the second slash.
+  while (1) {
+    switch (*CurPtr) {
+    case '\n':
+    case '\r':
+      return;  // Newline is end of comment.
+    case 0:
+      // If this is the end of the buffer, end the comment.
+      if (CurPtr == CurBuf->getBufferEnd())
+        return;
+      break;
+    }
+    // Otherwise, skip the character.
+    ++CurPtr;
+  }
+}
+
+/// SkipCComment - This skips C-style /**/ comments.  The only difference from C
+/// is that we allow nesting.
+bool TGLexer::SkipCComment() {
+  ++CurPtr;  // skip the star.
+  unsigned CommentDepth = 1;
+  
+  while (1) {
+    int CurChar = getNextChar();
+    switch (CurChar) {
+    case EOF:
+      PrintError(TokStart, "Unterminated comment!");
+      return true;
+    case '*':
+      // End of the comment?
+      if (CurPtr[0] != '/') break;
+      
+      ++CurPtr;   // End the */.
+      if (--CommentDepth == 0)
+        return false;
+      break;
+    case '/':
+      // Start of a nested comment?
+      if (CurPtr[0] != '*') break;
+      ++CurPtr;
+      ++CommentDepth;
+      break;
+    }
+  }
+}
+
+/// LexNumber - Lex:
+///    [-+]?[0-9]+      (a lone '-' or '+' is lexed as minus / plus)
+///    0x[0-9a-fA-F]+
+///    0b[01]+
+tgtok::TokKind TGLexer::LexNumber() {
+  if (CurPtr[-1] == '0') {
+    if (CurPtr[0] == 'x') {
+      ++CurPtr;
+      const char *NumStart = CurPtr;
+      while (isxdigit(CurPtr[0]))
+        ++CurPtr;
+      
+      // Requires at least one hex digit.
+      if (CurPtr == NumStart)
+        return ReturnError(TokStart, "Invalid hexadecimal number");
+
+      errno = 0;
+      CurIntVal = strtoll(NumStart, 0, 16);
+      if (errno == EINVAL)
+        return ReturnError(TokStart, "Invalid hexadecimal number");
+      if (errno == ERANGE) {
+        errno = 0;
+        CurIntVal = (int64_t)strtoull(NumStart, 0, 16);
+        if (errno == EINVAL)
+          return ReturnError(TokStart, "Invalid hexadecimal number");
+        if (errno == ERANGE)
+          return ReturnError(TokStart, "Hexadecimal number out of range");
+      }
+      return tgtok::IntVal;
+    } else if (CurPtr[0] == 'b') {
+      ++CurPtr;
+      const char *NumStart = CurPtr;
+      while (CurPtr[0] == '0' || CurPtr[0] == '1')
+        ++CurPtr;
+
+      // Requires at least one binary digit.
+      if (CurPtr == NumStart)
+        return ReturnError(CurPtr-2, "Invalid binary number");
+      CurIntVal = strtoll(NumStart, 0, 2);
+      return tgtok::IntVal;
+    }
+  }
+
+  // Check for a sign without a digit.
+  if (!isdigit(CurPtr[0])) {
+    if (CurPtr[-1] == '-')
+      return tgtok::minus;
+    else if (CurPtr[-1] == '+')
+      return tgtok::plus;
+  }
+  
+  while (isdigit(CurPtr[0]))
+    ++CurPtr;
+  CurIntVal = strtoll(TokStart, 0, 10);
+  return tgtok::IntVal;
+}
+
+/// LexBracket - We just read '['.  If this is a code block, return it,
+/// otherwise return the bracket.  Match: '[' and '[{ ( [^}]+ | }[^]] )* }]'
+tgtok::TokKind TGLexer::LexBracket() {
+  if (CurPtr[0] != '{')
+    return tgtok::l_square;
+  ++CurPtr;
+  const char *CodeStart = CurPtr;
+  while (1) {
+    int Char = getNextChar();
+    if (Char == EOF) break;
+    
+    if (Char != '}') continue;
+    
+    Char = getNextChar();
+    if (Char == EOF) break;
+    if (Char == ']') {
+      CurStrVal.assign(CodeStart, CurPtr-2);
+      return tgtok::CodeFragment;
+    }
+  }
+  
+  return ReturnError(CodeStart-2, "Unterminated Code Block");
+}
+
+/// LexExclaim - We just read '!'.  Lex '![a-zA-Z]+'; a bare '!' is an error.
+tgtok::TokKind TGLexer::LexExclaim() {
+  if (!isalpha(*CurPtr))
+    return ReturnError(CurPtr - 1, "Invalid \"!operator\"");
+  
+  const char *Start = CurPtr++;
+  while (isalpha(*CurPtr))
+    ++CurPtr;
+  
+  // Check to see which operator this is.
+  tgtok::TokKind Kind =
+    StringSwitch<tgtok::TokKind>(StringRef(Start, CurPtr - Start))
+    .Case("eq", tgtok::XEq)
+    .Case("if", tgtok::XIf)
+    .Case("head", tgtok::XHead)
+    .Case("tail", tgtok::XTail)
+    .Case("con", tgtok::XConcat)
+    .Case("shl", tgtok::XSHL)
+    .Case("sra", tgtok::XSRA)
+    .Case("srl", tgtok::XSRL)
+    .Case("cast", tgtok::XCast)
+    .Case("empty", tgtok::XEmpty)
+    .Case("subst", tgtok::XSubst)
+    .Case("foreach", tgtok::XForEach)
+    .Case("strconcat", tgtok::XStrConcat)
+    .Default(tgtok::Error);
+
+  return Kind != tgtok::Error ? Kind : ReturnError(Start-1, "Unknown operator");
+}
+
-- 
cgit v1.1


From b9c29eaa3d2c39ce549c4bb2849a4d901acc8368 Mon Sep 17 00:00:00 2001
From: David Greene <greened@obbligato.org>
Date: Wed, 5 Oct 2011 22:42:35 +0000
Subject: Lexer Support for Multidefs

Add keyword support for multidefs.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141231 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 1 +
 1 file changed, 1 insertion(+)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 0dc1c70..a993cfd 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -228,6 +228,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
   
   if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class;
   if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def;
+  if (Len == 8 && !memcmp(IdentStart, "multidef", 3)) return tgtok::MultiDef;
   if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm;
   if (Len == 10 && !memcmp(IdentStart, "multiclass", 10))
     return tgtok::MultiClass;
-- 
cgit v1.1


From c2d18f8929770cdcb8329e0e14e3a37ba068059a Mon Sep 17 00:00:00 2001
From: David Greene <greened@obbligato.org>
Date: Thu, 6 Oct 2011 14:37:47 +0000
Subject: Fix Typo

Compare the entire keyword string.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141295 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index a993cfd..fec30d7 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -228,7 +228,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
   
   if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class;
   if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def;
-  if (Len == 8 && !memcmp(IdentStart, "multidef", 3)) return tgtok::MultiDef;
+  if (Len == 8 && !memcmp(IdentStart, "multidef", 8)) return tgtok::MultiDef;
   if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm;
   if (Len == 10 && !memcmp(IdentStart, "multiclass", 10))
     return tgtok::MultiClass;
-- 
cgit v1.1


From 37d42af584f07c78d990a8f1bb128046aab2182d Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 6 Oct 2011 18:23:56 +0000
Subject: Simplify code. No functionality change.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141299 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 50 +++++++++++++++++++++++-------------------------
 1 file changed, 24 insertions(+), 26 deletions(-)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index fec30d7..55bf522 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -208,40 +208,38 @@ tgtok::TokKind TGLexer::LexVarName() {
 tgtok::TokKind TGLexer::LexIdentifier() {
   // The first letter is [a-zA-Z_#].
   const char *IdentStart = TokStart;
-  
+
   // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
   while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
          *CurPtr == '#')
     ++CurPtr;
-  
-  
+
   // Check to see if this identifier is a keyword.
-  unsigned Len = CurPtr-IdentStart;
-  
-  if (Len == 3 && !memcmp(IdentStart, "int", 3)) return tgtok::Int;
-  if (Len == 3 && !memcmp(IdentStart, "bit", 3)) return tgtok::Bit;
-  if (Len == 4 && !memcmp(IdentStart, "bits", 4)) return tgtok::Bits;
-  if (Len == 6 && !memcmp(IdentStart, "string", 6)) return tgtok::String;
-  if (Len == 4 && !memcmp(IdentStart, "list", 4)) return tgtok::List;
-  if (Len == 4 && !memcmp(IdentStart, "code", 4)) return tgtok::Code;
-  if (Len == 3 && !memcmp(IdentStart, "dag", 3)) return tgtok::Dag;
-  
-  if (Len == 5 && !memcmp(IdentStart, "class", 5)) return tgtok::Class;
-  if (Len == 3 && !memcmp(IdentStart, "def", 3)) return tgtok::Def;
-  if (Len == 8 && !memcmp(IdentStart, "multidef", 8)) return tgtok::MultiDef;
-  if (Len == 4 && !memcmp(IdentStart, "defm", 4)) return tgtok::Defm;
-  if (Len == 10 && !memcmp(IdentStart, "multiclass", 10))
-    return tgtok::MultiClass;
-  if (Len == 5 && !memcmp(IdentStart, "field", 5)) return tgtok::Field;
-  if (Len == 3 && !memcmp(IdentStart, "let", 3)) return tgtok::Let;
-  if (Len == 2 && !memcmp(IdentStart, "in", 2)) return tgtok::In;
-  
-  if (Len == 7 && !memcmp(IdentStart, "include", 7)) {
+  StringRef Str(IdentStart, CurPtr-IdentStart);
+
+  if (Str == "int") return tgtok::Int;
+  if (Str == "bit") return tgtok::Bit;
+  if (Str == "bits") return tgtok::Bits;
+  if (Str == "string") return tgtok::String;
+  if (Str == "list") return tgtok::List;
+  if (Str == "code") return tgtok::Code;
+  if (Str == "dag") return tgtok::Dag;
+
+  if (Str == "class") return tgtok::Class;
+  if (Str == "def") return tgtok::Def;
+  if (Str == "multidef") return tgtok::MultiDef;
+  if (Str == "defm") return tgtok::Defm;
+  if (Str == "multiclass") return tgtok::MultiClass;
+  if (Str == "field") return tgtok::Field;
+  if (Str == "let") return tgtok::Let;
+  if (Str == "in") return tgtok::In;
+
+  if (Str == "include") {
     if (LexInclude()) return tgtok::Error;
     return Lex();
   }
-    
-  CurStrVal.assign(IdentStart, CurPtr);
+
+  CurStrVal.assign(Str.begin(), Str.end());
   return tgtok::Id;
 }
 
-- 
cgit v1.1


From ee573189c653c3261102ccd627bb571ab7535034 Mon Sep 17 00:00:00 2001
From: Benjamin Kramer <benny.kra@googlemail.com>
Date: Thu, 6 Oct 2011 18:53:43 +0000
Subject: Use StringSwitch.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141305 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 40 +++++++++++++++++++++-------------------
 1 file changed, 21 insertions(+), 19 deletions(-)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 55bf522..5a6c8aa 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -217,30 +217,32 @@ tgtok::TokKind TGLexer::LexIdentifier() {
   // Check to see if this identifier is a keyword.
   StringRef Str(IdentStart, CurPtr-IdentStart);
 
-  if (Str == "int") return tgtok::Int;
-  if (Str == "bit") return tgtok::Bit;
-  if (Str == "bits") return tgtok::Bits;
-  if (Str == "string") return tgtok::String;
-  if (Str == "list") return tgtok::List;
-  if (Str == "code") return tgtok::Code;
-  if (Str == "dag") return tgtok::Dag;
-
-  if (Str == "class") return tgtok::Class;
-  if (Str == "def") return tgtok::Def;
-  if (Str == "multidef") return tgtok::MultiDef;
-  if (Str == "defm") return tgtok::Defm;
-  if (Str == "multiclass") return tgtok::MultiClass;
-  if (Str == "field") return tgtok::Field;
-  if (Str == "let") return tgtok::Let;
-  if (Str == "in") return tgtok::In;
-
   if (Str == "include") {
     if (LexInclude()) return tgtok::Error;
     return Lex();
   }
 
-  CurStrVal.assign(Str.begin(), Str.end());
-  return tgtok::Id;
+  tgtok::TokKind Kind = StringSwitch<tgtok::TokKind>(Str)
+    .Case("int", tgtok::Int)
+    .Case("bit", tgtok::Bit)
+    .Case("bits", tgtok::Bits)
+    .Case("string", tgtok::String)
+    .Case("list", tgtok::List)
+    .Case("code", tgtok::Code)
+    .Case("dag", tgtok::Dag)
+    .Case("class", tgtok::Class)
+    .Case("def", tgtok::Def)
+    .Case("multidef", tgtok::MultiDef)
+    .Case("defm", tgtok::Defm)
+    .Case("multiclass", tgtok::MultiClass)
+    .Case("field", tgtok::Field)
+    .Case("let", tgtok::Let)
+    .Case("in", tgtok::In)
+    .Default(tgtok::Id);
+
+  if (Kind == tgtok::Id)
+    CurStrVal.assign(Str.begin(), Str.end());
+  return Kind;
 }
 
 /// LexInclude - We just read the "include" token.  Get the string token that
-- 
cgit v1.1


From a1b1b79be15c4b79a4282f148085ebad1cf877ca Mon Sep 17 00:00:00 2001
From: David Greene <greened@obbligato.org>
Date: Fri, 7 Oct 2011 18:25:05 +0000
Subject: Remove Multidefs

Multidefs are a bit unwieldy and incomplete.  Remove them in favor of
another mechanism, probably for loops.

Revert "Make Test More Thorough"
Revert "Fix a typo."
Revert "Vim Support for Multidefs"
Revert "Emacs Support for Multidefs"
Revert "Document Multidefs"
Revert "Add a Multidef Test"
Revert "Update Test for Multidefs"
Revert "Process Multidefs"
Revert "Parser Multidef Support"
Revert "Lexer Support for Multidefs"
Revert "Add Multidef Data Structures"

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@141378 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 1 -
 1 file changed, 1 deletion(-)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 5a6c8aa..8c1b429 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -232,7 +232,6 @@ tgtok::TokKind TGLexer::LexIdentifier() {
     .Case("dag", tgtok::Dag)
     .Case("class", tgtok::Class)
     .Case("def", tgtok::Def)
-    .Case("multidef", tgtok::MultiDef)
     .Case("defm", tgtok::Defm)
     .Case("multiclass", tgtok::MultiClass)
     .Case("field", tgtok::Field)
-- 
cgit v1.1


From a761f92cd38572dd65cc995c5f59b9c2c0f51068 Mon Sep 17 00:00:00 2001
From: David Greene <greened@obbligato.org>
Date: Wed, 19 Oct 2011 13:03:35 +0000
Subject: Add Peek

Add a peek function to let the Lexer look at a character arbitrarily
far ahead in the stream without consuming anything.  We need this to
disambiguate numbers and operands of a paste operation.  For example:

def foo#8i

Without lookahead the lexer will treat '8' as a number rather than as
part of a string to be pasted to form an identifier.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142512 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 8c1b429..c1b00b6 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -80,6 +80,10 @@ int TGLexer::getNextChar() {
   }  
 }
 
+int TGLexer::peekNextChar(int Index) {
+  return *(CurPtr + Index);
+}
+
 tgtok::TokKind TGLexer::LexToken() {
   TokStart = CurPtr;
   // This always consumes at least one character.
-- 
cgit v1.1


From 7efe93625183a52733c23adc02c5c9c4337a7970 Mon Sep 17 00:00:00 2001
From: David Greene <greened@obbligato.org>
Date: Wed, 19 Oct 2011 13:03:39 +0000
Subject: Disambiguate Numbers and Identifiers

Use lookahead to determine whether a number is really a number or is
part of something forming an identifier.  This won't come into play
until the paste operator is recognized as a unique token.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142513 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 38 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 37 insertions(+), 1 deletion(-)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index c1b00b6..3262121 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -132,8 +132,44 @@ tgtok::TokKind TGLexer::LexToken() {
     return LexToken();
   case '-': case '+':
   case '0': case '1': case '2': case '3': case '4': case '5': case '6':
-  case '7': case '8': case '9':  
+  case '7': case '8': case '9': {
+    int NextChar = 0;
+    if (isdigit(CurChar)) {
+      // Allow identifiers to start with a number if it is followed by
+      // an identifier.  This can happen with paste operations like
+      // foo#8i.
+      int i = 0;
+      do {
+        NextChar = peekNextChar(i++);
+      } while (isdigit(NextChar));
+
+      if (NextChar == 'x' || NextChar == 'b') {
+        // If this is [0-9]b[01] or [0-9]x[0-9A-Fa-f] this is most
+        // likely a number.
+        int NextNextChar = peekNextChar(i);
+        switch (NextNextChar) {
+        default:
+          break;
+        case '0': case '1': 
+          if (NextChar == 'b')
+            return LexNumber();
+          // Fallthrough
+        case '2': case '3': case '4': case '5':
+        case '6': case '7': case '8': case '9':
+        case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+        case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+          if (NextChar == 'x')
+            return LexNumber();
+          break;
+        }
+      }
+    }
+
+    if (isalpha(NextChar) || NextChar == '_')
+      return LexIdentifier();
+
     return LexNumber();
+  }
   case '"': return LexString();
   case '$': return LexVarName();
   case '[': return LexBracket();
-- 
cgit v1.1


From d3d1cad535d1c88e13e8e082c136260ee624967f Mon Sep 17 00:00:00 2001
From: David Greene <greened@obbligato.org>
Date: Wed, 19 Oct 2011 13:04:43 +0000
Subject: Implement Paste

Add a paste operator '#' to take two identifier-like strings and join
them.  Internally paste gets represented as a !strconcat() with any
necessary casts to string added.

This will be used to implement basic for loop functionality as in:

for i = [0, 1, 2, 3, 4, 5, 6, 7] {
  def R#i : Register<...>
}

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@142525 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/TableGen/TGLexer.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'lib/TableGen/TGLexer.cpp')

diff --git a/lib/TableGen/TGLexer.cpp b/lib/TableGen/TGLexer.cpp
index 3262121..45d0b1e 100644
--- a/lib/TableGen/TGLexer.cpp
+++ b/lib/TableGen/TGLexer.cpp
@@ -91,10 +91,10 @@ tgtok::TokKind TGLexer::LexToken() {
 
   switch (CurChar) {
   default:
-    // Handle letters: [a-zA-Z_#]
-    if (isalpha(CurChar) || CurChar == '_' || CurChar == '#')
+    // Handle letters: [a-zA-Z_]
+    if (isalpha(CurChar) || CurChar == '_')
       return LexIdentifier();
-      
+
     // Unknown character, emit an error.
     return ReturnError(TokStart, "Unexpected character");
   case EOF: return tgtok::Eof;
@@ -111,6 +111,7 @@ tgtok::TokKind TGLexer::LexToken() {
   case ')': return tgtok::r_paren;
   case '=': return tgtok::equal;
   case '?': return tgtok::question;
+  case '#': return tgtok::paste;
       
   case 0:
   case ' ':
@@ -250,8 +251,7 @@ tgtok::TokKind TGLexer::LexIdentifier() {
   const char *IdentStart = TokStart;
 
   // Match the rest of the identifier regex: [0-9a-zA-Z_#]*
-  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_' ||
-         *CurPtr == '#')
+  while (isalpha(*CurPtr) || isdigit(*CurPtr) || *CurPtr == '_')
     ++CurPtr;
 
   // Check to see if this identifier is a keyword.
-- 
cgit v1.1