diff options
author | Chris Lattner <sabre@nondot.org> | 2009-09-27 07:56:52 +0000 |
---|---|---|
committer | Chris Lattner <sabre@nondot.org> | 2009-09-27 07:56:52 +0000 |
commit | 8599166fed9834e8ba77289f61214bfd9a050b20 (patch) | |
tree | ce88177bfb43ff68eacbacff55802074117a4877 /utils | |
parent | 3b3b54d35e1e9f0089c7efdca231a04c77e7d061 (diff) | |
download | external_llvm-8599166fed9834e8ba77289f61214bfd9a050b20.zip external_llvm-8599166fed9834e8ba77289f61214bfd9a050b20.tar.gz external_llvm-8599166fed9834e8ba77289f61214bfd9a050b20.tar.bz2 |
implement and document support for filecheck variables. This
allows matching and remembering a string and then matching and
verifying that the string occurs later in the file.
Change X86/xor.ll to use this in some cases where the test was
checking for an arbitrary register allocation decision.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82891 91177308-0d34-0410-b5e6-96231b3b80d8
Diffstat (limited to 'utils')
-rw-r--r-- | utils/FileCheck/FileCheck.cpp | 215 |
1 files changed, 172 insertions, 43 deletions
diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index 4c5999b..b4d1f84 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -23,6 +23,8 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Signals.h" +#include "llvm/ADT/StringMap.h" +#include <algorithm> using namespace llvm; static cl::opt<std::string> @@ -45,7 +47,6 @@ NoCanonicalizeWhiteSpace("strict-whitespace", //===----------------------------------------------------------------------===// class Pattern { - SourceMgr *SM; SMLoc PatternLoc; /// FixedStr - If non-empty, this pattern is a fixed string match with the @@ -54,6 +55,19 @@ class Pattern { /// RegEx - If non-empty, this is a regex pattern. std::string RegExStr; + + /// VariableUses - Entries in this vector map to uses of a variable in the + /// pattern, e.g. "foo[[bar]]baz". In this case, the RegExStr will contain + /// "foobaz" and we'll get an entry in this vector that tells us to insert the + /// value of bar at offset 3. + std::vector<std::pair<StringRef, unsigned> > VariableUses; + + /// VariableDefs - Entries in this vector map to definitions of a variable in + /// the pattern, e.g. "foo[[bar:.*]]baz". In this case, the RegExStr will + /// contain "foo(.*)baz" and VariableDefs will contain the pair "bar",1. The + /// index indicates what parenthesized value captures the variable value. + std::vector<std::pair<StringRef, unsigned> > VariableDefs; + public: Pattern() { } @@ -63,14 +77,19 @@ public: /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. - size_t Match(StringRef Buffer, size_t &MatchLen) const; + /// + /// The VariableTable StringMap provides the current values of filecheck + /// variables and is updated if this match defines new values. + size_t Match(StringRef Buffer, size_t &MatchLen, + StringMap<StringRef> &VariableTable) const; private: - void AddFixedStringToRegEx(StringRef FixedStr); + static void AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr); + bool AddRegExToRegEx(StringRef RegExStr, unsigned &CurParen, SourceMgr &SM); }; + bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { - this->SM = &SM; PatternLoc = SMLoc::getFromPointer(PatternStr.data()); // Ignore trailing whitespace. @@ -86,49 +105,113 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { } // Check to see if this is a fixed string, or if it has regex pieces. - if (PatternStr.size() < 2 || PatternStr.find("{{") == StringRef::npos) { + if (PatternStr.size() < 2 || + (PatternStr.find("{{") == StringRef::npos && + PatternStr.find("[[") == StringRef::npos)) { FixedStr = PatternStr; return false; } + // Paren value #0 is for the fully matched string. Any new parenthesized + // values add from their. + unsigned CurParen = 1; + // Otherwise, there is at least one regex piece. Build up the regex pattern // by escaping scary characters in fixed strings, building up one big regex. while (!PatternStr.empty()) { - // Handle fixed string matches. - if (PatternStr.size() < 2 || - PatternStr[0] != '{' || PatternStr[1] != '{') { - // Find the end, which is the start of the next regex. - size_t FixedMatchEnd = PatternStr.find("{{"); - AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd)); - PatternStr = PatternStr.substr(FixedMatchEnd); + // RegEx matches. + if (PatternStr.size() >= 2 && + PatternStr[0] == '{' && PatternStr[1] == '{') { + + // Otherwise, this is the start of a regex match. Scan for the }}. + size_t End = PatternStr.find("}}"); + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + "found start of regex string with no end '}}'", "error"); + return true; + } + + if (AddRegExToRegEx(PatternStr.substr(2, End-2), CurParen, SM)) + return true; + PatternStr = PatternStr.substr(End+2); continue; } - // Otherwise, this is the start of a regex match. Scan for the }}. - size_t End = PatternStr.find("}}"); - if (End == StringRef::npos) { - SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), - "found start of regex string with no end '}}'", "error"); - return true; - } - - StringRef RegexStr = PatternStr.substr(2, End-2); - Regex R(RegexStr); - std::string Error; - if (!R.isValid(Error)) { - SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2), - "invalid regex: " + Error, "error"); - return true; + // Named RegEx matches. These are of two forms: [[foo:.*]] which matches .* + // (or some other regex) and assigns it to the FileCheck variable 'foo'. The + // second form is [[foo]] which is a reference to foo. The variable name + // itself must be of the form "[a-zA-Z][0-9a-zA-Z]*", otherwise we reject + // it. This is to catch some common errors. + if (PatternStr.size() >= 2 && + PatternStr[0] == '[' && PatternStr[1] == '[') { + // Verify that it is terminated properly. + size_t End = PatternStr.find("]]"); + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + "invalid named regex reference, no ]] found", "error"); + return true; + } + + StringRef MatchStr = PatternStr.substr(2, End-2); + PatternStr = PatternStr.substr(End+2); + + // Get the regex name (e.g. "foo"). + size_t NameEnd = MatchStr.find(':'); + StringRef Name = MatchStr.substr(0, NameEnd); + + if (Name.empty()) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), + "invalid name in named regex: empty name", "error"); + return true; + } + + // Verify that the name is well formed. + for (unsigned i = 0, e = Name.size(); i != e; ++i) + if ((Name[i] < 'a' || Name[i] > 'z') && + (Name[i] < 'A' || Name[i] > 'Z') && + (Name[i] < '0' || Name[i] > '9')) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()+i), + "invalid name in named regex", "error"); + return true; + } + + // Name can't start with a digit. + if (isdigit(Name[0])) { + SM.PrintMessage(SMLoc::getFromPointer(Name.data()), + "invalid name in named regex", "error"); + return true; + } + + // Handle [[foo]]. + if (NameEnd == StringRef::npos) { + VariableUses.push_back(std::make_pair(Name, RegExStr.size())); + continue; + } + + // Handle [[foo:.*]]. + VariableDefs.push_back(std::make_pair(Name, CurParen)); + RegExStr += '('; + ++CurParen; + + if (AddRegExToRegEx(MatchStr.substr(NameEnd+1), CurParen, SM)) + return true; + + RegExStr += ')'; } - RegExStr += RegexStr.str(); - PatternStr = PatternStr.substr(End+2); + // Handle fixed string matches. + // Find the end, which is the start of the next regex. + size_t FixedMatchEnd = PatternStr.find("{{"); + FixedMatchEnd = std::min(FixedMatchEnd, PatternStr.find("[[")); + AddFixedStringToRegEx(PatternStr.substr(0, FixedMatchEnd), RegExStr); + PatternStr = PatternStr.substr(FixedMatchEnd); + continue; } return false; } -void Pattern::AddFixedStringToRegEx(StringRef FixedStr) { +void Pattern::AddFixedStringToRegEx(StringRef FixedStr, std::string &TheStr) { // Add the characters from FixedStr to the regex, escaping as needed. This // avoids "leaning toothpicks" in common patterns. for (unsigned i = 0, e = FixedStr.size(); i != e; ++i) { @@ -146,41 +229,81 @@ void Pattern::AddFixedStringToRegEx(StringRef FixedStr) { case '[': case '\\': case '{': - RegExStr += '\\'; + TheStr += '\\'; // FALL THROUGH. default: - RegExStr += FixedStr[i]; + TheStr += FixedStr[i]; break; } } } +bool Pattern::AddRegExToRegEx(StringRef RegexStr, unsigned &CurParen, + SourceMgr &SM) { + Regex R(RegexStr); + std::string Error; + if (!R.isValid(Error)) { + SM.PrintMessage(SMLoc::getFromPointer(RegexStr.data()), + "invalid regex: " + Error, "error"); + return true; + } + + RegExStr += RegexStr.str(); + CurParen += R.getNumMatches(); + return false; +} /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. -size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const { +size_t Pattern::Match(StringRef Buffer, size_t &MatchLen, + StringMap<StringRef> &VariableTable) const { // If this is a fixed string pattern, just match it now. if (!FixedStr.empty()) { MatchLen = FixedStr.size(); return Buffer.find(FixedStr); } - + // Regex match. + + // If there are variable uses, we need to create a temporary string with the + // actual value. + StringRef RegExToMatch = RegExStr; + std::string TmpStr; + if (!VariableUses.empty()) { + TmpStr = RegExStr; + + unsigned InsertOffset = 0; + for (unsigned i = 0, e = VariableUses.size(); i != e; ++i) { + // Look up the value and escape it so that we can plop it into the regex. + std::string Value; + AddFixedStringToRegEx(VariableTable[VariableUses[i].first], Value); + + // Plop it into the regex at the adjusted offset. + TmpStr.insert(TmpStr.begin()+VariableUses[i].second+InsertOffset, + Value.begin(), Value.end()); + InsertOffset += Value.size(); + } + + // Match the newly constructed regex. + RegExToMatch = TmpStr; + } + + SmallVector<StringRef, 4> MatchInfo; - if (!Regex(RegExStr, Regex::Newline).match(Buffer, &MatchInfo)) + if (!Regex(RegExToMatch, Regex::Newline).match(Buffer, &MatchInfo)) return StringRef::npos; // Successful regex match. assert(!MatchInfo.empty() && "Didn't get any match"); StringRef FullMatch = MatchInfo[0]; - - if (MatchInfo.size() != 1) { - SM->PrintMessage(PatternLoc, "regex cannot use grouping parens", "error"); - exit(1); + // If this defines any variables, remember their values. + for (unsigned i = 0, e = VariableDefs.size(); i != e; ++i) { + assert(VariableDefs[i].second < MatchInfo.size() && + "Internal paren error"); + VariableTable[VariableDefs[i].first] = MatchInfo[VariableDefs[i].second]; } - MatchLen = FullMatch.size(); return FullMatch.data()-Buffer.data(); @@ -415,6 +538,9 @@ int main(int argc, char **argv) { SM.AddNewSourceBuffer(F, SMLoc()); + /// VariableTable - This holds all the current filecheck variables. + StringMap<StringRef> VariableTable; + // Check that we have all of the expected strings, in order, in the input // file. StringRef Buffer = F->getBuffer(); @@ -428,7 +554,7 @@ int main(int argc, char **argv) { // Find StrNo in the file. size_t MatchLen = 0; - Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen)); + Buffer = Buffer.substr(CheckStr.Pat.Match(Buffer, MatchLen, VariableTable)); // If we didn't find a match, reject the input. if (Buffer.empty()) { @@ -472,9 +598,12 @@ int main(int argc, char **argv) { // If this match had "not strings", verify that they don't exist in the // skipped region. - for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) { + for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); + ChunkNo != e; ++ChunkNo) { size_t MatchLen = 0; - size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen); + size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, + MatchLen, + VariableTable); if (Pos == StringRef::npos) continue; SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), |