diff options
-rw-r--r-- | docs/TestingGuide.html | 34 | ||||
-rw-r--r-- | include/llvm/Support/Regex.h | 9 | ||||
-rw-r--r-- | lib/Support/Regex.cpp | 5 | ||||
-rw-r--r-- | unittests/Support/RegexTest.cpp | 1 | ||||
-rw-r--r-- | utils/FileCheck/FileCheck.cpp | 127 |
5 files changed, 161 insertions, 15 deletions
diff --git a/docs/TestingGuide.html b/docs/TestingGuide.html index 28a5e8a..bc19ab4 100644 --- a/docs/TestingGuide.html +++ b/docs/TestingGuide.html @@ -625,6 +625,40 @@ define i8 @coerce_offset0(i32 %V, i32* %P) { </div> <!-- _______________________________________________________________________ --> +<div class="doc_subsubsection"><a +name="FileCheck-Matching">FileCheck Pattern Matching Syntax</a></div> + +<div class="doc_text"> + +<p>The CHECK: and CHECK-NOT: directives both take a pattern to match. For most +uses of FileCheck, fixed string matching is perfectly sufficient. For some +things, a more flexible form of matching is desired. To support this, FileCheck +allows you to specify regular expressions in matching strings, surrounded by +double braces: <b>{{yourregex}}</b>. Because we want to use fixed string +matching for a majority of what we do, FileCheck has been designed to support +mixing and matching fixed string matching with regular expressions. This allows +you to write things like this:</p> + +<div class="doc_code"> +<pre> +; CHECK: movhpd <b>{{[0-9]+}}</b>(%esp), <b>{{%xmm[0-7]}}</b> +</pre> +</div> + +<p>In this case, any offset from the ESP register will be allowed, and any xmm +register will be allowed.</p> + +<p>Because regular expressions are enclosed with double braces, they are +visually distinct, and you don't need to use escape characters within the double +braces like you would in C. 
In the rare case that you want to match double +braces explicitly from the input, you can use something ugly like +<b>{{[{][{]}}</b> as your pattern.</p> + +</div> + + + +<!-- _______________________________________________________________________ --> <div class="doc_subsection"><a name="dgvars">Variables and substitutions</a></div> <!-- _______________________________________________________________________ --> diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h index 4c4229e..0bf253f 100644 --- a/include/llvm/Support/Regex.h +++ b/include/llvm/Support/Regex.h @@ -11,11 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" +#include <string> struct llvm_regex; + namespace llvm { + class StringRef; + template<typename T> class SmallVectorImpl; + class Regex { public: enum { @@ -54,6 +57,8 @@ namespace llvm { /// Matches. /// For this feature to be enabled you must construct the regex using /// Regex("...", Regex::Sub) constructor. + /// + /// This returns true on a successful match. bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0); private: struct llvm_regex *preg; diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp index 9f5fbb0..285e01f 100644 --- a/lib/Support/Regex.cpp +++ b/lib/Support/Regex.cpp @@ -14,13 +14,14 @@ #include "llvm/Support/Regex.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/ADT/SmallVector.h" #include "regex_impl.h" #include <string> using namespace llvm; Regex::Regex(const StringRef &regex, unsigned Flags) { unsigned flags = 0; - preg = new struct llvm_regex; + preg = new llvm_regex(); preg->re_endp = regex.end(); if (Flags & IgnoreCase) flags |= REG_ICASE; @@ -60,7 +61,7 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){ } // pmatch needs to have at least one element. 
- SmallVector<llvm_regmatch_t, 2> pm; + SmallVector<llvm_regmatch_t, 8> pm; pm.resize(nmatch > 0 ? nmatch : 1); pm[0].rm_so = 0; pm[0].rm_eo = String.size(); diff --git a/unittests/Support/RegexTest.cpp b/unittests/Support/RegexTest.cpp index b323e28..28a85a1 100644 --- a/unittests/Support/RegexTest.cpp +++ b/unittests/Support/RegexTest.cpp @@ -9,6 +9,7 @@ #include "gtest/gtest.h" #include "llvm/Support/Regex.h" +#include "llvm/ADT/SmallVector.h" #include <cstring> using namespace llvm; diff --git a/utils/FileCheck/FileCheck.cpp b/utils/FileCheck/FileCheck.cpp index cd62870..8e8c1cd 100644 --- a/utils/FileCheck/FileCheck.cpp +++ b/utils/FileCheck/FileCheck.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/PrettyStackTrace.h" +#include "llvm/Support/Regex.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/System/Signals.h" @@ -44,8 +45,9 @@ NoCanonicalizeWhiteSpace("strict-whitespace", //===----------------------------------------------------------------------===// class Pattern { - /// Str - The string to match. - StringRef Str; + /// Chunks - The pattern chunks to match. If the bool is false, it is a fixed + /// string match, if it is true, it is a regex match. + SmallVector<std::pair<StringRef, bool>, 4> Chunks; public: Pattern() { } @@ -55,10 +57,7 @@ public: /// Match - Match the pattern string against the input buffer Buffer. This /// returns the position that is matched or npos if there is no match. If /// there is a match, the size of the matched string is returned in MatchLen. 
- size_t Match(StringRef Buffer, size_t &MatchLen) const { - MatchLen = Str.size(); - return Buffer.find(Str); - } + size_t Match(StringRef Buffer, size_t &MatchLen) const; }; bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { @@ -74,11 +73,117 @@ bool Pattern::ParsePattern(StringRef PatternStr, SourceMgr &SM) { "error"); return true; } + + // Scan the pattern to break it into regex and non-regex pieces. + while (!PatternStr.empty()) { + // Handle fixed string matches. + if (PatternStr.size() < 2 || + PatternStr[0] != '{' || PatternStr[1] != '{') { + // Find the end, which is the start of the next regex. + size_t FixedMatchEnd = PatternStr.find("{{"); + + Chunks.push_back(std::make_pair(PatternStr.substr(0, FixedMatchEnd), + false)); + PatternStr = PatternStr.substr(FixedMatchEnd); + continue; + } + + // Otherwise, this is the start of a regex match. Scan for the }}. + size_t End = PatternStr.find("}}"); + if (End == StringRef::npos) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()), + "found start of regex string with no end '}}'", "error"); + return true; + } + + Regex R(PatternStr.substr(2, End-2)); + std::string Error; + if (!R.isValid(Error)) { + SM.PrintMessage(SMLoc::getFromPointer(PatternStr.data()+2), + "invalid regex: " + Error, "error"); + return true; + } + + Chunks.push_back(std::make_pair(PatternStr.substr(2, End-2), true)); + PatternStr = PatternStr.substr(End+2); + } + + return false; +} +/// Match - Match the pattern string against the input buffer Buffer. This +/// returns the position that is matched or npos if there is no match. If +/// there is a match, the size of the matched string is returned in MatchLen. 
+size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const { + size_t FirstMatch = StringRef::npos; + MatchLen = 0; + SmallVector<StringRef, 4> MatchInfo; - Str = PatternStr; - return false; + while (!Buffer.empty()) { + StringRef MatchAttempt = Buffer; + + unsigned ChunkNo = 0, e = Chunks.size(); + for (; ChunkNo != e; ++ChunkNo) { + StringRef PatternStr = Chunks[ChunkNo].first; + + size_t ThisMatch = StringRef::npos; + size_t ThisLength = StringRef::npos; + if (!Chunks[ChunkNo].second) { + // Fixed string match. + ThisMatch = MatchAttempt.find(Chunks[ChunkNo].first); + ThisLength = Chunks[ChunkNo].first.size(); + } else if (Regex(Chunks[ChunkNo].first, Regex::Sub).match(MatchAttempt, &MatchInfo)) { + // Successful regex match. + assert(!MatchInfo.empty() && "Didn't get any match"); + StringRef FullMatch = MatchInfo[0]; + MatchInfo.clear(); + + ThisMatch = FullMatch.data()-MatchAttempt.data(); + ThisLength = FullMatch.size(); + } + + // Otherwise, what we do depends on if this is the first match or not. If + // this is the first match, it doesn't have to match at the start of + // MatchAttempt. + if (ChunkNo == 0) { + // If the first match fails then this pattern will never match in + // Buffer. + if (ThisMatch == StringRef::npos) + return ThisMatch; + + FirstMatch = ThisMatch; + MatchAttempt = MatchAttempt.substr(FirstMatch); + ThisMatch = 0; + } + + // If this chunk didn't match, then the entire pattern didn't match from + // FirstMatch, try later in the buffer. + if (ThisMatch == StringRef::npos) + break; + + // Ok, if the match didn't match at the beginning of MatchAttempt, then we + // have something like "ABC{{DEF}}" and something was in-between. Reject + // the match. + if (ThisMatch != 0) + break; + + // Otherwise, match the string and move to the next chunk. + MatchLen += ThisLength; + MatchAttempt = MatchAttempt.substr(ThisLength); + } + + // If the whole thing matched, we win. 
+ if (ChunkNo == e) + return FirstMatch; + + // Otherwise, try matching again after FirstMatch to see if this pattern + // matches later in the buffer. + Buffer = Buffer.substr(FirstMatch+1); + } + + // If we ran out of stuff to scan, then we didn't match. + return StringRef::npos; } @@ -367,14 +472,14 @@ int main(int argc, char **argv) { // If this match had "not strings", verify that they don't exist in the // skipped region. - for (unsigned i = 0, e = CheckStr.NotStrings.size(); i != e; ++i) { + for (unsigned ChunkNo = 0, e = CheckStr.NotStrings.size(); ChunkNo != e; ++ChunkNo) { size_t MatchLen = 0; - size_t Pos = CheckStr.NotStrings[i].second.Match(SkippedRegion, MatchLen); + size_t Pos = CheckStr.NotStrings[ChunkNo].second.Match(SkippedRegion, MatchLen); if (Pos == StringRef::npos) continue; SM.PrintMessage(SMLoc::getFromPointer(LastMatch+Pos), CheckPrefix+"-NOT: string occurred!", "error"); - SM.PrintMessage(CheckStr.NotStrings[i].first, + SM.PrintMessage(CheckStr.NotStrings[ChunkNo].first, CheckPrefix+"-NOT: pattern specified here", "note"); return 1; } |