Introduce an optimisation for special case lists with large numbers of literal entries.

Our internal regex implementation does not cope with large numbers of anchors very efficiently. Given a ~3600-entry special case list, regex compilation can take on the order of seconds. This patch solves the problem for the special case of patterns matching literal global names (i.e. patterns with no regex metacharacters). Rather than forming regexes from literal global name patterns, add them to a StringSet which is checked before matching against the regex. This reduces regex compilation time by an order of roughly thousands when reading the aforementioned special case list, according to a completely unscientific study. No test cases. I figure that any new tests for this code should check that regex metacharacters are properly recognised. However, I could not find any documentation which documents the fact that the syntax of global names in special case lists is based on regexes. The extent to which regex syntax is supported in special case lists should probably be decided on/documented before writing tests. Differential Revision: http://llvm-reviews.chandlerc.com/D1150 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@187732 91177308-0d34-0410-b5e6-96231b3b80d8
author: Peter Collingbourne <peter@pcc.me.uk> 2013-08-05 17:48:04 +0000
committer: Peter Collingbourne <peter@pcc.me.uk> 2013-08-05 17:48:04 +0000
commit: acf4cf775763c6896f14f1d3a988058de05ca704 (patch)
tree: 5f5dfc289e5bc30745596d0f91146bcb77d6867f /lib/Transforms/Utils/SpecialCaseList.cpp
parent: aa80e61b0d79ddf9593f6217063574d0c66c3099 (diff)
download: external_llvm-acf4cf775763c6896f14f1d3a988058de05ca704.zip
external_llvm-acf4cf775763c6896f14f1d3a988058de05ca704.tar.gz
external_llvm-acf4cf775763c6896f14f1d3a988058de05ca704.tar.bz2
1 files changed, 38 insertions, 13 deletions
diff --git a/lib/Transforms/Utils/SpecialCaseList.cpp b/lib/Transforms/Utils/SpecialCaseList.cpp
index ef8a7ac..b98cb5b 100644
--- a/lib/Transforms/Utils/SpecialCaseList.cpp
+++ b/lib/Transforms/Utils/SpecialCaseList.cpp
@@ -19,6 +19,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/StringSet.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
@@ -32,6 +33,22 @@
 
 namespace llvm {
 
+/// Represents a set of regular expressions.  Regular expressions which are
+/// "literal" (i.e. no regex metacharacters) are stored in Strings, while all
+/// others are represented as a single pipe-separated regex in RegEx.  The
+/// reason for doing so is efficiency; StringSet is much faster at matching
+/// literal strings than Regex.
+struct SpecialCaseList::Entry {
+  StringSet<> Strings;
+  Regex *RegEx;
+
+  Entry() : RegEx(0) {}
+
+  bool match(StringRef Query) const {
+    return Strings.count(Query) || (RegEx && RegEx->match(Query));
+  }
+};
+
 SpecialCaseList::SpecialCaseList(const StringRef Path) {
   // Validate and open blacklist file.
   if (Path.empty()) return;
@@ -82,6 +99,12 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
       Category = "init";
     }
 
+    // See if we can store Regexp in Strings.
+    if (Regex::isLiteralERE(Regexp)) {
+      Entries[Prefix][Category].Strings.insert(Regexp);
+      continue;
+    }
+
     // Replace * with .*
     for (size_t pos = 0; (pos = Regexp.find("*", pos)) != std::string::npos;
          pos += strlen(".*")) {
@@ -109,16 +132,20 @@ void SpecialCaseList::init(const MemoryBuffer *MB) {
     for (StringMap<std::string>::const_iterator II = I->second.begin(),
                                                 IE = I->second.end();
          II != IE; ++II) {
-      Entries[I->getKey()][II->getKey()] = new Regex(II->getValue());
+      Entries[I->getKey()][II->getKey()].RegEx = new Regex(II->getValue());
     }
   }
 }
 
 SpecialCaseList::~SpecialCaseList() {
-  for (StringMap<StringMap<Regex*> >::iterator I = Entries.begin(),
-                                               E = Entries.end();
+  for (StringMap<StringMap<Entry> >::iterator I = Entries.begin(),
+                                              E = Entries.end();
        I != E; ++I) {
-    DeleteContainerSeconds(I->second);
+    for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                          IE = I->second.end();
+         II != IE; ++II) {
+      delete II->second.RegEx;
+    }
   }
 }
 
@@ -169,14 +196,13 @@ bool SpecialCaseList::isIn(const Module &M, const StringRef Category) const {
 bool SpecialCaseList::findCategory(const StringRef Section,
                                    const StringRef Query,
                                    StringRef &Category) const {
-  StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
   if (I == Entries.end()) return false;
 
-  for (StringMap<Regex *>::const_iterator II = I->second.begin(),
-                                          IE = I->second.end();
+  for (StringMap<Entry>::const_iterator II = I->second.begin(),
+                                        IE = I->second.end();
        II != IE; ++II) {
-    Regex *FunctionRegex = II->getValue();
-    if (FunctionRegex->match(Query)) {
+    if (II->getValue().match(Query)) {
       Category = II->first();
       return true;
     }
@@ -188,13 +214,12 @@ bool SpecialCaseList::findCategory(const StringRef Section,
 bool SpecialCaseList::inSectionCategory(const StringRef Section,
                                         const StringRef Query,
                                         const StringRef Category) const {
-  StringMap<StringMap<Regex *> >::const_iterator I = Entries.find(Section);
+  StringMap<StringMap<Entry> >::const_iterator I = Entries.find(Section);
   if (I == Entries.end()) return false;
-  StringMap<Regex *>::const_iterator II = I->second.find(Category);
+  StringMap<Entry>::const_iterator II = I->second.find(Category);
   if (II == I->second.end()) return false;
 
-  Regex *FunctionRegex = II->getValue();
-  return FunctionRegex->match(Query);
+  return II->getValue().match(Query);
 }
 
 }  // namespace llvm
author	Peter Collingbourne <peter@pcc.me.uk>	2013-08-05 17:48:04 +0000
committer	Peter Collingbourne <peter@pcc.me.uk>	2013-08-05 17:48:04 +0000
commit	acf4cf775763c6896f14f1d3a988058de05ca704 (patch)
tree	5f5dfc289e5bc30745596d0f91146bcb77d6867f /lib/Transforms/Utils/SpecialCaseList.cpp
parent	aa80e61b0d79ddf9593f6217063574d0c66c3099 (diff)
download	external_llvm-acf4cf775763c6896f14f1d3a988058de05ca704.zip external_llvm-acf4cf775763c6896f14f1d3a988058de05ca704.tar.gz external_llvm-acf4cf775763c6896f14f1d3a988058de05ca704.tar.bz2