aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDaniel Dunbar <daniel@zuster.org>2010-02-17 20:08:42 +0000
committerDaniel Dunbar <daniel@zuster.org>2010-02-17 20:08:42 +0000
commitd2a5c0d8562407f9acab97451a785b513edd4c9b (patch)
tree0ceb19ccf4414d45f6c514e20dbe3db6a7d5d719
parentb886089645a144babe64067928acc19814434775 (diff)
downloadexternal_llvm-d2a5c0d8562407f9acab97451a785b513edd4c9b.zip
external_llvm-d2a5c0d8562407f9acab97451a785b513edd4c9b.tar.gz
external_llvm-d2a5c0d8562407f9acab97451a785b513edd4c9b.tar.bz2
Add Regex::sub, for doing regular expression substitution with backreferences.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@96503 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/Support/Regex.h13
-rw-r--r--lib/Support/Regex.cpp76
-rw-r--r--unittests/Support/RegexTest.cpp29
3 files changed, 118 insertions, 0 deletions
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index c954c0d..591af00 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -56,6 +56,19 @@ namespace llvm {
///
/// This returns true on a successful match.
bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
+
+ /// sub - Return the result of replacing the first match of the regex in
+ /// \arg String with the \arg Repl string. Backreferences like "\0" in the
+ /// replacement string are replaced with the appropriate match substring.
+ ///
+ /// Note that the replacement string has backslash escaping performed on
+ /// it. Invalid backreferences are ignored (replaced by empty strings).
+ ///
+ /// \param Error If non-null, any errors in the substitution (invalid
+ /// backreferences, trailing backslashes) will be recorded as a non-empty
+ /// string.
+ std::string sub(StringRef Repl, StringRef String, std::string *Error = 0);
+
private:
struct llvm_regex *preg;
int error;
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 618ca05..a7631de 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -90,3 +90,79 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
return true;
}
+
+std::string Regex::sub(StringRef Repl, StringRef String,
+ std::string *Error) {
+ SmallVector<StringRef, 8> Matches;
+
+ // Reset error, if given.
+ if (Error && !Error->empty()) *Error = "";
+
+ // Return the input if there was no match.
+ if (!match(String, &Matches))
+ return String;
+
+ // Otherwise splice in the replacement string, starting with the prefix before
+ // the match.
+ std::string Res(String.begin(), Matches[0].begin());
+
+ // Then the replacement string, honoring possible substitutions.
+ while (!Repl.empty()) {
+ // Skip to the next escape.
+ std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+ // Add the skipped substring.
+ Res += Split.first;
+
+ // Check for terminimation and trailing backslash.
+ if (Split.second.empty()) {
+ if (Repl.size() != Split.first.size() &&
+ Error && Error->empty())
+ *Error = "replacement string contained trailing backslash";
+ break;
+ }
+
+ // Otherwise update the replacement string and interpret escapes.
+ Repl = Split.second;
+
+ // FIXME: We should have a StringExtras function for mapping C99 escapes.
+ switch (Repl[0]) {
+ // Treat all unrecognized characters as self-quoting.
+ default:
+ Res += Repl[0];
+ Repl = Repl.substr(1);
+ break;
+
+ // Single character escapes.
+ case 't':
+ Res += '\t';
+ Repl = Repl.substr(1);
+ break;
+ case 'n':
+ Res += '\n';
+ Repl = Repl.substr(1);
+ break;
+
+ // Decimal escapes are backreferences.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ // Extract the backreference number.
+ StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+ Repl = Repl.substr(Ref.size());
+
+ unsigned RefValue;
+ if (!Ref.getAsInteger(10, RefValue) &&
+ RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = "invalid backreference string '" + Ref.str() + "'";
+ break;
+ }
+ }
+ }
+
+ // And finally the suffix.
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+ return Res;
+}
diff --git a/unittests/Support/RegexTest.cpp b/unittests/Support/RegexTest.cpp
index 44c7e55..65b66c3 100644
--- a/unittests/Support/RegexTest.cpp
+++ b/unittests/Support/RegexTest.cpp
@@ -62,4 +62,33 @@ TEST_F(RegexTest, Basics) {
EXPECT_TRUE(r5.match(String));
}
+TEST_F(RegexTest, Substitution) {
+ std::string Error;
+
+ EXPECT_EQ("aNUMber", Regex("[0-9]+").sub("NUM", "a1234ber"));
+
+ // Standard Escapes
+ EXPECT_EQ("a\\ber", Regex("[0-9]+").sub("\\\\", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+ EXPECT_EQ("a\nber", Regex("[0-9]+").sub("\\n", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+ EXPECT_EQ("a\tber", Regex("[0-9]+").sub("\\t", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+ EXPECT_EQ("ajber", Regex("[0-9]+").sub("\\j", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+
+ EXPECT_EQ("aber", Regex("[0-9]+").sub("\\", "a1234ber", &Error));
+ EXPECT_EQ(Error, "replacement string contained trailing backslash");
+
+ // Backreferences
+ EXPECT_EQ("aa1234bber", Regex("a[0-9]+b").sub("a\\0b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+
+ EXPECT_EQ("a1234ber", Regex("a([0-9]+)b").sub("a\\1b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "");
+
+ EXPECT_EQ("aber", Regex("a[0-9]+b").sub("a\\100b", "a1234ber", &Error));
+ EXPECT_EQ(Error, "invalid backreference string '100'");
+}
+
}