aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/llvm/Support/Regex.h15
-rw-r--r--lib/Support/Regex.cpp4
-rw-r--r--lib/Support/regcomp.c30
-rw-r--r--unittests/Support/RegexTest.cpp23
4 files changed, 64 insertions, 8 deletions
diff --git a/include/llvm/Support/Regex.h b/include/llvm/Support/Regex.h
index ffe09b1..82df2c6 100644
--- a/include/llvm/Support/Regex.h
+++ b/include/llvm/Support/Regex.h
@@ -7,7 +7,10 @@
//
//===----------------------------------------------------------------------===//
//
-// This file implements a POSIX regular expression matcher.
+// This file implements a POSIX regular expression matcher. Both Basic and
+// Extended POSIX regular expressions (ERE) are supported. EREs were extended
+// to support backreferences in matches.
+// This implementation also supports matching strings with embedded NUL chars.
//
//===----------------------------------------------------------------------===//
@@ -33,12 +36,14 @@ namespace llvm {
/// null string after any newline in the string in addition to its normal
/// function, and the $ anchor matches the null string before any
/// newline in the string in addition to its normal function.
- Newline=2
+ Newline=2,
+ /// By default, the POSIX extended regular expression (ERE) syntax is
+ /// assumed. Pass this flag to turn on basic regular expressions (BRE)
+ /// instead.
+ BasicRegex=4
};
- /// Compiles the given POSIX Extended Regular Expression \p Regex.
- /// This implementation supports regexes and matching strings with embedded
- /// NUL characters.
+ /// Compiles the given regular expression \p Regex.
Regex(StringRef Regex, unsigned Flags = NoFlags);
~Regex();
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index d293da0..0a5479a 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -27,7 +27,9 @@ Regex::Regex(StringRef regex, unsigned Flags) {
flags |= REG_ICASE;
if (Flags & Newline)
flags |= REG_NEWLINE;
- error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
+ if (!(Flags & BasicRegex))
+ flags |= REG_EXTENDED;
+ error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
}
Regex::~Regex() {
diff --git a/lib/Support/regcomp.c b/lib/Support/regcomp.c
index 46c91a9..74d9186 100644
--- a/lib/Support/regcomp.c
+++ b/lib/Support/regcomp.c
@@ -303,6 +303,7 @@ p_ere_exp(struct parse *p)
sopno pos;
int count;
int count2;
+ int backrefnum;
sopno subno;
int wascaret = 0;
@@ -370,7 +371,34 @@ p_ere_exp(struct parse *p)
case '\\':
REQUIRE(MORE(), REG_EESCAPE);
c = GETNEXT();
- ordinary(p, c);
+ if (c >= '1' && c <= '9') {
+ /* \[0-9] is taken to be a back-reference to a previously specified
+ * matching group. backrefnum will hold the number. The matching
+ * group must exist (i.e. if \4 is found there must have been at
+ * least 4 matching groups specified in the pattern previously).
+ */
+ backrefnum = c - '0';
+ if (p->pend[backrefnum] == 0) {
+ SETERROR(REG_ESUBREG);
+ break;
+ }
+
+ /* Make sure everything checks out and emit the sequence
+ * that marks a back-reference to the parse structure.
+ */
+ assert(backrefnum <= p->g->nsub);
+ EMIT(OBACK_, backrefnum);
+ assert(p->pbegin[backrefnum] != 0);
+ assert(OP(p->strip[p->pbegin[backrefnum]]) != OLPAREN);
+ assert(OP(p->strip[p->pend[backrefnum]]) != ORPAREN);
+ (void) dupl(p, p->pbegin[backrefnum]+1, p->pend[backrefnum]);
+ EMIT(O_BACK, backrefnum);
+ p->g->backrefs = 1;
+ } else {
+ /* Other chars are simply themselves when escaped with a backslash.
+ */
+ ordinary(p, c);
+ }
break;
case '{': /* okay as ordinary except if digit follows */
REQUIRE(!MORE() || !isdigit((uch)PEEK()), REG_BADRPT);
diff --git a/unittests/Support/RegexTest.cpp b/unittests/Support/RegexTest.cpp
index 65b66c3..38d9595 100644
--- a/unittests/Support/RegexTest.cpp
+++ b/unittests/Support/RegexTest.cpp
@@ -51,7 +51,6 @@ TEST_F(RegexTest, Basics) {
EXPECT_EQ(1u, Matches.size());
EXPECT_EQ(String, Matches[0].str());
-
std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)";
String="YX99a:513b";
NulPattern[7] = '\0';
@@ -62,6 +61,28 @@ TEST_F(RegexTest, Basics) {
EXPECT_TRUE(r5.match(String));
}
+TEST_F(RegexTest, Backreferences) {
+ Regex r1("([a-z]+)_\\1");
+ SmallVector<StringRef, 4> Matches;
+ EXPECT_TRUE(r1.match("abc_abc", &Matches));
+ EXPECT_EQ(2u, Matches.size());
+ EXPECT_FALSE(r1.match("abc_ab", &Matches));
+
+ Regex r2("a([0-9])b\\1c\\1");
+ EXPECT_TRUE(r2.match("a4b4c4", &Matches));
+ EXPECT_EQ(2u, Matches.size());
+ EXPECT_EQ("4", Matches[1].str());
+ EXPECT_FALSE(r2.match("a2b2c3"));
+
+ Regex r3("a([0-9])([a-z])b\\1\\2");
+ EXPECT_TRUE(r3.match("a6zb6z", &Matches));
+ EXPECT_EQ(3u, Matches.size());
+ EXPECT_EQ("6", Matches[1].str());
+ EXPECT_EQ("z", Matches[2].str());
+ EXPECT_FALSE(r3.match("a6zb6y"));
+ EXPECT_FALSE(r3.match("a6zb7z"));
+}
+
TEST_F(RegexTest, Substitution) {
std::string Error;