aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDouglas Gregor <doug.gregor@gmail.com>2009-12-30 17:23:44 +0000
committerDouglas Gregor <doug.gregor@gmail.com>2009-12-30 17:23:44 +0000
commit1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed (patch)
treefd7a1d57e11e237fab718c328d99e73a226d5dcf
parent1f66a30b946aa03cd89c2f0f7418bf64fe540508 (diff)
downloadexternal_llvm-1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed.zip
external_llvm-1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed.tar.gz
external_llvm-1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed.tar.bz2
Implement edit distance for StringRef
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92309 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r--include/llvm/ADT/StringRef.h16
-rw-r--r--lib/Support/StringRef.cpp31
2 files changed, 47 insertions, 0 deletions
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h
index a744266..1c73836 100644
--- a/include/llvm/ADT/StringRef.h
+++ b/include/llvm/ADT/StringRef.h
@@ -133,6 +133,22 @@ namespace llvm {
/// compare_lower - Compare two strings, ignoring case.
int compare_lower(StringRef RHS) const;
+ /// \brief Determine the edit distance between this string and another
+ /// string.
+ ///
+ /// \param Other the string to compare this string against.
+ ///
+ /// \param AllowReplacements whether to allow character
+ /// replacements (change one character into another) as a single
+ /// operation, rather than as two operations (an insertion and a
+ /// removal).
+ ///
+ /// \returns the minimum number of character insertions, removals,
+ /// or (if \p AllowReplacements is \c true) replacements needed to
+ /// transform one of the given strings into the other. If zero,
+ /// the strings are identical.
+ unsigned edit_distance(StringRef Other, bool AllowReplacements = true);
+
/// str - Get the contents as an std::string.
std::string str() const { return std::string(Data, Length); }
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index 2d023e4..9084ea6 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
+#include <vector>
using namespace llvm;
// MSVC emits references to this into the translation units which reference it.
@@ -35,6 +36,36 @@ int StringRef::compare_lower(StringRef RHS) const {
return Length < RHS.Length ? -1 : 1;
}
+/// \brief Compute the edit distance between the two given strings.
+unsigned StringRef::edit_distance(llvm::StringRef Other,
+ bool AllowReplacements) {
+ size_type m = size();
+ size_type n = Other.size();
+
+ std::vector<unsigned> previous(n+1, 0);
+ for (std::vector<unsigned>::size_type i = 0; i <= n; ++i)
+ previous[i] = i;
+
+ std::vector<unsigned> current(n+1, 0);
+ for (size_type y = 1; y <= m; ++y) {
+ current.assign(n+1, 0);
+ current[0] = y;
+ for (size_type x = 1; x <= n; ++x) {
+ if (AllowReplacements) {
+ current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u),
+ min(current[x-1], previous[x])+1);
+ }
+ else {
+ if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1];
+ else current[x] = min(current[x-1], previous[x]) + 1;
+ }
+ }
+ current.swap(previous);
+ }
+
+ return previous[n];
+}
+
//===----------------------------------------------------------------------===//
// String Searching
//===----------------------------------------------------------------------===//