diff options
author | Douglas Gregor <doug.gregor@gmail.com> | 2009-12-30 17:23:44 +0000 |
---|---|---|
committer | Douglas Gregor <doug.gregor@gmail.com> | 2009-12-30 17:23:44 +0000 |
commit | 1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed (patch) | |
tree | fd7a1d57e11e237fab718c328d99e73a226d5dcf | |
parent | 1f66a30b946aa03cd89c2f0f7418bf64fe540508 (diff) | |
download | external_llvm-1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed.zip external_llvm-1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed.tar.gz external_llvm-1f69e49f5b7b8ed9761b1bff35b73eab1904c8ed.tar.bz2 |
Implement edit distance for StringRef
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@92309 91177308-0d34-0410-b5e6-96231b3b80d8
-rw-r--r-- | include/llvm/ADT/StringRef.h | 16 | ||||
-rw-r--r-- | lib/Support/StringRef.cpp | 31 |
2 files changed, 47 insertions, 0 deletions
diff --git a/include/llvm/ADT/StringRef.h b/include/llvm/ADT/StringRef.h index a744266..1c73836 100644 --- a/include/llvm/ADT/StringRef.h +++ b/include/llvm/ADT/StringRef.h @@ -133,6 +133,22 @@ namespace llvm { /// compare_lower - Compare two strings, ignoring case. int compare_lower(StringRef RHS) const; + /// \brief Determine the edit distance between this string and another + /// string. + /// + /// \param Other the string to compare this string against. + /// + /// \param AllowReplacements whether to allow character + /// replacements (change one character into another) as a single + /// operation, rather than as two operations (an insertion and a + /// removal). + /// + /// \returns the minimum number of character insertions, removals, + /// or (if \p AllowReplacements is \c true) replacements needed to + /// transform one of the given strings into the other. If zero, + /// the strings are identical. + unsigned edit_distance(StringRef Other, bool AllowReplacements = true); + /// str - Get the contents as an std::string. std::string str() const { return std::string(Data, Length); } diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index 2d023e4..9084ea6 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "llvm/ADT/StringRef.h" +#include <vector> using namespace llvm; // MSVC emits references to this into the translation units which reference it. @@ -35,6 +36,36 @@ int StringRef::compare_lower(StringRef RHS) const { return Length < RHS.Length ? -1 : 1; } +/// \brief Compute the edit distance between the two given strings. +unsigned StringRef::edit_distance(llvm::StringRef Other, + bool AllowReplacements) { + size_type m = size(); + size_type n = Other.size(); + + std::vector<unsigned> previous(n+1, 0); + for (std::vector<unsigned>::size_type i = 0; i <= n; ++i) + previous[i] = i; + + std::vector<unsigned> current(n+1, 0); + for (size_type y = 1; y <= m; ++y) { + current.assign(n+1, 0); + current[0] = y; + for (size_type x = 1; x <= n; ++x) { + if (AllowReplacements) { + current[x] = min(previous[x-1] + ((*this)[y-1] == Other[x-1]? 0u:1u), + min(current[x-1], previous[x])+1); + } + else { + if ((*this)[y-1] == Other[x-1]) current[x] = previous[x-1]; + else current[x] = min(current[x-1], previous[x]) + 1; + } + } + current.swap(previous); + } + + return previous[n]; +} + //===----------------------------------------------------------------------===// // String Searching //===----------------------------------------------------------------------===// |