1 | // Copyright 2011 Google Inc. All Rights Reserved. |
---|---|

2 | // |

3 | // Licensed under the Apache License, Version 2.0 (the "License"); |

4 | // you may not use this file except in compliance with the License. |

5 | // You may obtain a copy of the License at |

6 | // |

7 | // http://www.apache.org/licenses/LICENSE-2.0 |

8 | // |

9 | // Unless required by applicable law or agreed to in writing, software |

10 | // distributed under the License is distributed on an "AS IS" BASIS, |

11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |

12 | // See the License for the specific language governing permissions and |

13 | // limitations under the License. |

14 | |

15 | #include "edit_distance.h" |

16 | |

17 | #include <algorithm> |

18 | #include <vector> |

19 | |

20 | using namespace std; |

21 | |

22 | int EditDistance(const StringPiece& s1, |

23 | const StringPiece& s2, |

24 | bool allow_replacements, |

25 | int max_edit_distance) { |

26 | // The algorithm implemented below is the "classic" |

27 | // dynamic-programming algorithm for computing the Levenshtein |

28 | // distance, which is described here: |

29 | // |

30 | // http://en.wikipedia.org/wiki/Levenshtein_distance |

31 | // |

32 | // Although the algorithm is typically described using an m x n |

33 | // array, only one row plus one element are used at a time, so this |

34 | // implementation just keeps one vector for the row. To update one entry, |

35 | // only the entries to the left, top, and top-left are needed. The left |

36 | // entry is in row[x-1], the top entry is what's in row[x] from the last |

37 | // iteration, and the top-left entry is stored in previous. |

38 | int m = s1.len_; |

39 | int n = s2.len_; |

40 | |

41 | vector<int> row(n + 1); |

42 | for (int i = 1; i <= n; ++i) |

43 | row[i] = i; |

44 | |

45 | for (int y = 1; y <= m; ++y) { |

46 | row[0] = y; |

47 | int best_this_row = row[0]; |

48 | |

49 | int previous = y - 1; |

50 | for (int x = 1; x <= n; ++x) { |

51 | int old_row = row[x]; |

52 | if (allow_replacements) { |

53 | row[x] = min(previous + (s1.str_[y - 1] == s2.str_[x - 1] ? 0 : 1), |

54 | min(row[x - 1], row[x]) + 1); |

55 | } |

56 | else { |

57 | if (s1.str_[y - 1] == s2.str_[x - 1]) |

58 | row[x] = previous; |

59 | else |

60 | row[x] = min(row[x - 1], row[x]) + 1; |

61 | } |

62 | previous = old_row; |

63 | best_this_row = min(best_this_row, row[x]); |

64 | } |

65 | |

66 | if (max_edit_distance && best_this_row > max_edit_distance) |

67 | return max_edit_distance + 1; |

68 | } |

69 | |

70 | return row[n]; |

71 | } |

72 |