1 | #include "caffe2/utils/string_utils.h" |
2 | |
3 | #include <algorithm> |
4 | #include <sstream> |
5 | #include <vector> |
6 | |
7 | namespace caffe2 { |
8 | |
9 | std::vector<std::string> |
10 | split(char separator, const std::string& string, bool ignore_empty) { |
11 | std::vector<std::string> pieces; |
12 | std::stringstream ss(string); |
13 | std::string item; |
14 | while (getline(ss, item, separator)) { |
15 | if (!ignore_empty || !item.empty()) { |
16 | pieces.push_back(std::move(item)); |
17 | } |
18 | } |
19 | return pieces; |
20 | } |
21 | |
22 | std::string trim(const std::string& str) { |
23 | size_t left = str.find_first_not_of(' '); |
24 | if (left == std::string::npos) { |
25 | return str; |
26 | } |
27 | size_t right = str.find_last_not_of(' '); |
28 | return str.substr(left, (right - left + 1)); |
29 | } |
30 | |
31 | size_t editDistance( |
32 | const std::string& s1, const std::string& s2, size_t max_distance) |
33 | { |
34 | std::vector<size_t> current(s1.length() + 1); |
35 | std::vector<size_t> previous(s1.length() + 1); |
36 | std::vector<size_t> previous1(s1.length() + 1); |
37 | |
38 | return editDistanceHelper( |
39 | s1.c_str(), |
40 | s1.length(), |
41 | s2.c_str(), |
42 | s2.length(), |
43 | current, |
44 | previous, |
45 | previous1, |
46 | max_distance |
47 | ); |
48 | } |
49 | #define NEXT_UNSAFE(s, i, c) { \ |
50 | (c)=(uint8_t)(s)[(i)++]; \ |
51 | } |
52 | |
53 | int32_t editDistanceHelper(const char* s1, |
54 | size_t s1_len, |
55 | const char* s2, |
56 | size_t s2_len, |
57 | std::vector<size_t> ¤t, |
58 | std::vector<size_t> &previous, |
59 | std::vector<size_t> &previous1, |
60 | size_t max_distance) { |
61 | if (max_distance) { |
62 | if (std::max(s1_len, s2_len) - std::min(s1_len, s2_len) > max_distance) { |
63 | return max_distance+1; |
64 | } |
65 | } |
66 | |
67 | for (size_t j = 0; j <= s1_len; ++j) { |
68 | current[j] = j; |
69 | } |
70 | |
71 | int32_t str2_offset = 0; |
72 | char prev2 = 0; |
73 | for (size_t i = 1; i <= s2_len; ++i) { |
74 | swap(previous1, previous); |
75 | swap(current, previous); |
76 | current[0] = i; |
77 | |
78 | // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) |
79 | char c2 = s2[str2_offset]; |
80 | char prev1 = 0; |
81 | int32_t str1_offset = 0; |
82 | |
83 | NEXT_UNSAFE(s2, str2_offset, c2); |
84 | |
85 | size_t current_min = s1_len; |
86 | for (size_t j = 1; j <= s1_len; ++j) { |
87 | size_t insertion = previous[j] + 1; |
88 | size_t deletion = current[j - 1] + 1; |
89 | size_t substitution = previous[j - 1]; |
90 | size_t transposition = insertion; |
91 | // NOLINTNEXTLINE(clang-analyzer-deadcode.DeadStores) |
92 | char c1 = s1[str1_offset]; |
93 | |
94 | NEXT_UNSAFE(s1, str1_offset, c1); |
95 | |
96 | if (c1 != c2) { |
97 | substitution += 1; |
98 | } |
99 | |
100 | |
101 | if (prev1 == c2 && prev2 == c1 && j > 1 && i > 1) { |
102 | transposition = previous1[j - 2] + 1; |
103 | } |
104 | prev1 = c1; |
105 | |
106 | current[j] = std::min(std::min(insertion, deletion), |
107 | std::min(substitution, transposition)); |
108 | current_min = std::min(current_min, current[j]); |
109 | } |
110 | |
111 | |
112 | if (max_distance != 0 && current_min > max_distance) { |
113 | return max_distance+1; |
114 | } |
115 | |
116 | prev2 = c2; |
117 | } |
118 | |
119 | return current[s1_len]; |
120 | } |
121 | } // namespace caffe2 |
122 | |