Skip to content

Commit 9584aba

Browse files
author
Krzysztof Czajkowski
committed
Simplify exposed functions
1 parent be609ee commit 9584aba

6 files changed

Lines changed: 127 additions & 101 deletions

File tree

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ import editdistance.osa
3737
# Example usage:
3838
str1 = "kitten"
3939
str2 = "sitting"
40-
distance = editdistance.osa.calculate_distance(str1, str2)
40+
distance = editdistance.osa.calculate_distance(str1, str2, transpose_weight=0.1)
4141
print(f"The edit distance between '{{}}' and '{{}}' is {{}}".format(str1, str2, distance))
4242
```
4343

editdistance/_edit_distance_osa.cpp

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
std::vector<std::vector<double>> compute_dp_table(
66
const std::string& a,
77
const std::string& b,
8-
const std::map<EditopName, double>& cost_map
8+
const std::map<CppEditopName, double>& cost_map
99
) {
1010
int len_a = a.length();
1111
int len_b = b.length();
@@ -42,42 +42,42 @@ std::vector<std::vector<double>> compute_dp_table(
4242
double cpp_compute_distance(
4343
const std::string& a,
4444
const std::string& b,
45-
const std::map<EditopName, double>& cost_map
45+
const std::map<CppEditopName, double>& cost_map
4646
) {
4747
auto dp = compute_dp_table(a, b, cost_map);
4848
return dp[a.length()][b.length()];
4949
}
5050

51-
std::vector<std::vector<Editop>> backtrack_all_paths(
51+
std::vector<std::vector<CppEditop>> backtrack_all_paths(
5252
const std::string& a,
5353
const std::string& b,
54-
const std::map<EditopName, double>& cost_map,
54+
const std::map<CppEditopName, double>& cost_map,
5555
const std::vector<std::vector<double>>& dp,
5656
int i,
5757
int j,
58-
std::vector<Editop>& current_path
58+
std::vector<CppEditop>& current_path
5959
) {
6060
if (i == 0 && j == 0) {
61-
std::vector<Editop> reversed_path = current_path;
61+
std::vector<CppEditop> reversed_path = current_path;
6262
std::reverse(reversed_path.begin(), reversed_path.end());
6363
return {reversed_path};
6464
}
6565

66-
std::vector<std::vector<Editop>> all_paths;
66+
std::vector<std::vector<CppEditop>> all_paths;
6767
double current_cost = dp[i][j];
6868
const double tol = 1e-6;
6969

7070

7171
if (i > 0 && std::abs((dp[i-1][j] + cost_map.at(DELETE)) - current_cost) < tol) {
72-
Editop op(DELETE, i-1, i-1, cost_map.at(DELETE), std::string(1, a[i-1]));
72+
CppEditop op(DELETE, i-1, i-1, cost_map.at(DELETE), std::string(1, a[i-1]));
7373
current_path.push_back(op);
7474
auto paths = backtrack_all_paths(a, b, cost_map, dp, i-1, j, current_path);
7575
all_paths.insert(all_paths.end(), paths.begin(), paths.end());
7676
current_path.pop_back();
7777
}
7878

7979
if (j > 0 && std::abs((dp[i][j-1] + cost_map.at(INSERT)) - current_cost) < tol) {
80-
Editop op(INSERT, i, i, cost_map.at(INSERT), std::string(1, b[j-1]));
80+
CppEditop op(INSERT, i, i, cost_map.at(INSERT), std::string(1, b[j-1]));
8181
current_path.push_back(op);
8282
auto paths = backtrack_all_paths(a, b, cost_map, dp, i, j-1, current_path);
8383
all_paths.insert(all_paths.end(), paths.begin(), paths.end());
@@ -89,7 +89,7 @@ std::vector<std::vector<Editop>> backtrack_all_paths(
8989
double sub_cost = (a[i-1] == b[j-1]) ? 0.0 : cost_map.at(REPLACE);
9090
if (std::abs((dp[i-1][j-1] + sub_cost) - current_cost) < tol) {
9191
std::string out_char = (sub_cost == 0.0) ? std::string(1, a[i-1]) : std::string(1, b[j-1]);
92-
Editop op(REPLACE, i-1, j-1, sub_cost, out_char);
92+
CppEditop op(REPLACE, i-1, j-1, sub_cost, out_char);
9393
current_path.push_back(op);
9494
auto paths = backtrack_all_paths(a, b, cost_map, dp, i-1, j-1, current_path);
9595
all_paths.insert(all_paths.end(), paths.begin(), paths.end());
@@ -102,7 +102,7 @@ std::vector<std::vector<Editop>> backtrack_all_paths(
102102
a[i-1] == b[j-2] && a[i-2] == b[j-1] &&
103103
std::abs((dp[i-2][j-2] + cost_map.at(TRANSPOSE)) - current_cost) < tol) {
104104
std::string transpose_str = std::string(1, b[j-2]) + std::string(1, b[j-1]);
105-
Editop op(TRANSPOSE, i-2, j-2, cost_map.at(TRANSPOSE), transpose_str);
105+
CppEditop op(TRANSPOSE, i-2, j-2, cost_map.at(TRANSPOSE), transpose_str);
106106
current_path.push_back(op);
107107
auto paths = backtrack_all_paths(a, b, cost_map, dp, i-2, j-2, current_path);
108108
all_paths.insert(all_paths.end(), paths.begin(), paths.end());
@@ -113,21 +113,21 @@ std::vector<std::vector<Editop>> backtrack_all_paths(
113113
}
114114

115115

116-
std::vector<std::vector<Editop>> cpp_compute_all_paths(
116+
std::vector<std::vector<CppEditop>> cpp_compute_all_paths(
117117
const std::string& a,
118118
const std::string& b,
119-
const std::map<EditopName, double>& cost_map
119+
const std::map<CppEditopName, double>& cost_map
120120
) {
121121
auto dp = compute_dp_table(a, b, cost_map);
122-
std::vector<Editop> current_path;
122+
std::vector<CppEditop> current_path;
123123
return backtrack_all_paths(a, b, cost_map, dp, a.length(), b.length(), current_path);
124124
}
125125

126126

127127
void cpp_print_all_paths(
128128
const std::string& a,
129129
const std::string& b,
130-
const std::map<EditopName, double>& cost_map
130+
const std::map<CppEditopName, double>& cost_map
131131
) {
132132
auto paths = cpp_compute_all_paths(a, b, cost_map);
133133
double distance = cpp_compute_distance(a, b, cost_map);
@@ -145,7 +145,7 @@ void cpp_print_all_paths(
145145
}
146146
}
147147

148-
std::string editop_name_to_string(EditopName name) {
148+
std::string editop_name_to_string(CppEditopName name) {
149149
switch (name) {
150150
case INSERT: return "INSERT";
151151
case DELETE: return "DELETE";
@@ -155,8 +155,8 @@ std::string editop_name_to_string(EditopName name) {
155155
}
156156
}
157157

158-
std::ostream& operator<<(std::ostream& os, const Editop& op) {
159-
os << "Editop(name=" << editop_name_to_string(op.name)
158+
std::ostream& operator<<(std::ostream& os, const CppEditop& op) {
159+
os << "CppEditop(name=" << editop_name_to_string(op.name)
160160
<< ", src_idx=" << op.src_idx
161161
<< ", dst_idx=" << op.dst_idx
162162
<< ", cost=" << op.cost

editdistance/_edit_distance_osa.hpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -7,66 +7,66 @@
77
#include <iostream>
88

99

10-
enum EditopName {
10+
enum CppEditopName {
1111
INSERT,
1212
DELETE,
1313
REPLACE,
1414
TRANSPOSE
1515
};
1616

17-
struct Editop {
18-
EditopName name;
17+
struct CppEditop {
18+
CppEditopName name;
1919
int src_idx;
2020
int dst_idx;
2121
double cost;
2222
std::string output_string;
2323

24-
Editop() : name(INSERT), src_idx(0), dst_idx(0), cost(0.0), output_string("") {}
25-
Editop(EditopName n, int si, int di, double c, const std::string& os)
24+
CppEditop() : name(INSERT), src_idx(0), dst_idx(0), cost(0.0), output_string("") {}
25+
CppEditop(CppEditopName n, int si, int di, double c, const std::string& os)
2626
: name(n), src_idx(si), dst_idx(di), cost(c), output_string(os) {}
2727
};
2828

2929

3030
std::vector<std::vector<double>> compute_dp_table(
3131
const std::string& a,
3232
const std::string& b,
33-
const std::map<EditopName, double>& cost_map
33+
const std::map<CppEditopName, double>& cost_map
3434
);
3535

3636

3737
double cpp_compute_distance(
3838
const std::string& a,
3939
const std::string& b,
40-
const std::map<EditopName, double>& cost_map
40+
const std::map<CppEditopName, double>& cost_map
4141
);
4242

4343

44-
std::vector<std::vector<Editop>> backtrack_all_paths(
44+
std::vector<std::vector<CppEditop>> backtrack_all_paths(
4545
const std::string& a,
4646
const std::string& b,
47-
const std::map<EditopName, double>& cost_map,
47+
const std::map<CppEditopName, double>& cost_map,
4848
const std::vector<std::vector<double>>& dp,
4949
int i,
5050
int j,
51-
std::vector<Editop>& current_path
51+
std::vector<CppEditop>& current_path
5252
);
5353

5454

55-
std::vector<std::vector<Editop>> cpp_compute_all_paths(
55+
std::vector<std::vector<CppEditop>> cpp_compute_all_paths(
5656
const std::string& a,
5757
const std::string& b,
58-
const std::map<EditopName, double>& cost_map
58+
const std::map<CppEditopName, double>& cost_map
5959
);
6060

6161

6262
void cpp_print_all_paths(
6363
const std::string& a,
6464
const std::string& b,
65-
const std::map<EditopName, double>& cost_map
65+
const std::map<CppEditopName, double>& cost_map
6666
);
6767

6868

69-
std::string editop_name_to_string(EditopName name);
70-
std::ostream& operator<<(std::ostream& os, const Editop& op);
69+
std::string editop_name_to_string(CppEditopName name);
70+
std::ostream& operator<<(std::ostream& os, const CppEditop& op);
7171

7272
#endif // EDIT_DISTANCE_OSA_HPP

editdistance/edit_distance_osa.pyx

Lines changed: 73 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -9,32 +9,32 @@ from enum import Enum
99

1010

1111
cdef extern from "_edit_distance_osa.hpp":
12-
cdef enum EditopName:
12+
cdef enum CppEditopName:
1313
INSERT
1414
DELETE
1515
REPLACE
1616
TRANSPOSE
1717

18-
cdef struct Editop:
19-
EditopName name
18+
cdef struct CppEditop:
19+
CppEditopName name
2020
int src_idx
2121
int dst_idx
2222
double cost
2323
string output_string
2424

25-
vector[vector[Editop]] cpp_compute_all_paths(const string& a, const string& b, const map[EditopName, double]& cost_map)
26-
void cpp_print_all_paths(const string& a, const string& b, const map[EditopName, double]& cost_map)
27-
double cpp_compute_distance(const string& a, const string& b, const map[EditopName, double]& cost_map)
25+
vector[vector[CppEditop]] cpp_compute_all_paths(const string& a, const string& b, const map[CppEditopName, double]& cost_map)
26+
void cpp_print_all_paths(const string& a, const string& b, const map[CppEditopName, double]& cost_map)
27+
double cpp_compute_distance(const string& a, const string& b, const map[CppEditopName, double]& cost_map)
2828

2929

30-
class PyEditopName(Enum):
30+
class EditopName(Enum):
3131
INSERT = 0
3232
DELETE = 1
3333
REPLACE = 2
3434
TRANSPOSE = 3
3535

3636

37-
cdef class PyEditop:
37+
cdef class Editop:
3838
cdef readonly object name
3939
cdef readonly int src_idx
4040
cdef readonly int dst_idx
@@ -52,41 +52,56 @@ cdef class PyEditop:
5252
return f"Editop(name={self.name}, src_idx={self.src_idx}, dst_idx={self.dst_idx}, cost={self.cost}, output_string='{self.output_string}')"
5353

5454

55-
cdef map[EditopName, double] _convert_cost_map(dict cost_map):
56-
cdef map[EditopName, double] cpp_cost_map
57-
if PyEditopName.INSERT in cost_map:
58-
cpp_cost_map[INSERT] = cost_map[PyEditopName.INSERT]
59-
if PyEditopName.DELETE in cost_map:
60-
cpp_cost_map[DELETE] = cost_map[PyEditopName.DELETE]
61-
if PyEditopName.REPLACE in cost_map:
62-
cpp_cost_map[REPLACE] = cost_map[PyEditopName.REPLACE]
63-
if PyEditopName.TRANSPOSE in cost_map:
64-
cpp_cost_map[TRANSPOSE] = cost_map[PyEditopName.TRANSPOSE]
55+
cdef map[CppEditopName, double] _convert_cost_map(dict cost_map):
56+
cdef map[CppEditopName, double] cpp_cost_map
57+
if EditopName.INSERT in cost_map:
58+
cpp_cost_map[INSERT] = cost_map[EditopName.INSERT]
59+
if EditopName.DELETE in cost_map:
60+
cpp_cost_map[DELETE] = cost_map[EditopName.DELETE]
61+
if EditopName.REPLACE in cost_map:
62+
cpp_cost_map[REPLACE] = cost_map[EditopName.REPLACE]
63+
if EditopName.TRANSPOSE in cost_map:
64+
cpp_cost_map[TRANSPOSE] = cost_map[EditopName.TRANSPOSE]
6565
return cpp_cost_map
6666

6767

68-
def compute_with_all_paths(str a, str b, dict cost_map):
68+
def get_all_paths(
69+
str a,
70+
str b,
71+
double replace_weight=1.0,
72+
double insert_weight=1.0,
73+
double delete_weight=1.0,
74+
double transpose_weight=1.0
75+
):
76+
cdef dict cost_map = {
77+
EditopName.REPLACE: replace_weight,
78+
EditopName.INSERT: insert_weight,
79+
EditopName.DELETE: delete_weight,
80+
EditopName.TRANSPOSE: transpose_weight
81+
}
6982
cdef string cpp_a = a.encode("utf-8")
7083
cdef string cpp_b = b.encode("utf-8")
71-
cdef map[EditopName, double] cpp_cost_map = _convert_cost_map(cost_map)
72-
cdef vector[vector[Editop]] cpp_paths = cpp_compute_all_paths(cpp_a, cpp_b, cpp_cost_map)
84+
cdef map[CppEditopName, double] cpp_cost_map = _convert_cost_map(cost_map)
85+
cdef vector[vector[CppEditop]] cpp_paths = cpp_compute_all_paths(cpp_a, cpp_b, cpp_cost_map)
7386
python_paths = []
74-
cdef vector[Editop] cpp_path
75-
cdef Editop cpp_op
87+
cdef vector[CppEditop] cpp_path
88+
cdef CppEditop cpp_op
7689
for cpp_path in cpp_paths:
7790
python_path = []
7891
for cpp_op in cpp_path:
92+
if cpp_op.cost == 0:
93+
continue
7994
if cpp_op.name == INSERT:
80-
py_name = PyEditopName.INSERT
95+
py_name = EditopName.INSERT
8196
elif cpp_op.name == DELETE:
82-
py_name = PyEditopName.DELETE
97+
py_name = EditopName.DELETE
8398
elif cpp_op.name == REPLACE:
84-
py_name = PyEditopName.REPLACE
99+
py_name = EditopName.REPLACE
85100
elif cpp_op.name == TRANSPOSE:
86-
py_name = PyEditopName.TRANSPOSE
101+
py_name = EditopName.TRANSPOSE
87102
else:
88103
py_name = None
89-
python_path.append(PyEditop(
104+
python_path.append(Editop(
90105
py_name,
91106
cpp_op.src_idx,
92107
cpp_op.dst_idx,
@@ -97,15 +112,41 @@ def compute_with_all_paths(str a, str b, dict cost_map):
97112
return python_paths
98113

99114

100-
def print_all_paths(str a, str b, dict cost_map):
115+
def print_all_paths(
116+
str a,
117+
str b,
118+
double replace_weight=1.0,
119+
double insert_weight=1.0,
120+
double delete_weight=1.0,
121+
double transpose_weight=1.0
122+
):
123+
cdef dict cost_map = {
124+
EditopName.REPLACE: replace_weight,
125+
EditopName.INSERT: insert_weight,
126+
EditopName.DELETE: delete_weight,
127+
EditopName.TRANSPOSE: transpose_weight
128+
}
101129
cdef string cpp_a = a.encode("utf-8")
102130
cdef string cpp_b = b.encode("utf-8")
103-
cdef map[EditopName, double] cpp_cost_map = _convert_cost_map(cost_map)
131+
cdef map[CppEditopName, double] cpp_cost_map = _convert_cost_map(cost_map)
104132
cpp_print_all_paths(cpp_a, cpp_b, cpp_cost_map)
105133

106134

107-
def compute_distance(str a, str b, dict cost_map):
135+
def compute_distance(
136+
str a,
137+
str b,
138+
double replace_weight=1.0,
139+
double insert_weight=1.0,
140+
double delete_weight=1.0,
141+
double transpose_weight=1.0
142+
):
143+
cdef dict cost_map = {
144+
EditopName.REPLACE: replace_weight,
145+
EditopName.INSERT: insert_weight,
146+
EditopName.DELETE: delete_weight,
147+
EditopName.TRANSPOSE: transpose_weight
148+
}
108149
cdef string cpp_a = a.encode("utf-8")
109150
cdef string cpp_b = b.encode("utf-8")
110-
cdef map[EditopName, double] cpp_cost_map = _convert_cost_map(cost_map)
151+
cdef map[CppEditopName, double] cpp_cost_map = _convert_cost_map(cost_map)
111152
return cpp_compute_distance(cpp_a, cpp_b, cpp_cost_map)

0 commit comments

Comments
 (0)