|
12 | 12 | read_file_type/1, read_file_type/2, read_link_type/1, read_posix_mtime_and_size/1, |
13 | 13 | change_posix_time/2, change_universal_time/2, var_info/2, |
14 | 14 | guard_op/2, guard_info/1, extract_splat_guards/1, extract_guards/1, |
15 | | - erlang_comparison_op_to_elixir/1, erl_fa_to_elixir_fa/2, jaro_similarity/2]). |
| 15 | + erlang_comparison_op_to_elixir/1, erl_fa_to_elixir_fa/2]). |
16 | 16 | -include("elixir.hrl"). |
17 | 17 | -include_lib("kernel/include/file.hrl"). |
18 | 18 |
|
@@ -247,86 +247,3 @@ returns_boolean({'__block__', _, Exprs}) -> |
247 | 247 | returns_boolean(lists:last(Exprs)); |
248 | 248 |
|
249 | 249 | returns_boolean(_) -> false. |
250 | | - |
251 | | - |
252 | | -% TODO: Remove me when we require Erlang/OTP 27+ |
253 | | -% This is a polyfill for older versions, copying the code from |
254 | | -% https://github.com/erlang/otp/pull/7879 |
255 | | --spec jaro_similarity(String1, String2) -> Similarity when |
256 | | - String1 :: unicode:chardata(), |
257 | | - String2 :: unicode:chardata(), |
258 | | - Similarity :: float(). %% Between +0.0 and 1.0 |
259 | | -jaro_similarity(A0, B0) -> |
260 | | - {A, ALen} = str_to_gcl_and_length(A0), |
261 | | - {B, BLen} = str_to_indexmap(B0), |
262 | | - Dist = max(ALen, BLen) div 2, |
263 | | - {AM, BM} = jaro_match(A, B, -Dist, Dist, [], []), |
264 | | - if |
265 | | - ALen =:= 0 andalso BLen =:= 0 -> |
266 | | - 1.0; |
267 | | - ALen =:= 0 orelse BLen =:= 0 -> |
268 | | - 0.0; |
269 | | - AM =:= [] -> |
270 | | - 0.0; |
271 | | - true -> |
272 | | - {M,T} = jaro_calc_mt(AM, BM, 0, 0), |
273 | | - (M/ALen + M/BLen + (M-T/2)/M) / 3 |
274 | | - end. |
275 | | - |
276 | | -jaro_match([A|As], B0, Min, Max, AM, BM) -> |
277 | | - case jaro_detect(maps:get(A, B0, []), Min, Max) of |
278 | | - false -> |
279 | | - jaro_match(As, B0, Min+1, Max+1, AM, BM); |
280 | | - {J, Remain} -> |
281 | | - B = B0#{A => Remain}, |
282 | | - jaro_match(As, B, Min+1, Max+1, [A|AM], add_rsorted({J,A},BM)) |
283 | | - end; |
284 | | -jaro_match(_A, _B, _Min, _Max, AM, BM) -> |
285 | | - {AM, BM}. |
286 | | - |
287 | | -jaro_detect([Idx|Rest], Min, Max) when Min < Idx, Idx < Max -> |
288 | | - {Idx, Rest}; |
289 | | -jaro_detect([Idx|Rest], Min, Max) when Idx < Max -> |
290 | | - jaro_detect(Rest, Min, Max); |
291 | | -jaro_detect(_, _, _) -> |
292 | | - false. |
293 | | - |
294 | | -jaro_calc_mt([CharA|AM], [{_, CharA}|BM], M, T) -> |
295 | | - jaro_calc_mt(AM, BM, M+1, T); |
296 | | -jaro_calc_mt([_|AM], [_|BM], M, T) -> |
297 | | - jaro_calc_mt(AM, BM, M+1, T+1); |
298 | | -jaro_calc_mt([], [], M, T) -> |
299 | | - {M, T}. |
300 | | - |
301 | | - |
302 | | -%% Returns GC list and length |
303 | | -str_to_gcl_and_length(S0) -> |
304 | | - gcl_and_length(unicode_util:gc(S0), [], 0). |
305 | | - |
306 | | -gcl_and_length([C|Str], Acc, N) -> |
307 | | - gcl_and_length(unicode_util:gc(Str), [C|Acc], N+1); |
308 | | -gcl_and_length([], Acc, N) -> |
309 | | - {lists:reverse(Acc), N}; |
310 | | -gcl_and_length({error, Err}, _, _) -> |
311 | | - error({badarg, Err}). |
312 | | - |
313 | | -%% Returns GC map with index and length |
314 | | -str_to_indexmap(S) -> |
315 | | - [M|L] = str_to_map(unicode_util:gc(S), 0), |
316 | | - {M,L}. |
317 | | - |
318 | | -str_to_map([], L) -> [#{}|L]; |
319 | | -str_to_map([G | Gs], I) -> |
320 | | - [M|L] = str_to_map(unicode_util:gc(Gs), I+1), |
321 | | - [maps:put(G, [I | maps:get(G, M, [])], M)| L]; |
322 | | -str_to_map({error,Error}, _) -> |
323 | | - error({badarg, Error}). |
324 | | - |
325 | | -%% Add in decreasing order |
326 | | -add_rsorted(A, [H|_]=BM) when A > H -> |
327 | | - [A|BM]; |
328 | | -add_rsorted(A, [H|BM]) -> |
329 | | - [H|add_rsorted(A,BM)]; |
330 | | -add_rsorted(A, []) -> |
331 | | - [A]. |
332 | | - |
0 commit comments