Skip to content

Commit 0741a5d

Browse files
committed
Remove :elixir_utils.jaro_similarity polyfill
1 parent 22ea4da commit 0741a5d

2 files changed

Lines changed: 2 additions & 86 deletions

File tree

lib/elixir/lib/string.ex

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -3168,8 +3168,7 @@ defmodule String do
3168 3168
def jaro_distance("", _string), do: 0.0
3169 3169

3170 3170
def jaro_distance(string1, string2) when is_binary(string1) and is_binary(string2) do
3171-
# TODO: Replace by :string.jaro_similarity/2 when we require Erlang/OTP 27+
3172-
:elixir_utils.jaro_similarity(string1, string2)
3171+
:string.jaro_similarity(string1, string2)
3173 3172
end
3174 3173

3175 3174
@doc """

lib/elixir/src/elixir_utils.erl

Lines changed: 1 addition & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@
12 12
read_file_type/1, read_file_type/2, read_link_type/1, read_posix_mtime_and_size/1,
13 13
change_posix_time/2, change_universal_time/2, var_info/2,
14 14
guard_op/2, guard_info/1, extract_splat_guards/1, extract_guards/1,
15-
erlang_comparison_op_to_elixir/1, erl_fa_to_elixir_fa/2, jaro_similarity/2]).
15+
erlang_comparison_op_to_elixir/1, erl_fa_to_elixir_fa/2]).
16 16
-include("elixir.hrl").
17 17
-include_lib("kernel/include/file.hrl").
18 18

@@ -247,86 +247,3 @@ returns_boolean({'__block__', _, Exprs}) ->
247 247
returns_boolean(lists:last(Exprs));
248 248

249 249
returns_boolean(_) -> false.
250-
251-
252-
% TODO: Remove me when we require Erlang/OTP 27+
253-
% This is a polyfill for older versions, copying the code from
254-
% https://github.com/erlang/otp/pull/7879
255-
-spec jaro_similarity(String1, String2) -> Similarity when
256-
String1 :: unicode:chardata(),
257-
String2 :: unicode:chardata(),
258-
Similarity :: float(). %% Between +0.0 and 1.0
259-
jaro_similarity(A0, B0) ->
260-
{A, ALen} = str_to_gcl_and_length(A0),
261-
{B, BLen} = str_to_indexmap(B0),
262-
Dist = max(ALen, BLen) div 2,
263-
{AM, BM} = jaro_match(A, B, -Dist, Dist, [], []),
264-
if
265-
ALen =:= 0 andalso BLen =:= 0 ->
266-
1.0;
267-
ALen =:= 0 orelse BLen =:= 0 ->
268-
0.0;
269-
AM =:= [] ->
270-
0.0;
271-
true ->
272-
{M,T} = jaro_calc_mt(AM, BM, 0, 0),
273-
(M/ALen + M/BLen + (M-T/2)/M) / 3
274-
end.
275-
276-
jaro_match([A|As], B0, Min, Max, AM, BM) ->
277-
case jaro_detect(maps:get(A, B0, []), Min, Max) of
278-
false ->
279-
jaro_match(As, B0, Min+1, Max+1, AM, BM);
280-
{J, Remain} ->
281-
B = B0#{A => Remain},
282-
jaro_match(As, B, Min+1, Max+1, [A|AM], add_rsorted({J,A},BM))
283-
end;
284-
jaro_match(_A, _B, _Min, _Max, AM, BM) ->
285-
{AM, BM}.
286-
287-
jaro_detect([Idx|Rest], Min, Max) when Min < Idx, Idx < Max ->
288-
{Idx, Rest};
289-
jaro_detect([Idx|Rest], Min, Max) when Idx < Max ->
290-
jaro_detect(Rest, Min, Max);
291-
jaro_detect(_, _, _) ->
292-
false.
293-
294-
jaro_calc_mt([CharA|AM], [{_, CharA}|BM], M, T) ->
295-
jaro_calc_mt(AM, BM, M+1, T);
296-
jaro_calc_mt([_|AM], [_|BM], M, T) ->
297-
jaro_calc_mt(AM, BM, M+1, T+1);
298-
jaro_calc_mt([], [], M, T) ->
299-
{M, T}.
300-
301-
302-
%% Returns GC list and length
303-
str_to_gcl_and_length(S0) ->
304-
gcl_and_length(unicode_util:gc(S0), [], 0).
305-
306-
gcl_and_length([C|Str], Acc, N) ->
307-
gcl_and_length(unicode_util:gc(Str), [C|Acc], N+1);
308-
gcl_and_length([], Acc, N) ->
309-
{lists:reverse(Acc), N};
310-
gcl_and_length({error, Err}, _, _) ->
311-
error({badarg, Err}).
312-
313-
%% Returns GC map with index and length
314-
str_to_indexmap(S) ->
315-
[M|L] = str_to_map(unicode_util:gc(S), 0),
316-
{M,L}.
317-
318-
str_to_map([], L) -> [#{}|L];
319-
str_to_map([G | Gs], I) ->
320-
[M|L] = str_to_map(unicode_util:gc(Gs), I+1),
321-
[maps:put(G, [I | maps:get(G, M, [])], M)| L];
322-
str_to_map({error,Error}, _) ->
323-
error({badarg, Error}).
324-
325-
%% Add in decreasing order
326-
add_rsorted(A, [H|_]=BM) when A > H ->
327-
[A|BM];
328-
add_rsorted(A, [H|BM]) ->
329-
[H|add_rsorted(A,BM)];
330-
add_rsorted(A, []) ->
331-
[A].
332-

0 commit comments

Comments
 (0)