Skip to content

Commit 7b98aec

Browse files
committed
better documentation
1 parent a94395d commit 7b98aec

File tree

8 files changed

+339
-71
lines changed

8 files changed

+339
-71
lines changed

README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,12 @@ A translation of Google's public-domain
66
For information about the original Google project and its application,
77
see that repository's [wiki pages](https://github.com/google/diff-match-patch/wiki).
88

9+
References from the Google project:
10+
11+
* diff: [An O(ND) Difference Algorithm and Its Variations (Myers, 1986)](http://www.xmailserver.org/diff2.pdf)
12+
* match: [Fast Text Searching with Errors (Wu and Manber, 1991)](http://www.club.cc.cmu.edu/~ajo/docs/agrep.pdf)
13+
14+
915
## Installation
1016

1117
If [available in Hex](https://hex.pm/docs/publish), the package can be installed

lib/dmp/debug_utils.ex

Lines changed: 133 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,133 @@
1+
defmodule Dmp.DebugUtils do
2+
@moduledoc """
3+
Utilities for debugging bitarrays.
4+
"""
5+
6+
use Bitwise, only_operators: true
7+
8+
@doc """
9+
Prints the `alphabet` bitarray on IO, showing binary values.
10+
"""
11+
@spec debug_alphabet(String.t(), Dmp.Match.alpha()) :: nil
12+
def debug_alphabet(pattern, s) do
13+
alphabet_header(pattern) |> IO.puts()
14+
pattern_length = String.length(pattern)
15+
16+
String.codepoints(pattern)
17+
|> Enum.sort()
18+
|> Enum.dedup()
19+
|> Enum.map(fn ch -> alphabet_line(ch, s, pattern_length) |> IO.puts() end)
20+
21+
nil
22+
end
23+
24+
defp alphabet_header(pattern) do
25+
line = ["\n alphabet:" | String.codepoints(pattern)]
26+
Enum.join(line, " ")
27+
end
28+
29+
defp alphabet_line(ch, s, pattern_length) do
30+
ord = String.to_charlist(ch) |> List.first()
31+
value = Map.get(s, ord, 0)
32+
valstr = to_string(value) |> String.pad_leading(12)
33+
bits = bitmap_to_list(value, pattern_length)
34+
line = [" ", ch, valstr <> ":"] ++ bits
35+
Enum.join(line, " ")
36+
end
37+
38+
@doc """
39+
Prints the `rd` bitarray on IO, showing binary values.
40+
41+
* `d` - Error level for the bitarray.
42+
* `start` - Lowest index that has been calculated.
43+
* `best_loc` - Index in the text where the best match has been found.
44+
"""
45+
@spec debug_rd(
46+
String.t(),
47+
String.t(),
48+
non_neg_integer(),
49+
Dmp.Match.bitap_array(),
50+
non_neg_integer(),
51+
integer()
52+
) :: nil
53+
def debug_rd(text, pattern, d, rd, start \\ 0, best_loc \\ -1) do
54+
rd_size = max(String.length(text) + 2, Map.fetch!(rd, -1))
55+
rd_header(d, pattern) |> IO.puts()
56+
pattern_length = String.length(pattern)
57+
58+
Enum.map(0..(rd_size - 1), fn j ->
59+
ch =
60+
if j == 0 do
61+
nil
62+
else
63+
String.at(text, j - 1)
64+
end
65+
66+
rd_j_line(ch, j, rd, pattern_length, start, best_loc) |> IO.puts()
67+
end)
68+
69+
nil
70+
end
71+
72+
defp rd_header(d, pattern) do
73+
dstr = "rd_j^#{d}" |> String.pad_trailing(7)
74+
line = ["#{dstr} pattern:" | String.codepoints(pattern)]
75+
Enum.join(line, " ")
76+
end
77+
78+
defp rd_j_line(nil, j, rd, pattern_length, start, best_loc) do
79+
rd_j_line("_", j, rd, pattern_length, start, best_loc)
80+
end
81+
82+
defp rd_j_line(ch, j, rd, pattern_length, start, best_loc) do
83+
value = Map.get(rd, j, 0)
84+
valstr = to_string(value) |> String.pad_leading(12)
85+
jstr = to_string(j) |> String.pad_leading(2)
86+
87+
jstr =
88+
cond do
89+
best_loc != -1 && j - 1 == best_loc ->
90+
jstr <> "@"
91+
92+
j < start ->
93+
jstr <> "*"
94+
95+
true ->
96+
jstr <> " "
97+
end
98+
99+
bits = bitmap_to_list(value, pattern_length)
100+
line = [jstr, ch, valstr <> ":"] ++ bits
101+
Enum.join(line, " ")
102+
end
103+
104+
@doc """
105+
Returns a list of "codepoints" (single-character strings)
106+
showing the base-2 value of `value`.
107+
108+
A minimum of `padding` elements are returned.
109+
"""
110+
@spec bitmap_to_list(non_neg_integer(), non_neg_integer()) :: [String.t()]
111+
def bitmap_to_list(value, padding \\ 0) do
112+
encode_bit_loop(value, "")
113+
|> String.pad_leading(padding, "0")
114+
|> String.codepoints()
115+
end
116+
117+
defp encode_bit_loop(v, acc) do
118+
acc =
119+
if (v &&& 1) == 0 do
120+
"0" <> acc
121+
else
122+
"1" <> acc
123+
end
124+
125+
v = v >>> 1
126+
127+
if v == 0 do
128+
acc
129+
else
130+
encode_bit_loop(v, acc)
131+
end
132+
end
133+
end

lib/dmp/diff.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ defmodule Dmp.Diff do
273273
Find the "middle snake" of a diff, split the problem in two
274274
and return the recursively constructed diff.
275275
276-
See Myers 1986 paper: [An O(ND) Difference Algorithm and Its Variations.](http://www.xmailserver.org/diff2.pdf)
276+
See: [An O(ND) Difference Algorithm and Its Variations (Myers, 1986)](http://www.xmailserver.org/diff2.pdf)
277277
278278
* `text1` - Old string to be diffed.
279279
* `text2` - New string to be diffed.

0 commit comments

Comments
 (0)