Skip to content

Commit 29a0c55

Browse files
committed
apache license, katex
1 parent 36f9b05 commit 29a0c55

File tree

5 files changed

+349
-81
lines changed

5 files changed

+349
-81
lines changed

LICENSE

Lines changed: 10 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,21 +1,15 @@
1-
MIT License
1+
Apache-2.0 License
22

33
Copyright (c) 2022 Peter Zingg
44

5-
Permission is hereby granted, free of charge, to any person obtaining a copy
6-
of this software and associated documentation files (the "Software"), to deal
7-
in the Software without restriction, including without limitation the rights
8-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9-
copies of the Software, and to permit persons to whom the Software is
10-
furnished to do so, subject to the following conditions:
5+
Licensed under the Apache License, Version 2.0 (the "License");
6+
you may not use these files except in compliance with the License.
7+
You may obtain a copy of the License at
118

12-
The above copyright notice and this permission notice shall be included in all
13-
copies or substantial portions of the Software.
9+
http://www.apache.org/licenses/LICENSE-2.0
1410

15-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21-
SOFTWARE.
11+
Unless required by applicable law or agreed to in writing, software
12+
distributed under the License is distributed on an "AS IS" BASIS,
13+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
See the License for the specific language governing permissions and
15+
limitations under the License.

lib/dmp/match.ex

Lines changed: 91 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -116,12 +116,13 @@ defmodule Dmp.Match do
116116
s = alphabet(pattern)
117117

118118
# Initialise the bit arrays.
119-
matchmask = 1 <<< (pattern_length - 1)
120-
shiftmask = (1 <<< pattern_length) - 1
119+
match_mask = 1 <<< (pattern_length - 1)
120+
overflow_mask = (1 <<< pattern_length) - 1
121121
text_length = String.length(text)
122122

123123
constants =
124-
{text, pattern, loc, s, matchmask, shiftmask, text_length, pattern_length, match_distance}
124+
{text, pattern, loc, s, match_mask, overflow_mask, text_length, pattern_length,
125+
match_distance}
125126

126127
# Uncomment to see the bitarray
127128
# debug_alphabet(pattern, s) |> Enum.join("\n") |> IO.puts
@@ -207,14 +208,14 @@ defmodule Dmp.Match do
207208
# previous `d-1` error level.
208209
# `text_length` - $$n$$ in the Wu and Manber paper.
209210
# `pattern_length` - $$m$$ in the Wu and Manber paper.
210-
# `matchmask` - Bitmask to test the value of $$R_j+1[m]$$.
211+
# `match_mask` - Bitmask to test the value of $$R_{j+1}[m]$$.
211212
#
212213
# Returns updated `best_loc`, `score_threshold`, and `max_distance`,
213214
# and the calculated $$R_j^d$$ bitarray from this level.
214215
defp search_at_error_level(
215216
d,
216217
{best_loc, score_threshold, max_distance, last_rd},
217-
{text, _pattern, loc, s, matchmask, shiftmask, text_length, pattern_length,
218+
{text, _pattern, loc, s, match_mask, overflow_mask, text_length, pattern_length,
218219
match_distance}
219220
) do
220221
distance =
@@ -232,7 +233,9 @@ defmodule Dmp.Match do
232233
# for debugging we store the "size" of the array at index -1
233234
rd = %{(finish + 1) => (1 <<< d) - 1, -1 => finish + 2}
234235
acc2 = {best_loc, score_threshold, start, rd}
235-
constants2 = {d, text, loc, last_rd, s, matchmask, shiftmask, pattern_length, match_distance}
236+
237+
constants2 =
238+
{d, text, loc, last_rd, s, match_mask, overflow_mask, pattern_length, match_distance}
236239

237240
{best_loc, score_threshold, _j, rd} =
238241
Enum.reduce_while(finish..0//-1, acc2, fn j, acc ->
@@ -263,26 +266,15 @@ defmodule Dmp.Match do
263266
# `bin_mid` - Midpoint between `bin_min` and `bin_max`
264267
#
265268
# Returns `bin_mid`, where `loc + bin_mid` has the lowest bitap score.
266-
defp best_distance(
267-
bin_min,
268-
bin_mid,
269-
_bin_max,
270-
_acc
271-
)
272-
when bin_min >= bin_mid do
273-
# Done
274-
bin_mid
275-
end
269+
defp best_distance(bin_min, bin_mid, _, _) when bin_min >= bin_mid, do: bin_mid
276270

277271
defp best_distance(
278272
bin_min,
279273
bin_mid,
280274
bin_max,
281275
{d, loc, pattern_length, score_threshold, match_distance}
282276
) do
283-
# Loop
284-
mid_loc = loc + bin_mid
285-
score = bitap_score(d, mid_loc, loc, pattern_length, match_distance)
277+
score = bitap_score(d, loc + bin_mid, loc, pattern_length, match_distance)
286278

287279
{bin_min, bin_max} =
288280
if score <= score_threshold do
@@ -301,55 +293,104 @@ defmodule Dmp.Match do
301293
)
302294
end
303295

304-
# This is the heart of the bitap algorithm.
305-
#
306-
# Updates the `rd` bitarray for the current error level `d` at the index `j`
307-
# (representing the zero-based location `j - 1` in `text`), and
308-
# then tests for a match (an exact match if `d == 0` or a match with
309-
# `d` errors).
310-
#
311-
# If a match is found, we calculate the error score (number of errors and
312-
# distance from expected location) and if it's lower than the current
313-
# threshold, we stop calculating the update if we have already gone
314-
# below the minimum possible location, or continue the update
315-
# going with a smaller range of indices.
316-
defp bitap_update(
317-
j,
318-
{best_loc, score_threshold, start, rd},
319-
_constants
320-
)
321-
when j < start do
296+
@typedoc "Accumulator for `bitap_update/3`."
297+
@type update_acc() :: {integer(), float(), non_neg_integer(), bitap_array()}
298+
@typep update_constants() ::
299+
{non_neg_integer(), String.t(), integer(), bitap_array(), alpha(), non_neg_integer(),
300+
non_neg_integer(), non_neg_integer(), non_neg_integer()}
301+
302+
@doc """
303+
Perform the bitap algorithm and calculate error score if a match is found.
304+
305+
* `acc` - Accumulator tuple, with `best_loc`, `score_threshold`, `start`, and `rd` elements.
306+
* `constants` - Other constant values needed for calculations.
307+
308+
Updates the `rd` bitarray for the current error level `d` at the index `j`
309+
(representing the zero-based location `j - 1` in `text`), and
310+
then tests for a match (an exact match if `d == 0` or a match with
311+
`d` errors).
312+
313+
If a match is found at position `j`, calculate the error score
314+
(based on the error level `d` and the distance from expected location).
315+
If the score is lower than the current threshold, stop calculating the update
316+
if we have already gone below the minimum possible location,
317+
or continue the update, limiting the range of `j` (increasing the
318+
`start` value).
319+
320+
## Notes
321+
322+
The `j` index is decremented from the end of the text to the start of the text.
323+
Since the iteration is moving from high `j` to low, `bitap_update` does "Lshift"
324+
operations, not the "Rshift" operations in the Wu and Manber paper, and uses
325+
the previous values that were set at `j + 1`, not `j`.
326+
327+
Here the calculations are:
328+
329+
$$Rsubscptj^d = \\begin{cases}
330+
Lshift [ Rsubscpt{j+1}^d ] \\text{ AND } S_c &\\text{if } d = 0 \\cr
331+
Lshift [ Rsubscpt{j+1}^d ] \\text{ AND } S_c \\text{ OR } Lshift [ Rsubscptj^{d-1} \\text{ OR } Rsubscpt{j+1}^{d-1} ] \\text{ OR } Rsubscpt{j+1}^{d-1} &\\text{otherwise}
332+
\\end{cases}$$
333+
334+
versus in Wu and Manber's paper:
335+
336+
$$Rsubscpt{j+1}^d = \\begin{cases}
337+
Rshift [ Rsubscpt{j}^d ] \\text{ AND } S_c &\\text{if } d = 0 \\cr
338+
Rshift [ Rsubscpt{j}^d ] \\text{ AND } S_c \\text{ OR } Rshift [ Rsubscptj^{d-1} \\text{ OR } Rsubscpt{j+1}^{d-1} ] \\text{ OR } Rsubscpt{j}^{d-1} &\\text{otherwise}
339+
\\end{cases}$$
340+
341+
"""
342+
@spec bitap_update(non_neg_integer(), update_acc(), update_constants()) ::
343+
{:cont | :halt, update_acc()}
344+
def bitap_update(j, acc, constants)
345+
346+
def bitap_update(
347+
j,
348+
{best_loc, score_threshold, start, rd},
349+
_constants
350+
)
351+
when j < start do
322352
# Exceeded our current distance from loc. Done.
323353
# Return `j + 1` in the `start` position and break the iteration.
324354
{:halt, {best_loc, score_threshold, j + 1, rd}}
325355
end
326356

327-
defp bitap_update(
328-
j,
329-
{best_loc, score_threshold, start, rd},
330-
{d, text, loc, last_rd, s, matchmask, shiftmask, pattern_length, match_distance}
331-
) do
357+
def bitap_update(
358+
j,
359+
{best_loc, score_threshold, start, rd},
360+
{d, text, loc, last_rd, s, match_mask, overflow_mask, pattern_length, match_distance}
361+
) do
362+
# $$S_c$$
332363
char_match = s_c(s, String.at(text, j - 1))
333364

334-
rd_j_1 = Map.get(rd, j + 1, 0) |> shift_left(shiftmask)
335-
rd_j_1 = rd_j_1 &&& char_match
365+
# Perform shift-OR update
366+
367+
# $$Lshift[R_{j+1}^d] AND S_c$$
368+
shift_d_and_s_c = (Map.get(rd, j + 1, 0) <<< 1 ||| 1) &&& char_match
336369

337370
rd_j =
338371
if d == 0 do
339372
# First pass: exact match.
340-
rd_j_1
373+
shift_d_and_s_c
341374
else
342375
# Subsequent passes: fuzzy match.
343-
last_rd_j_1 = Map.get(last_rd, j + 1, 0)
344-
last_rd_j = Map.get(last_rd, j, 0)
345-
last_rd_j = (last_rd_j ||| last_rd_j_1) |> shift_left(shiftmask)
346-
rd_j_1 ||| last_rd_j ||| last_rd_j_1
376+
# $$R_{j+1}^{d-1}$$
377+
rd_d1_j1 = Map.get(last_rd, j + 1, 0)
378+
# $$R_j^{d-1}$$
379+
rd_d1_j = Map.get(last_rd, j, 0)
380+
381+
# Restrict shifted values to pattern_length with $$AND overflow_mask$$
382+
# $$Lshift[R_j^{d-1} OR R_{j+1}^{d-1}]$$
383+
shift_d1 = ((rd_d1_j ||| rd_d1_j1) <<< 1 ||| 1) &&& overflow_mask
384+
385+
# $$Lshift[R_{j+1}^d] AND S_c OR Lshift[R_j^{d-1} OR R_{j+1}^{d-1}] OR R_{j+1}^{d-1}$$
386+
shift_d_and_s_c ||| shift_d1 ||| rd_d1_j1
347387
end
348388

349389
# Update mask array
350390
rd = Map.put(rd, j, rd_j)
351391

352-
if (rd_j &&& matchmask) != 0 do
392+
# Test for a match: if $$R_{j+1}^d[m] = 1$$
393+
if (rd_j &&& match_mask) != 0 do
353394
# Found a match
354395
test_score_at_match(
355396
d,
@@ -363,11 +404,6 @@ defmodule Dmp.Match do
363404
end
364405
end
365406

366-
# Keep values during shifts from overflowing by ANDing with `shiftmask`
367-
defp shift_left(val, shiftmask) do
368-
(val <<< 1 &&& shiftmask) ||| 1
369-
end
370-
371407
# We found a match during `bitap_update/3`. Verify
372408
# that it is the best match and either stop the `rd` array update
373409
# if we have already passed `loc` or reduce the range of indices

mix.exs

Lines changed: 41 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
defmodule Dmp.MixProject do
22
use Mix.Project
33

4+
@version "0.1.0"
5+
@github_project_url "https://github.com/pzingg/diff_match_patch"
6+
47
def project do
58
[
69
app: :diff_match_patch,
7-
version: "0.1.0",
10+
version: @version,
811
elixir: "~> 1.11",
912
start_permanent: Mix.env() == :prod,
13+
source_url: @github_project_url,
14+
homepage_url: @github_project_url,
1015
deps: deps(),
16+
package: package(),
1117
docs: docs()
1218
]
1319
end
@@ -19,16 +25,25 @@ defmodule Dmp.MixProject do
1925
]
2026
end
2127

28+
defp package do
29+
[
30+
licenses: ["Apache-2.0"],
31+
links: %{"GitHub" => @github_project_url}
32+
]
33+
end
34+
2235
# Load KaTeX JavaScript to docs for math expressions
2336
defp docs do
2437
[
25-
main: "Diff Match Patch",
2638
authors: ["Peter Zingg <peter.zingg@gmail.com>"],
2739
assets: "priv/assets",
28-
javascript_config_path: "priv/assets/docs_config.js",
40+
javascript_config_path: "assets/docs_config.js",
41+
extras: ["README.md": [filename: "readme", title: "Diff Match Patch"]],
42+
main: "readme",
2943
# You can specify a function for adding
3044
# custom content to the generated HTML.
3145
# This is useful for custom JS/CSS files you want to include.
46+
before_closing_head_tag: &before_closing_head_tag/1,
3247
before_closing_body_tag: &before_closing_body_tag/1
3348
]
3449
end
@@ -38,18 +53,34 @@ defmodule Dmp.MixProject do
3853
# Once loaded, the script will dynamically render all LaTeX
3954
# expressions on the page in place.
4055
# For more details and options see https://katex.org/docs/autorender.html
41-
defp before_closing_body_tag(:html) do
56+
defp before_closing_head_tag(:html) do
4257
"""
43-
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.min.css" integrity="sha384-Xi8rHCmBmhbuyyhbI88391ZKP2dmfnOl4rT9ZfRI7mLTdk1wblIUnrIq35nqwEvC" crossorigin="anonymous"></script>
44-
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.min.js" integrity="sha384-X/XCfMm41VSsqRNQgDerQczD69XqmjOOOwYQvr/uuC+j4OPoNhVgjdGFwhvN02Ja" crossorigin="anonymous"></script>
45-
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/contrib/auto-render.min.js" integrity="sha384-+XBljXPPiv+OzfbB3cVmLHf4hdUFHlWNZN5spNQ7rmHTXpd7WvJum6fIACpNNfIR" crossorigin="anonymous"></script>
58+
<script defer src="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.js" integrity="sha384-I2b1Pcl48X93GxEkGkaMo1hrd6n+IX8H2wgSsMimGbkZoGTve/87h1FjaDNvlpQi" crossorigin="anonymous"></script>
59+
<script defer src="assets/auto-render.js"></script>
60+
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.16.0/dist/katex.min.css" integrity="sha384-Xi8rHCmBmhbuyyhbI88391ZKP2dmfnOl4rT9ZfRI7mLTdk1wblIUnrIq35nqwEvC" crossorigin="anonymous">
61+
<link rel="stylesheet" href="assets/docs.css">
62+
"""
63+
end
64+
65+
defp before_closing_head_tag(_), do: ""
4666

67+
# The `sbMacro` function is a workaround to the problem that
68+
# two underscores in Markdown are processed before KaTeX can
69+
# see them.
70+
defp before_closing_body_tag(:html) do
71+
"""
4772
<script>
48-
document.addEventListener("DOMContentLoaded", function() {
73+
const sbMacro = function(text) {
74+
return text.replace(/subscpt/g, '_');
75+
};
76+
77+
document.addEventListener('DOMContentLoaded', function() {
4978
renderMathInElement(document.body, {
79+
fleqn: true,
80+
preProcess: sbMacro,
5081
delimiters: [
51-
{ left: "$$", right: "$$", display: true },
52-
{ left: "$", right: "$", display: false },
82+
{ left: '$$', right: '$$', display: true },
83+
{ left: '$', right: '$', display: false },
5384
]
5485
});
5586
});

0 commit comments

Comments
 (0)