Skip to content

Commit 36f9b05

Browse files
committed
tests with match_max_bits
1 parent 6569821 commit 36f9b05

File tree

5 files changed

+100
-12
lines changed

5 files changed

+100
-12
lines changed

lib/dmp/options.ex

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,11 @@ defmodule Dmp.Options do
55
* `:diff_timeout` - Number of seconds to map a diff before giving up (0 for infinity).
66
* `:diff_edit_cost` - Cost of an empty edit operation in terms of edit characters.
77
* `:match_max_bits` - The number of bits in an integer (the default is expected to be 32).
8+
This parameter controls the lengths of patterns used in matching and patch splitting.
9+
Set `:match_max_bits` to 0 to disable patch splitting. To avoid long patches in
10+
certain pathological cases, use 32. Elixir supports arbitrarily large integers,
11+
so we allow values of 64 and 128, as well as smaller values. Multiple short patches
12+
(using native ints, `:match_max_bits` of 32 or less) should be much faster than long ones.
813
* `:match_threshold` - At what point is no match declared (0.0 = perfection, 1.0 = very loose).
914
* `:match_distance` - How far to search for a match (0 = exact location, 1000+ = broad match).
1015
A match this many characters away from the expected location will add
@@ -13,7 +18,7 @@ defmodule Dmp.Options do
1318
the contents have to be to match the expected contents. (0.0 = perfection,
1419
1.0 = very loose). Note that `:match_threshold` controls how closely the
1520
end points of a delete need to match.
16-
* `:patch_margin` - Chunk size for context length.
21+
* `:patch_margin` - Chunk size for context length. 4 is a good value.
1722
"""
1823

1924
alias __MODULE__
@@ -105,7 +110,7 @@ defmodule Dmp.Options do
105110
end
106111

107112
defp valid_match_max_bits?(match_max_bits) do
108-
match_max_bits > 0 && match_max_bits <= 128
113+
Enum.member?([0, 8, 16, 32, 64, 128], match_max_bits)
109114
end
110115

111116
defp valid_threshold?(value) do
@@ -173,12 +178,9 @@ defmodule Dmp.Options do
173178
end
174179

175180
defp validate_patch_margin({opts, errors}) do
176-
match_max_bits = Keyword.fetch!(opts, :match_max_bits)
177181
patch_margin = Keyword.fetch!(opts, :patch_margin)
178182

179-
if patch_margin >= 0 &&
180-
(!valid_match_max_bits?(match_max_bits) ||
181-
patch_margin < match_max_bits) do
183+
if patch_margin >= 0 do
182184
{opts, errors}
183185
else
184186
{opts, [{"patch_margin", patch_margin} | errors]}

lib/dmp/patch.ex

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -451,10 +451,10 @@ defmodule Dmp.Patch do
451451
as an array of true/false values indicating which patches were applied.
452452
453453
* `patches` - A patchlist.
454-
* `text` - Old text.
454+
* `text` - Text to apply patch to.
455455
* `opts` - An options keyword list, `[]` to use the default options.
456456
457-
Returns a tuple with two elements: the new text, and a list of
457+
Returns a tuple with two elements: the patched text, and a list of
458458
boolean values. Each boolean corresponds to a patch in the patchlist,
459459
and is `true` if a match was found for the corresponding patch.
460460
"""
@@ -635,6 +635,7 @@ defmodule Dmp.Patch do
635635

636636
@doc """
637637
Add some padding on text start and end so that edges can match something.
638+
638639
Intended to be called only from within `Patch.apply`.
639640
640641
* `patches` - A patchlist.

mix.exs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ defmodule Dmp.MixProject do
2020
end
2121

2222
# Load KaTeX JavaScript to docs for math expressions
23-
def docs do
23+
defp docs do
2424
[
2525
main: "Diff Match Patch",
2626
authors: ["Peter Zingg <peter.zingg@gmail.com>"],

test/patch_test.exs

Lines changed: 80 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,7 @@ defmodule PatchTest do
221221
assert diffs == patch_diffs
222222
end
223223

224+
@tag :match_max_32
224225
test "long string with repeats" do
225226
text1 = repeats()
226227
text2 = text1 <> "123"
@@ -241,12 +242,30 @@ defmodule PatchTest do
241242
)
242243

243244
patches = Patch.split_max(patches, 4)
244-
assert patches != []
245245

246246
assert "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n" ==
247247
Patch.to_text(patches)
248248
end
249249

250+
test "example 1, no splitting" do
251+
opts = [match_max_bits: 0]
252+
253+
patches =
254+
Patch.make(
255+
"abcdefghijklmnopqrstuvwxyz01234567890",
256+
"XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0",
257+
opts
258+
)
259+
260+
unsplit =
261+
"@@ -1,37 +1,56 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n"
262+
263+
assert unsplit == Patch.to_text(patches)
264+
265+
patches = Patch.split_max(patches, 4, opts)
266+
assert unsplit == Patch.to_text(patches)
267+
end
268+
250269
test "example 2" do
251270
patches =
252271
Patch.make(
@@ -272,7 +291,8 @@ defmodule PatchTest do
272291
Patch.to_text(patches)
273292
end
274293

275-
test "example 4" do
294+
@tag :match_max_32
295+
test "example 4, 32 bits" do
276296
patches =
277297
Patch.make(
278298
"abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1",
@@ -284,6 +304,60 @@ defmodule PatchTest do
284304
assert "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n" ==
285305
Patch.to_text(patches)
286306
end
307+
308+
@tag :match_max_64
309+
test "example 4, 64 bits" do
310+
text1 = "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1"
311+
text2 = "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"
312+
# text1, with only the first patch applied
313+
expected3 =
314+
"abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1"
315+
316+
patches_32 = Patch.make(text1, text2)
317+
318+
assert "@@ -2,33 +2,33 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdefg\n@@ -29,33 +29,33 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdefg\n" ==
319+
Patch.to_text(patches_32)
320+
321+
# Only apply the first patch
322+
{text3, _} = patches_32 |> Enum.take(1) |> Patch.apply(text1)
323+
# Only the first "h : 0" was changed
324+
assert expected3 == text3
325+
326+
# Then apply the second patch
327+
{text4, _} = patches_32 |> Enum.drop(1) |> Patch.apply(text3)
328+
assert text2 == text4
329+
330+
patches_32 = Patch.split_max(patches_32, 4)
331+
# After splitting, the patches are slightly different
332+
assert "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n" ==
333+
Patch.to_text(patches_32)
334+
335+
{text3, _} = Patch.apply(patches_32, text1)
336+
assert text2 == text3
337+
338+
opts_64 = [match_max_bits: 64]
339+
patches_64 = Patch.make(text1, text2, opts_64)
340+
# The patches are different than the 32-bit case
341+
assert "@@ -1,58 +1,58 @@\n abcdefghij , h : \n-0\n+1\n , t : 1 abcdefghij , h : 0 , t : 1 abcd\n@@ -29,33 +29,33 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdefg\n" ==
342+
Patch.to_text(patches_64)
343+
344+
# Only apply the first patch
345+
{text3, _} = patches_64 |> Enum.take(1) |> Patch.apply(text1)
346+
# Only the first "h : 0" was changed
347+
assert expected3 == text3
348+
349+
# Then apply the second patch
350+
{text4, _} = patches_64 |> Enum.drop(1) |> Patch.apply(text3)
351+
assert text2 == text4
352+
353+
patches_64 = Patch.split_max(patches_64, 4, opts_64)
354+
# split_max has no effect
355+
assert "@@ -1,58 +1,58 @@\n abcdefghij , h : \n-0\n+1\n , t : 1 abcdefghij , h : 0 , t : 1 abcd\n@@ -29,33 +29,33 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdefg\n" ==
356+
Patch.to_text(patches_64)
357+
358+
{text3, _} = Patch.apply(patches_64, text1)
359+
assert text2 == text3
360+
end
287361
end
288362

289363
describe "add_padding" do
@@ -337,6 +411,7 @@ defmodule PatchTest do
337411
assert {"I am the very model of a modern major general.", [false, false]} == results
338412
end
339413

414+
@tag :match_max_32
340415
test "big delete, small change" do
341416
patches =
342417
Patch.make(
@@ -353,6 +428,7 @@ defmodule PatchTest do
353428
assert {"xabcy", [true, true]} == results
354429
end
355430

431+
@tag :match_max_32
356432
test "big delete, big change 1" do
357433
patches =
358434
Patch.make(
@@ -370,6 +446,7 @@ defmodule PatchTest do
370446
[false, true]} == results
371447
end
372448

449+
@tag :match_max_32
373450
test "big delete, big change 2" do
374451
patches =
375452
Patch.make(
@@ -415,6 +492,7 @@ defmodule PatchTest do
415492
assert {"test", [true]} == results
416493
end
417494

495+
@tag :match_max_32
418496
test "no side effects with major delete" do
419497
patches = Patch.make("The quick brown fox jumps over the lazy dog.", "Woof")
420498
patchstr = Patch.to_text(patches)

test/test_helper.exs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,8 @@
1-
ExUnit.start(exclude: [:skip, :good])
1+
# These tests depend on a specific `:match_max_bits` value.
2+
excluded =
3+
case Dmp.Options.default() |> Keyword.fetch!(:match_max_bits) do
4+
32 -> []
5+
_ -> [:match_max_32]
6+
end
7+
8+
ExUnit.start(exclude: [:skip | excluded])

0 commit comments

Comments
 (0)