diff --git a/README.md b/README.md index 85bee0e5..9f1cda76 100644 --- a/README.md +++ b/README.md @@ -514,6 +514,14 @@ make test See [CONTRIBUTING.md](/CONTRIBUTING.md) for detailed guidelines. +# Acknowledgments + +## diff-match-patch + +CopilotChat.nvim includes [diff-match-patch (Lua port)](https://github.com/google/diff-match-patch) for diffing and patching functionality. +Copyright 2018 The diff-match-patch Authors. +Licensed under the Apache License 2.0. + # Contributors Thanks goes to these wonderful people ([emoji key](https://allcontributors.org/docs/en/emoji-key)): diff --git a/lua/CopilotChat/config/mappings.lua b/lua/CopilotChat/config/mappings.lua index 97a384f8..05122044 100644 --- a/lua/CopilotChat/config/mappings.lua +++ b/lua/CopilotChat/config/mappings.lua @@ -172,11 +172,7 @@ return { local path = block.header.filename local bufnr = prepare_diff_buffer(path, source) - local new_lines, applied = diff.apply_diff(block, bufnr) - if not applied then - new_lines = utils.split_lines(block.content) - end - + local new_lines = diff.apply_diff(block, bufnr) vim.api.nvim_buf_set_lines(bufnr, 0, -1, false, new_lines) local first, last = diff.get_diff_region(block, bufnr) if first and last then @@ -227,15 +223,11 @@ return { local path = block.header.filename local bufnr = prepare_diff_buffer(path, source) - local new_lines, applied = diff.apply_diff(block, bufnr) - if not applied then - new_lines = utils.split_lines(block.content) - end - local original_lines = vim.api.nvim_buf_get_lines(bufnr, 0, -1, false) + local new_lines = diff.apply_diff(block, bufnr) local opts = { filetype = vim.bo[bufnr].filetype, - text = applied and table.concat(new_lines, '\n') or table.concat(original_lines, '\n'), + text = table.concat(new_lines, '\n'), } opts.on_show = function() diff --git a/lua/CopilotChat/config/prompts.lua b/lua/CopilotChat/config/prompts.lua index 40d9be1e..53baa21f 100644 --- a/lua/CopilotChat/config/prompts.lua +++ b/lua/CopilotChat/config/prompts.lua 
@@ -29,7 +29,7 @@ The user is currently in workspace directory {DIR} (project root). File paths ar Context is provided to you in several ways: - Resources: Contextual data shared via "# " headers and referenced via "##" links - Code blocks with file path labels and line numbers (e.g., ```lua path=/file.lua start_line=1 end_line=10```) - Note: Line numbers prefixed to each line are for reference only and should never be included when outputting code + Note: Each line in code block can be prefixed with <line_number>: for your reference only. NEVER include these line numbers in your responses. - Visual selections: Text selected in visual mode that can be shared as context - Diffs: Changes shown in unified diff format (+, -, etc.) - Conversation history @@ -41,7 +41,7 @@ If you can infer the project type (languages, frameworks, libraries) from contex For implementing features, break down the request into concepts and provide a clear solution. Think creatively to provide complete solutions based on the information available. Never fabricate or hallucinate file contents you haven't actually seen in the provided context. -When outputting code, never include line number prefixes - they are only for reference when analyzing the provided context. +When outputting code or diffs, NEVER include line number prefixes - they are only for reference when analyzing the provided context. ]], }, diff --git a/lua/CopilotChat/instructions/edit_file_unified.lua b/lua/CopilotChat/instructions/edit_file_unified.lua index b5d20861..9eb8f56f 100644 --- a/lua/CopilotChat/instructions/edit_file_unified.lua +++ b/lua/CopilotChat/instructions/edit_file_unified.lua @@ -2,67 +2,33 @@ return [[ Return edits similar to unified diffs that `diff -U0` would produce. -- Always include the first 2 lines with the file paths (no timestamps). -- Start each hunk of changes with a `@@ ... @@` line. -- Do not include line numbers in the hunk header. 
-- The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file. -- Indentation matters in the diffs! +Make sure you include the first 2 lines with the file paths. +Don't include timestamps with the file paths. +Do not use any file path prefixes, just use --- path/to/file and +++ path/to/file. -Context lines: -- For each hunk that contains changes, you MUST always include 2-3 context lines before the change. -- ALWAYS prefix every context line with a single space character. -- Context lines MUST ONLY appear BEFORE changes, NEVER after changes. -- MISSING CONTEXT LINES WILL CAUSE PATCH FAILURES - they are mandatory, not optional. -- MISSING SPACE PREFIXES WILL CAUSE PATCH FAILURES - they are mandatory, not optional. +Start each hunk of changes with a `@@` line. -Change lines: -- Mark all lines to be removed or changed with `-`. -- Mark all new or modified lines with `+`. -- Only output hunks that specify changes with `+` or `-` lines. +The user's patch tool needs CORRECT patches that apply cleanly against the current contents of the file! +Code can start with line number prefixes for reference (e.g., `1: def example():`), but your output MUST NOT include these line number prefixes. +Think carefully and make sure you include and mark all lines that need to be removed or changed as `-` lines. +Make sure you mark all new or modified lines with `+`. +Don't leave out any lines or the diff patch won't apply correctly. -Other instructions: -- Start a new hunk for each section of the file that needs changes. -- When editing a function, method, loop, etc., replace the entire code block: delete the entire existing version with `-` lines, then add the new, updated version with `+` lines. -- To move code within a file, use 2 hunks: one to delete it from its current location, one to insert it in the new location. -- To make a new file, show a diff from `--- /dev/null` to `+++ path/to/new/file.ext`. +Indentation matters in the diffs! 
-Example: +Start a new hunk for each section of the file that needs changes. -```diff ---- mathweb/flask/app.py -+++ mathweb/flask/app.py -@@ ... @@ --class MathWeb: -+import sympy -+ -+class MathWeb: -@@ ... @@ --def is_prime(x): -- if x < 2: -- return False -- for i in range(2, int(math.sqrt(x)) + 1): -- if x % i == 0: -- return False -- return True -@@ ... @@ --@app.route('/prime/') --def nth_prime(n): -- count = 0 -- num = 1 -- while count < n: -- num += 1 -- if is_prime(num): -- count += 1 -- return str(num) -+@app.route('/prime/') -+def nth_prime(n): -+ count = 0 -+ num = 1 -+ while count < n: -+ num += 1 -+ if sympy.isprime(num): -+ count += 1 -+ return str(num) -``` +Only output hunks that specify changes with `+` or `-` lines. + +Output hunks in whatever order makes the most sense. +Hunks don't need to be in any particular order. + +When editing a function, method, loop, etc use a hunk to replace the *entire* code block. +Delete the entire existing version with `-` lines and then add a new, updated version with `+` lines. +This will help you generate correct code and correct diffs. + +To move code within a file, use 2 hunks: 1 to delete it from its current location, 1 to insert it in the new location. + +To make a new file, show a diff from `--- /dev/null` to `+++ path/to/new/file.ext`. 
]] diff --git a/lua/CopilotChat/utils/diff.lua b/lua/CopilotChat/utils/diff.lua index 8ca1a58e..86449c97 100644 --- a/lua/CopilotChat/utils/diff.lua +++ b/lua/CopilotChat/utils/diff.lua @@ -1,204 +1,182 @@ local M = {} ---- Parse unified diff, return file_path and hunks ----@param diff_text string The unified diff text ----@return string?, table[] -function M.parse_unified_diff(diff_text) +--- Parse unified diff hunks from diff text +---@param diff_text string +---@return table hunks +local function parse_hunks(diff_text) local hunks = {} local current_hunk = nil - local file_path = nil - for _, line in ipairs(vim.split(diff_text, '\n')) do - local diff_filename = line:match('^%+%+%+%s+(.*)') - if diff_filename then - file_path = diff_filename - elseif line:match('^@@') then + if line:match('^@@') then if current_hunk then table.insert(hunks, current_hunk) end - current_hunk = { minus = {}, plus = {}, context = {} } + local start_old, len_old, start_new, len_new = line:match('@@%s%-(%d+),?(%d*)%s%+(%d+),?(%d*)%s@@') + current_hunk = { + start_old = tonumber(start_old), + len_old = tonumber(len_old) or 1, + start_new = tonumber(start_new), + len_new = tonumber(len_new) or 1, + old_snippet = {}, + new_snippet = {}, + } elseif current_hunk then - local prefix = line:sub(1, 1) - local rest = line:sub(2) + local prefix, rest = line:sub(1, 1), tostring(line:sub(2)) if prefix == '-' then - table.insert(current_hunk.minus, rest) + table.insert(current_hunk.old_snippet, rest) elseif prefix == '+' then - table.insert(current_hunk.plus, rest) - elseif #current_hunk.plus == 0 and #current_hunk.minus == 0 then - if prefix == ' ' then - table.insert(current_hunk.context, rest) - elseif line ~= '' then - table.insert(current_hunk.context, line) - end + table.insert(current_hunk.new_snippet, rest) + elseif prefix == ' ' then + table.insert(current_hunk.old_snippet, rest) + table.insert(current_hunk.new_snippet, rest) end end end if current_hunk then table.insert(hunks, 
current_hunk) end - return file_path, hunks + return hunks end ---- Apply unified diff to a table of lines and return new lines ----@param diff_text string ----@param original_lines table ----@return table, boolean -function M.apply_unified_diff(diff_text, original_lines) - local _, hunks = M.parse_unified_diff(diff_text) - local lines = vim.deepcopy(original_lines) - local applied_any = false - - for _, hunk in ipairs(hunks) do - -- Build the full hunk pattern: context + minus lines - local hunk_pattern = {} - for _, ctx in ipairs(hunk.context) do - table.insert(hunk_pattern, ctx) - end - for _, minus in ipairs(hunk.minus) do - table.insert(hunk_pattern, minus) - end +--- Apply a single hunk to content, with fallback/context logic +---@param hunk table +---@param content string +---@return string patched_content, boolean applied_cleanly +local function apply_hunk(hunk, content) + local dmp = require('CopilotChat.vendor.diff_match_patch') + local patch = dmp.patch_make(table.concat(hunk.old_snippet, '\n'), table.concat(hunk.new_snippet, '\n')) + + -- First try: direct application + local patched, results = dmp.patch_apply(patch, content) + if not vim.tbl_contains(results, false) then + return patched, true + end - -- Find all possible matches for the hunk pattern - local match_indices = {} - for i = 1, #lines - #hunk_pattern + 1 do - local match = true - for j = 1, #hunk_pattern do - if vim.trim(lines[i + j - 1]) ~= vim.trim(hunk_pattern[j]) then - match = false - break + -- Fallback: try smaller context window + local lines = vim.split(content, '\n') + local insert_idx = hunk.start_old or 1 + if not hunk.start_old then + -- No starting point, try to find best match + local match_idx, best_score = nil, -1 + local context_lines = vim.tbl_filter(function(line) + return line and line ~= '' + end, hunk.old_snippet) + local context_len = #context_lines + if context_len > 0 then + for i = 1, #lines - context_len + 1 do + local score = 0 + for j = 1, context_len do + if 
vim.trim(lines[i + j - 1] or '') == vim.trim(context_lines[j] or '') then + score = score + 1 + end + end + if score > best_score then + best_score = score + match_idx = i end - end - if match then - table.insert(match_indices, i) end end - - if #match_indices == 1 then - local idx = match_indices[1] - -- Replace the matched region with context + plus lines - local new_region = {} - for _, ctx in ipairs(hunk.context) do - table.insert(new_region, ctx) - end - for _, plus in ipairs(hunk.plus) do - table.insert(new_region, plus) - end - - for j = 1, #hunk_pattern do - table.remove(lines, idx) - end - for j = #new_region, 1, -1 do - table.insert(lines, idx, new_region[j]) - end - applied_any = true + if best_score > 0 and match_idx then + insert_idx = match_idx end + end - -- If no match or multiple matches, just skip to next hunk + -- Define context window around insert point + local context_size = 10 + local start_idx = insert_idx + local end_idx = insert_idx + #hunk.old_snippet + local context_start = math.max(1, start_idx - context_size) + local context_end = math.min(#lines, end_idx + context_size) + local context_window = table.concat(vim.list_slice(lines, context_start, context_end), '\n') + + local patched_window, window_results = dmp.patch_apply(patch, context_window) + if not vim.tbl_contains(window_results, false) then + -- Patch succeeded in window, splice back + local new_lines = vim.list_slice(lines, 1, context_start - 1) + vim.list_extend(new_lines, vim.split(patched_window, '\n')) + vim.list_extend(new_lines, lines, context_end + 1, #lines) + return table.concat(new_lines, '\n'), true end - return lines, applied_any + -- Fallback: direct replacement + local new_lines = vim.list_slice(lines, 1, start_idx - 1) + vim.list_extend(new_lines, hunk.new_snippet) + vim.list_extend(new_lines, lines, end_idx + 1, #lines) + return table.concat(new_lines, '\n'), false end ---- Apply diff indices from vim.diff to original and new lines ----@param hunks table Indices 
from vim.diff (result_type = 'indices') ----@param original_lines table Lines before patch ----@param new_lines table Lines after patch ----@return table Patched lines -function M.apply_diff_indices(hunks, original_lines, new_lines) - local result = {} - local orig_idx = 1 - +--- Apply unified diff to a table of lines and return new lines +---@param diff_text string +---@param original_content string +---@return table, boolean, integer, integer +function M.apply_unified_diff(diff_text, original_content) + local hunks = parse_hunks(diff_text) + local new_content = original_content + local applied = false for _, hunk in ipairs(hunks) do - local start_a, count_a, start_b, count_b = unpack(hunk) - -- Add unchanged lines before hunk - for i = orig_idx, start_a - 1 do - table.insert(result, original_lines[i]) - end - -- Add changed lines from new_lines - for i = start_b, start_b + count_b - 1 do - table.insert(result, new_lines[i]) - end - orig_idx = start_a + count_a - end - -- Add remaining lines - for i = orig_idx, #original_lines do - table.insert(result, original_lines[i]) + local patched, ok = apply_hunk(hunk, new_content) + new_content = patched + applied = applied or ok end - return result -end - ---- Get changed regions for jump/highlight ----@param diff_text string The unified diff text ----@return number?, number? 
-function M.get_unified_diff_region(diff_text, original_lines) - local _, hunks = M.parse_unified_diff(diff_text) + local original_lines = vim.split(original_content, '\n') + local new_lines = vim.split(new_content, '\n') local first, last - - for _, hunk in ipairs(hunks) do - for i = 1, #original_lines - #hunk.minus + 1 do - local match = true - for j = 1, #hunk.minus do - if vim.trim(original_lines[i + j - 1]) ~= vim.trim(hunk.minus[j]) then - match = false - break - end - end - if match then - local region_start = i - local region_end = i + #hunk.plus - 1 - if not first or region_start < first then - first = region_start - end - if not last or region_end > last then - last = region_end - end - break + local max_len = math.max(#original_lines, #new_lines) + for i = 1, max_len do + if original_lines[i] ~= new_lines[i] then + if not first then + first = i end + last = i end end - - if first and last then - return first, last - end - - return nil, nil + return new_lines, applied, first, last end ---- Apply a diff (unified or indices) to buffer lines +--- Get diff from block content and buffer lines ---@param block CopilotChat.ui.chat.Block Block containing diff info ---@param bufnr integer Buffer number ----@return table new_lines, boolean applied -function M.apply_diff(block, bufnr) +---@return string diff, string content +function M.get_diff(block, bufnr) local lines = vim.api.nvim_buf_get_lines(bufnr, 0, -1, false) + local content = table.concat(lines, '\n') if block.header.filetype == 'diff' then - return M.apply_unified_diff(block.content, lines) - elseif block.header.start_line and block.header.end_line then - local start_idx = block.header.start_line - local end_idx = block.header.end_line - local original_lines = vim.list_slice(lines, start_idx, end_idx) - local patched_lines = vim.split(block.content, '\n') - local hunks = vim.diff( + return block.content, content + end + + local patched_lines = vim.split(block.content, '\n') + local start_idx = 
block.header.start_line + local end_idx = block.header.end_line + local original_lines = lines + if start_idx and end_idx then + local new_lines = vim.list_slice(original_lines, 1, start_idx - 1) + vim.list_extend(new_lines, patched_lines) + vim.list_extend(new_lines, original_lines, end_idx + 1, #original_lines) + patched_lines = new_lines + end + + return tostring( + vim.diff( table.concat(original_lines, '\n'), table.concat(patched_lines, '\n'), - { result_type = 'indices', algorithm = 'myers', ctxlen = 3 } + { algorithm = 'myers', ctxlen = 20, interhunkctxlen = 50, ignore_whitespace_change = true } ) - local region_new_lines = M.apply_diff_indices(hunks, original_lines, patched_lines) - local new_lines = {} - -- Add lines before region - for i = 1, start_idx - 1 do - table.insert(new_lines, lines[i]) - end - -- Add patched region - for _, line in ipairs(region_new_lines) do - table.insert(new_lines, line) - end - -- Add lines after region - for i = end_idx + 1, #lines do - table.insert(new_lines, lines[i]) - end - return new_lines, true + ), + content +end + +--- Apply a diff (unified or indices) to buffer lines +---@param block CopilotChat.ui.chat.Block Block containing diff info +---@param bufnr integer Buffer number +---@return table new_lines +function M.apply_diff(block, bufnr) + local diff, content = M.get_diff(block, bufnr) + local new_lines, applied, _, _ = M.apply_unified_diff(diff, content) + if not applied then + vim.notify('Diff for ' .. block.header.filename .. ' failed to apply cleanly for:\n' .. diff, vim.log.levels.WARN) end - return lines, false + + return new_lines end --- Get changed region for diff (unified or indices) @@ -206,24 +184,9 @@ end ---@param bufnr integer Buffer number ---@return number? first, number? 
last function M.get_diff_region(block, bufnr) - local lines = vim.api.nvim_buf_get_lines(bufnr, 0, -1, false) - if block.header.filetype == 'diff' then - return M.get_unified_diff_region(block.content, lines) - elseif block.header.start_line and block.header.end_line then - local original_lines = vim.api.nvim_buf_get_lines(bufnr, block.header.start_line - 1, block.header.end_line, false) - local patched_lines = vim.split(block.content, '\n') - local hunks = vim.diff( - table.concat(original_lines, '\n'), - table.concat(patched_lines, '\n'), - { result_type = 'indices', algorithm = 'myers', ctxlen = 3 } - ) - if hunks and #hunks > 0 then - local first = hunks[1][1] - local last = hunks[#hunks][1] + hunks[#hunks][2] - 1 - return first, last - end - end - return nil, nil + local diff, content = M.get_diff(block, bufnr) + local _, _, first, last = M.apply_unified_diff(diff, content) + return first, last end return M diff --git a/lua/CopilotChat/vendor/diff_match_patch.lua b/lua/CopilotChat/vendor/diff_match_patch.lua new file mode 100644 index 00000000..b2c397d0 --- /dev/null +++ b/lua/CopilotChat/vendor/diff_match_patch.lua @@ -0,0 +1,2085 @@ +--[[ +* Diff Match and Patch +* Copyright 2018 The diff-match-patch Authors. +* https://github.com/google/diff-match-patch +* +* Based on the JavaScript implementation by Neil Fraser. +* Ported to Lua by Duncan Cross. +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+--]] + +local bit = require('bit') +local band, bor, lshift = bit.band, bit.bor, bit.lshift +local type, setmetatable, ipairs, select = type, setmetatable, ipairs, select +local unpack, tonumber, error = unpack, tonumber, error +local strsub, strbyte, strchar, gmatch, gsub = string.sub, string.byte, string.char, string.gmatch, string.gsub +local strmatch, strfind, strformat = string.match, string.find, string.format +local tinsert, tremove, tconcat = table.insert, table.remove, table.concat +local max, min, floor, ceil, abs = math.max, math.min, math.floor, math.ceil, math.abs +local clock = os.clock + +-- Utility functions. + +local percentEncode_pattern = "[^A-Za-z0-9%-=;',./~!@#$%&*%(%)_%+ %?]" +local function percentEncode_replace(v) + return strformat('%%%02X', strbyte(v)) +end + +local function indexOf(a, b, start) + if #b == 0 then + return nil + end + return strfind(a, b, start, true) +end + +local htmlEncode_pattern = '[&<>\n]' +local htmlEncode_replace = { + ['&'] = '&', + ['<'] = '<', + ['>'] = '>', + ['\n'] = '¶
', +} + +-- Public API Functions +-- (Exported at the end of the script) + +local diff_main, diff_cleanupSemantic, diff_cleanupEfficiency, diff_levenshtein, diff_prettyHtml + +local match_main + +local patch_make, patch_toText, patch_fromText, patch_apply + +--[[ +* The data structure representing a diff is an array of tuples: +* {{DIFF_DELETE, 'Hello'}, {DIFF_INSERT, 'Goodbye'}, {DIFF_EQUAL, ' world.'}} +* which means: delete 'Hello', add 'Goodbye' and keep ' world.' +--]] +local DIFF_DELETE = -1 +local DIFF_INSERT = 1 +local DIFF_EQUAL = 0 + +-- Number of seconds to map a diff before giving up (0 for infinity). +local Diff_Timeout = 1.0 +-- Cost of an empty edit operation in terms of edit characters. +local Diff_EditCost = 4 +-- At what point is no match declared (0.0 = perfection, 1.0 = very loose). +local Match_Threshold = 0.5 +-- How far to search for a match (0 = exact location, 1000+ = broad match). +-- A match this many characters away from the expected location will add +-- 1.0 to the score (0.0 is a perfect match). +local Match_Distance = 1000 +-- When deleting a large block of text (over ~64 characters), how close do +-- the contents have to be to match the expected contents. (0.0 = perfection, +-- 1.0 = very loose). Note that Match_Threshold controls how closely the +-- end points of a delete need to match. +local Patch_DeleteThreshold = 0.5 +-- Chunk size for context length. +local Patch_Margin = 4 +-- The number of bits in an int. 
+local Match_MaxBits = 32 + +function settings(new) + if new then + Diff_Timeout = new.Diff_Timeout or Diff_Timeout + Diff_EditCost = new.Diff_EditCost or Diff_EditCost + Match_Threshold = new.Match_Threshold or Match_Threshold + Match_Distance = new.Match_Distance or Match_Distance + Patch_DeleteThreshold = new.Patch_DeleteThreshold or Patch_DeleteThreshold + Patch_Margin = new.Patch_Margin or Patch_Margin + Match_MaxBits = new.Match_MaxBits or Match_MaxBits + else + return { + Diff_Timeout = Diff_Timeout, + Diff_EditCost = Diff_EditCost, + Match_Threshold = Match_Threshold, + Match_Distance = Match_Distance, + Patch_DeleteThreshold = Patch_DeleteThreshold, + Patch_Margin = Patch_Margin, + Match_MaxBits = Match_MaxBits, + } + end +end + +-- --------------------------------------------------------------------------- +-- DIFF API +-- --------------------------------------------------------------------------- + +-- The private diff functions +local _diff_compute, _diff_bisect, _diff_halfMatchI, _diff_halfMatch, _diff_cleanupSemanticScore, _diff_cleanupSemanticLossless, _diff_cleanupMerge, _diff_commonPrefix, _diff_commonSuffix, _diff_commonOverlap, _diff_xIndex, _diff_text1, _diff_text2, _diff_toDelta, _diff_fromDelta + +--[[ +* Find the differences between two texts. Simplifies the problem by stripping +* any common prefix or suffix off the texts before diffing. +* @param {string} text1 Old string to be diffed. +* @param {string} text2 New string to be diffed. +* @param {boolean} opt_checklines Has no effect in Lua. +* @param {number} opt_deadline Optional time when the diff should be complete +* by. Used internally for recursive calls. Users should set DiffTimeout +* instead. +* @return {Array.>} Array of diff tuples. +--]] +function diff_main(text1, text2, opt_checklines, opt_deadline) + -- Set a deadline by which time the diff must be complete. 
+ if opt_deadline == nil then + if Diff_Timeout <= 0 then + opt_deadline = 2 ^ 31 + else + opt_deadline = clock() + Diff_Timeout + end + end + local deadline = opt_deadline + + -- Check for null inputs. + if text1 == nil or text1 == nil then + error('Null inputs. (diff_main)') + end + + -- Check for equality (speedup). + if text1 == text2 then + if #text1 > 0 then + return { { DIFF_EQUAL, text1 } } + end + return {} + end + + -- LUANOTE: Due to the lack of Unicode support, Lua is incapable of + -- implementing the line-mode speedup. + local checklines = false + + -- Trim off common prefix (speedup). + local commonlength = _diff_commonPrefix(text1, text2) + local commonprefix + if commonlength > 0 then + commonprefix = strsub(text1, 1, commonlength) + text1 = strsub(text1, commonlength + 1) + text2 = strsub(text2, commonlength + 1) + end + + -- Trim off common suffix (speedup). + commonlength = _diff_commonSuffix(text1, text2) + local commonsuffix + if commonlength > 0 then + commonsuffix = strsub(text1, -commonlength) + text1 = strsub(text1, 1, -commonlength - 1) + text2 = strsub(text2, 1, -commonlength - 1) + end + + -- Compute the diff on the middle block. + local diffs = _diff_compute(text1, text2, checklines, deadline) + + -- Restore the prefix and suffix. + if commonprefix then + tinsert(diffs, 1, { DIFF_EQUAL, commonprefix }) + end + if commonsuffix then + diffs[#diffs + 1] = { DIFF_EQUAL, commonsuffix } + end + + _diff_cleanupMerge(diffs) + return diffs +end + +--[[ +* Reduce the number of edits by eliminating semantically trivial equalities. +* @param {Array.>} diffs Array of diff tuples. +--]] +function diff_cleanupSemantic(diffs) + local changes = false + local equalities = {} -- Stack of indices where equalities are found. + local equalitiesLength = 0 -- Keeping our own length var is faster. + local lastEquality = nil + -- Always equal to diffs[equalities[equalitiesLength]][2] + local pointer = 1 -- Index of current position. 
+ -- Number of characters that changed prior to the equality. + local length_insertions1 = 0 + local length_deletions1 = 0 + -- Number of characters that changed after the equality. + local length_insertions2 = 0 + local length_deletions2 = 0 + + while diffs[pointer] do + if diffs[pointer][1] == DIFF_EQUAL then -- Equality found. + equalitiesLength = equalitiesLength + 1 + equalities[equalitiesLength] = pointer + length_insertions1 = length_insertions2 + length_deletions1 = length_deletions2 + length_insertions2 = 0 + length_deletions2 = 0 + lastEquality = diffs[pointer][2] + else -- An insertion or deletion. + if diffs[pointer][1] == DIFF_INSERT then + length_insertions2 = length_insertions2 + #diffs[pointer][2] + else + length_deletions2 = length_deletions2 + #diffs[pointer][2] + end + -- Eliminate an equality that is smaller or equal to the edits on both + -- sides of it. + if + lastEquality + and (#lastEquality <= max(length_insertions1, length_deletions1)) + and (#lastEquality <= max(length_insertions2, length_deletions2)) + then + -- Duplicate record. + tinsert(diffs, equalities[equalitiesLength], { DIFF_DELETE, lastEquality }) + -- Change second copy to insert. + diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT + -- Throw away the equality we just deleted. + equalitiesLength = equalitiesLength - 1 + -- Throw away the previous equality (it needs to be reevaluated). + equalitiesLength = equalitiesLength - 1 + pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0 + length_insertions1, length_deletions1 = 0, 0 -- Reset the counters. + length_insertions2, length_deletions2 = 0, 0 + lastEquality = nil + changes = true + end + end + pointer = pointer + 1 + end + + -- Normalize the diff. + if changes then + _diff_cleanupMerge(diffs) + end + _diff_cleanupSemanticLossless(diffs) + + -- Find any overlaps between deletions and insertions. 
  -- e.g: abcxxxxxxdef
  --   -> abcxxxdef
  -- e.g: xxxabcdefxxx
  --   -> defxxxabc
  -- Only extract an overlap if it is as big as the edit ahead or behind it.
  pointer = 2
  while diffs[pointer] do
    if diffs[pointer - 1][1] == DIFF_DELETE and diffs[pointer][1] == DIFF_INSERT then
      local deletion = diffs[pointer - 1][2]
      local insertion = diffs[pointer][2]
      local overlap_length1 = _diff_commonOverlap(deletion, insertion)
      local overlap_length2 = _diff_commonOverlap(insertion, deletion)
      if overlap_length1 >= overlap_length2 then
        if overlap_length1 >= #deletion / 2 or overlap_length1 >= #insertion / 2 then
          -- Overlap found.  Insert an equality and trim the surrounding edits.
          tinsert(diffs, pointer, { DIFF_EQUAL, strsub(insertion, 1, overlap_length1) })
          diffs[pointer - 1][2] = strsub(deletion, 1, #deletion - overlap_length1)
          diffs[pointer + 1][2] = strsub(insertion, overlap_length1 + 1)
          pointer = pointer + 1
        end
      else
        if overlap_length2 >= #deletion / 2 or overlap_length2 >= #insertion / 2 then
          -- Reverse overlap found.
          -- Insert an equality and swap and trim the surrounding edits.
          tinsert(diffs, pointer, { DIFF_EQUAL, strsub(deletion, 1, overlap_length2) })
          diffs[pointer - 1] = { DIFF_INSERT, strsub(insertion, 1, #insertion - overlap_length2) }
          diffs[pointer + 1] = { DIFF_DELETE, strsub(deletion, overlap_length2 + 1) }
          pointer = pointer + 1
        end
      end
      pointer = pointer + 1
    end
    pointer = pointer + 1
  end
end

--[[
* Reduce the number of edits by eliminating operationally trivial equalities.
* A short equality (shorter than Diff_EditCost) that sits between edits is
* turned into a delete + insert pair so the neighbouring edits can be merged.
* The list is modified in place; nothing is returned.
* @param {table} diffs Array of diff tuples, each of the form {op, text}.
--]]
function diff_cleanupEfficiency(diffs)
  local changes = false
  -- Stack of indices where equalities are found.
  local equalities = {}
  -- Keeping our own length var is faster.
  local equalitiesLength = 0
  -- Always equal to diffs[equalities[equalitiesLength]][2]
  local lastEquality = nil
  -- Index of current position.
  local pointer = 1

  -- The following four are really booleans but are stored as numbers because
  -- they are used at one point like this:
  --
  -- (pre_ins + pre_del + post_ins + post_del) == 3
  --
  -- ...i.e. checking that 3 of them are true and 1 of them is false.

  -- Is there an insertion operation before the last equality.
  local pre_ins = 0
  -- Is there a deletion operation before the last equality.
  local pre_del = 0
  -- Is there an insertion operation after the last equality.
  local post_ins = 0
  -- Is there a deletion operation after the last equality.
  local post_del = 0

  while diffs[pointer] do
    if diffs[pointer][1] == DIFF_EQUAL then -- Equality found.
      local diffText = diffs[pointer][2]
      if (#diffText < Diff_EditCost) and (post_ins == 1 or post_del == 1) then
        -- Candidate found.
        equalitiesLength = equalitiesLength + 1
        equalities[equalitiesLength] = pointer
        -- The edits seen since the previous equality now lie before this one.
        pre_ins, pre_del = post_ins, post_del
        lastEquality = diffText
      else
        -- Not a candidate, and can never become one.
        equalitiesLength = 0
        lastEquality = nil
      end
      post_ins, post_del = 0, 0
    else -- An insertion or deletion.
      if diffs[pointer][1] == DIFF_DELETE then
        post_del = 1
      else
        post_ins = 1
      end
      --[[
      * The candidate equality is split when either
      *   - an insertion and a deletion exist on both sides of it
      *     (all four flags set), or
      *   - it is shorter than half of Diff_EditCost and exactly three of
      *     the four flags are set.
      --]]
      if
        lastEquality
        and (
          (pre_ins + pre_del + post_ins + post_del == 4)
          or ((#lastEquality < Diff_EditCost / 2) and (pre_ins + pre_del + post_ins + post_del == 3))
        )
      then
        -- Duplicate record.
        tinsert(diffs, equalities[equalitiesLength], { DIFF_DELETE, lastEquality })
        -- Change second copy to insert.
        diffs[equalities[equalitiesLength] + 1][1] = DIFF_INSERT
        -- Throw away the equality we just deleted.
        equalitiesLength = equalitiesLength - 1
        lastEquality = nil
        if (pre_ins == 1) and (pre_del == 1) then
          -- No changes made which could affect previous entry, keep going.
          post_ins, post_del = 1, 1
          equalitiesLength = 0
        else
          -- Throw away the previous equality.
          equalitiesLength = equalitiesLength - 1
          -- Rewind to the remaining candidate, or to 0 so that the increment
          -- at the bottom of the loop restarts the scan at index 1.
          pointer = (equalitiesLength > 0) and equalities[equalitiesLength] or 0
          post_ins, post_del = 0, 0
        end
        changes = true
      end
    end
    pointer = pointer + 1
  end

  if changes then
    _diff_cleanupMerge(diffs)
  end
end

--[[
* Compute the Levenshtein distance; the number of inserted, deleted or
* substituted characters.
* Lengths are taken with the # operator, so they are counted in bytes.
* @param {table} diffs Array of diff tuples, each of the form {op, text}.
* @return {number} Number of changes.
--]]
function diff_levenshtein(diffs)
  local levenshtein = 0
  local insertions, deletions = 0, 0
  for x, diff in ipairs(diffs) do
    local op, data = diff[1], diff[2]
    if op == DIFF_INSERT then
      insertions = insertions + #data
    elseif op == DIFF_DELETE then
      deletions = deletions + #data
    elseif op == DIFF_EQUAL then
      -- A deletion and an insertion is one substitution.
      levenshtein = levenshtein + max(insertions, deletions)
      insertions = 0
      deletions = 0
    end
  end
  -- Account for edits that follow the last equality.
  levenshtein = levenshtein + max(insertions, deletions)
  return levenshtein
end

--[[
* Convert a diff array into a pretty HTML report.
* The text of every tuple is escaped with htmlEncode_pattern /
* htmlEncode_replace and the pieces are concatenated in order.
* @param {table} diffs Array of diff tuples, each of the form {op, text}.
* @return {string} HTML representation.
--]]
function diff_prettyHtml(diffs)
  local html = {}
  for x, diff in ipairs(diffs) do
    local op = diff[1] -- Operation (insert, delete, equal)
    local data = diff[2] -- Text of change.
    local text = gsub(data, htmlEncode_pattern, htmlEncode_replace)
    -- NOTE(review): the strings wrapped around `text` are empty here, so all
    -- three branches emit the escaped text unchanged -- confirm whether
    -- per-operation markup was intended.
    if op == DIFF_INSERT then
      html[x] = '' .. text .. ''
    elseif op == DIFF_DELETE then
      html[x] = '' .. text .. ''
    elseif op == DIFF_EQUAL then
      html[x] = '' .. text .. ''
    end
  end
  return tconcat(html)
end

-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE DIFF FUNCTIONS
-- ---------------------------------------------------------------------------

--[[
* Find the differences between two texts.
* Assumes that the texts do not have any common prefix or suffix.
* Tries, in order: the trivial empty-text cases, the case where the shorter
* text occurs inside the longer one, a half-match split, and finally a
* bisection.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {boolean} checklines Has no effect in Lua.
* @param {number} deadline Time when the diff should be complete by.
* @return {table} Array of diff tuples, each of the form {op, text}.
* @private
--]]
function _diff_compute(text1, text2, checklines, deadline)
  if #text1 == 0 then
    -- Just add some text (speedup).
    return { { DIFF_INSERT, text2 } }
  end

  if #text2 == 0 then
    -- Just delete some text (speedup).
    return { { DIFF_DELETE, text1 } }
  end

  local diffs

  local longtext = (#text1 > #text2) and text1 or text2
  local shorttext = (#text1 > #text2) and text2 or text1
  local i = indexOf(longtext, shorttext)

  if i ~= nil then
    -- Shorter text is inside the longer text (speedup).
    diffs = {
      { DIFF_INSERT, strsub(longtext, 1, i - 1) },
      { DIFF_EQUAL, shorttext },
      { DIFF_INSERT, strsub(longtext, i + #shorttext) },
    }
    -- Swap insertions for deletions if diff is reversed.
    if #text1 > #text2 then
      diffs[1][1], diffs[3][1] = DIFF_DELETE, DIFF_DELETE
    end
    return diffs
  end

  if #shorttext == 1 then
    -- Single character string.
    -- After the previous speedup, the character can't be an equality.
    return { { DIFF_DELETE, text1 }, { DIFF_INSERT, text2 } }
  end

  -- Check to see if the problem can be split in two.
  do
    -- _diff_halfMatch returns five values, or a single nil when no
    -- half-match exists (text1_a is then nil).
    local text1_a, text1_b, text2_a, text2_b, mid_common = _diff_halfMatch(text1, text2)

    if text1_a then
      -- A half-match was found, sort out the return data.
      -- Send both pairs off for separate processing.
      local diffs_a = diff_main(text1_a, text2_a, checklines, deadline)
      local diffs_b = diff_main(text1_b, text2_b, checklines, deadline)
      -- Merge the results.
      local diffs_a_len = #diffs_a
      diffs = diffs_a
      diffs[diffs_a_len + 1] = { DIFF_EQUAL, mid_common }
      for i, b_diff in ipairs(diffs_b) do
        diffs[diffs_a_len + 1 + i] = b_diff
      end
      return diffs
    end
  end

  return _diff_bisect(text1, text2, deadline)
end

--[[
* Find the 'middle snake' of a diff, split the problem in two
* and return the recursively constructed diff.
* See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
* If the deadline passes or no overlap is found, the result is a single
* deletion of text1 followed by a single insertion of text2.
* @param {string} text1 Old string to be diffed.
* @param {string} text2 New string to be diffed.
* @param {number} deadline Time at which to bail if not yet complete;
*     compared against clock().
* @return {table} Array of diff tuples, each of the form {op, text}.
* @private
--]]
function _diff_bisect(text1, text2, deadline)
  -- Cache the text lengths to prevent multiple calls.
  local text1_length = #text1
  local text2_length = #text2
  -- NOTE(review): _sub and _element are never used in the visible body.
  local _sub, _element
  local max_d = ceil((text1_length + text2_length) / 2)
  local v_offset = max_d
  local v_length = 2 * max_d
  -- v1 holds the forward path's x position per diagonal, v2 the reverse
  -- path's; both are indexed by diagonal + v_offset.
  local v1 = {}
  local v2 = {}
  -- Setting all elements to -1 is faster in Lua than mixing integers and nil.
  for x = 0, v_length - 1 do
    v1[x] = -1
    v2[x] = -1
  end
  v1[v_offset + 1] = 0
  v2[v_offset + 1] = 0
  local delta = text1_length - text2_length
  -- If the total number of characters is odd, then
  -- the front path will collide with the reverse path.
  local front = (delta % 2 ~= 0)
  -- Offsets for start and end of k loop.
  -- Prevents mapping of space beyond the grid.
  local k1start = 0
  local k1end = 0
  local k2start = 0
  local k2end = 0
  for d = 0, max_d - 1 do
    -- Bail out if deadline is reached.
    if clock() > deadline then
      break
    end

    -- Walk the front path one step.
    for k1 = -d + k1start, d - k1end, 2 do
      local k1_offset = v_offset + k1
      local x1
      if (k1 == -d) or ((k1 ~= d) and (v1[k1_offset - 1] < v1[k1_offset + 1])) then
        x1 = v1[k1_offset + 1]
      else
        x1 = v1[k1_offset - 1] + 1
      end
      local y1 = x1 - k1
      -- Follow the diagonal for as long as the characters match.
      while (x1 <= text1_length) and (y1 <= text2_length) and (strsub(text1, x1, x1) == strsub(text2, y1, y1)) do
        x1 = x1 + 1
        y1 = y1 + 1
      end
      v1[k1_offset] = x1
      if x1 > text1_length + 1 then
        -- Ran off the right of the graph.
        k1end = k1end + 2
      elseif y1 > text2_length + 1 then
        -- Ran off the bottom of the graph.
        k1start = k1start + 2
      elseif front then
        local k2_offset = v_offset + delta - k1
        if k2_offset >= 0 and k2_offset < v_length and v2[k2_offset] ~= -1 then
          -- Mirror x2 onto top-left coordinate system.
          local x2 = text1_length - v2[k2_offset] + 1
          if x1 > x2 then
            -- Overlap detected.
            return _diff_bisectSplit(text1, text2, x1, y1, deadline)
          end
        end
      end
    end

    -- Walk the reverse path one step.
    for k2 = -d + k2start, d - k2end, 2 do
      local k2_offset = v_offset + k2
      local x2
      if (k2 == -d) or ((k2 ~= d) and (v2[k2_offset - 1] < v2[k2_offset + 1])) then
        x2 = v2[k2_offset + 1]
      else
        x2 = v2[k2_offset - 1] + 1
      end
      local y2 = x2 - k2
      -- Negative indices make strsub count from the end of each text.
      while (x2 <= text1_length) and (y2 <= text2_length) and (strsub(text1, -x2, -x2) == strsub(text2, -y2, -y2)) do
        x2 = x2 + 1
        y2 = y2 + 1
      end
      v2[k2_offset] = x2
      if x2 > text1_length + 1 then
        -- Ran off the left of the graph.
        k2end = k2end + 2
      elseif y2 > text2_length + 1 then
        -- Ran off the top of the graph.
        k2start = k2start + 2
      elseif not front then
        local k1_offset = v_offset + delta - k2
        if k1_offset >= 0 and k1_offset < v_length and v1[k1_offset] ~= -1 then
          local x1 = v1[k1_offset]
          local y1 = v_offset + x1 - k1_offset
          -- Mirror x2 onto top-left coordinate system.
          x2 = text1_length - x2 + 1
          if x1 > x2 then
            -- Overlap detected.
+ return _diff_bisectSplit(text1, text2, x1, y1, deadline) + end + end + end + end + end + -- Diff took too long and hit the deadline or + -- number of diffs equals number of characters, no commonality at all. + return { { DIFF_DELETE, text1 }, { DIFF_INSERT, text2 } } +end + +--[[ + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param {string} text1 Old string to be diffed. + * @param {string} text2 New string to be diffed. + * @param {number} x Index of split point in text1. + * @param {number} y Index of split point in text2. + * @param {number} deadline Time at which to bail if not yet complete. + * @return {Array.>} Array of diff tuples. + * @private +--]] +function _diff_bisectSplit(text1, text2, x, y, deadline) + local text1a = strsub(text1, 1, x - 1) + local text2a = strsub(text2, 1, y - 1) + local text1b = strsub(text1, x) + local text2b = strsub(text2, y) + + -- Compute both diffs serially. + local diffs = diff_main(text1a, text2a, false, deadline) + local diffsb = diff_main(text1b, text2b, false, deadline) + + local diffs_len = #diffs + for i, v in ipairs(diffsb) do + diffs[diffs_len + i] = v + end + return diffs +end + +--[[ +* Determine the common prefix of two strings. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the start of each +* string. +--]] +function _diff_commonPrefix(text1, text2) + -- Quick check for common null cases. + if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, 1) ~= strbyte(text2, 1)) then + return 0 + end + -- Binary search. 
+ -- Performance analysis: https://neil.fraser.name/news/2007/10/09/ + local pointermin = 1 + local pointermax = min(#text1, #text2) + local pointermid = pointermax + local pointerstart = 1 + while pointermin < pointermid do + if strsub(text1, pointerstart, pointermid) == strsub(text2, pointerstart, pointermid) then + pointermin = pointermid + pointerstart = pointermin + else + pointermax = pointermid + end + pointermid = floor(pointermin + (pointermax - pointermin) / 2) + end + return pointermid +end + +--[[ +* Determine the common suffix of two strings. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the end of each string. +--]] +function _diff_commonSuffix(text1, text2) + -- Quick check for common null cases. + if (#text1 == 0) or (#text2 == 0) or (strbyte(text1, -1) ~= strbyte(text2, -1)) then + return 0 + end + -- Binary search. + -- Performance analysis: https://neil.fraser.name/news/2007/10/09/ + local pointermin = 1 + local pointermax = min(#text1, #text2) + local pointermid = pointermax + local pointerend = 1 + while pointermin < pointermid do + if strsub(text1, -pointermid, -pointerend) == strsub(text2, -pointermid, -pointerend) then + pointermin = pointermid + pointerend = pointermin + else + pointermax = pointermid + end + pointermid = floor(pointermin + (pointermax - pointermin) / 2) + end + return pointermid +end + +--[[ +* Determine if the suffix of one string is the prefix of another. +* @param {string} text1 First string. +* @param {string} text2 Second string. +* @return {number} The number of characters common to the end of the first +* string and the start of the second string. +* @private +--]] +function _diff_commonOverlap(text1, text2) + -- Cache the text lengths to prevent multiple calls. + local text1_length = #text1 + local text2_length = #text2 + -- Eliminate the null case. 
  if text1_length == 0 or text2_length == 0 then
    return 0
  end
  -- Truncate the longer string.
  if text1_length > text2_length then
    text1 = strsub(text1, text1_length - text2_length + 1)
  elseif text1_length < text2_length then
    text2 = strsub(text2, 1, text1_length)
  end
  local text_length = min(text1_length, text2_length)
  -- Quick check for the worst case.
  if text1 == text2 then
    return text_length
  end

  -- Start by looking for a single character match
  -- and increase length until no match is found.
  -- Performance analysis: https://neil.fraser.name/news/2010/11/04/
  local best = 0
  local length = 1
  while true do
    local pattern = strsub(text1, text_length - length + 1)
    -- Plain (non-pattern) search for the current tail of text1 inside text2.
    local found = strfind(text2, pattern, 1, true)
    if found == nil then
      return best
    end
    length = length + found - 1
    if found == 1 or strsub(text1, text_length - length + 1) == strsub(text2, 1, length) then
      best = length
      length = length + 1
    end
  end
end

--[[
* Does a substring of shorttext exist within longtext such that the substring
* is at least half the length of longtext?
* This speedup can produce non-minimal diffs.
* Closure, but does not reference any external variables.
* @param {string} longtext Longer string.
* @param {string} shorttext Shorter string.
* @param {number} i Start index of quarter length substring within longtext.
* @return {table|nil} Five element array, containing the prefix of
*    longtext, the suffix of longtext, the prefix of shorttext, the suffix
*    of shorttext and the common middle.  Or nil if there was no match.
* @private
--]]
function _diff_halfMatchI(longtext, shorttext, i)
  -- Start with a 1/4 length substring at position i as a seed.
  local seed = strsub(longtext, i, i + floor(#longtext / 4))
  local j = 0 -- LUANOTE: do not change to 1, was originally -1
  local best_common = ''
  local best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b
  -- Try every occurrence of the seed in shorttext and keep the one whose
  -- surrounding common region is longest.
  while true do
    j = indexOf(shorttext, seed, j + 1)
    if j == nil then
      break
    end
    local prefixLength = _diff_commonPrefix(strsub(longtext, i), strsub(shorttext, j))
    local suffixLength = _diff_commonSuffix(strsub(longtext, 1, i - 1), strsub(shorttext, 1, j - 1))
    if #best_common < suffixLength + prefixLength then
      best_common = strsub(shorttext, j - suffixLength, j - 1) .. strsub(shorttext, j, j + prefixLength - 1)
      best_longtext_a = strsub(longtext, 1, i - suffixLength - 1)
      best_longtext_b = strsub(longtext, i + prefixLength)
      best_shorttext_a = strsub(shorttext, 1, j - suffixLength - 1)
      best_shorttext_b = strsub(shorttext, j + prefixLength)
    end
  end
  -- Only accept a common region covering at least half of longtext.
  if #best_common * 2 >= #longtext then
    return { best_longtext_a, best_longtext_b, best_shorttext_a, best_shorttext_b, best_common }
  else
    return nil
  end
end

--[[
* Do the two texts share a substring which is at least half the length of the
* longer text?
* Always answers nil when Diff_Timeout <= 0.
* @param {string} text1 First string.
* @param {string} text2 Second string.
* @return Five separate values (not a table): the prefix of text1, the
*    suffix of text1, the prefix of text2, the suffix of text2 and the
*    common middle.  Or a single nil if there was no match.
* @private
--]]
function _diff_halfMatch(text1, text2)
  if Diff_Timeout <= 0 then
    -- Don't risk returning a non-optimal diff if we have unlimited time.
    return nil
  end
  local longtext = (#text1 > #text2) and text1 or text2
  local shorttext = (#text1 > #text2) and text2 or text1
  if (#longtext < 4) or (#shorttext * 2 < #longtext) then
    return nil -- Pointless.
  end

  -- First check if the second quarter is the seed for a half-match.
  local hm1 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 4))
  -- Check again based on the third quarter.
  local hm2 = _diff_halfMatchI(longtext, shorttext, ceil(#longtext / 2))
  local hm
  if not hm1 and not hm2 then
    return nil
  elseif not hm2 then
    hm = hm1
  elseif not hm1 then
    hm = hm2
  else
    -- Both matched.  Select the longest.
    hm = (#hm1[5] > #hm2[5]) and hm1 or hm2
  end

  -- A half-match was found, sort out the return data.
  -- hm is ordered (longtext parts, shorttext parts); map it back onto
  -- text1/text2 depending on which of them was the longer one.
  local text1_a, text1_b, text2_a, text2_b
  if #text1 > #text2 then
    text1_a, text1_b = hm[1], hm[2]
    text2_a, text2_b = hm[3], hm[4]
  else
    text2_a, text2_b = hm[1], hm[2]
    text1_a, text1_b = hm[3], hm[4]
  end
  local mid_common = hm[5]
  return text1_a, text1_b, text2_a, text2_b, mid_common
end

--[[
* Given two strings, compute a score representing whether the internal
* boundary falls on logical boundaries.
* Scores range from 6 (best) to 0 (worst).
* Only the last character of `one` and the first character of `two` are
* classified, except for the blank-line check which looks at whole strings.
* @param {string} one First string.
* @param {string} two Second string.
* @return {number} The score.
* @private
--]]
function _diff_cleanupSemanticScore(one, two)
  if (#one == 0) or (#two == 0) then
    -- Edges are the best.
    return 6
  end

  -- Each port of this function behaves slightly differently due to
  -- subtle differences in each language's definition of things like
  -- 'whitespace'.  Since this function's purpose is largely cosmetic,
  -- the choice has been made to use each language's native features
  -- rather than force total conformity.
  local char1 = strsub(one, -1)
  local char2 = strsub(two, 1, 1)
  -- Each classification is only attempted when the broader one before it
  -- matched, so a falsy value propagates down the chain.
  local nonAlphaNumeric1 = strmatch(char1, '%W')
  local nonAlphaNumeric2 = strmatch(char2, '%W')
  local whitespace1 = nonAlphaNumeric1 and strmatch(char1, '%s')
  local whitespace2 = nonAlphaNumeric2 and strmatch(char2, '%s')
  local lineBreak1 = whitespace1 and strmatch(char1, '%c')
  local lineBreak2 = whitespace2 and strmatch(char2, '%c')
  local blankLine1 = lineBreak1 and strmatch(one, '\n\r?\n$')
  local blankLine2 = lineBreak2 and strmatch(two, '^\r?\n\r?\n')

  if blankLine1 or blankLine2 then
    -- Five points for blank lines.
    return 5
  elseif lineBreak1 or lineBreak2 then
    -- Four points for line breaks.
    return 4
  elseif nonAlphaNumeric1 and not whitespace1 and whitespace2 then
    -- Three points for end of sentences.
    return 3
  elseif whitespace1 or whitespace2 then
    -- Two points for whitespace.
    return 2
  elseif nonAlphaNumeric1 or nonAlphaNumeric2 then
    -- One point for non-alphanumeric.
    return 1
  end
  return 0
end

--[[
* Look for single edits surrounded on both sides by equalities
* which can be shifted sideways to align the edit to a word boundary.
* The edit is first slid as far left as possible, then stepped right one
* character at a time while the best-scoring position is remembered.
* The list is modified in place; nothing is returned.
* @param {table} diffs Array of diff tuples, each of the form {op, text}.
--]]
function _diff_cleanupSemanticLossless(diffs)
  local pointer = 2
  -- Intentionally ignore the first and last element (don't need checking).
  while diffs[pointer + 1] do
    local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1]
    if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then
      -- This is a single edit surrounded by equalities.
      local diff = diffs[pointer]

      local equality1 = prevDiff[2]
      local edit = diff[2]
      local equality2 = nextDiff[2]

      -- First, shift the edit as far left as possible.
+ local commonOffset = _diff_commonSuffix(equality1, edit) + if commonOffset > 0 then + local commonString = strsub(edit, -commonOffset) + equality1 = strsub(equality1, 1, -commonOffset - 1) + edit = commonString .. strsub(edit, 1, -commonOffset - 1) + equality2 = commonString .. equality2 + end + + -- Second, step character by character right, looking for the best fit. + local bestEquality1 = equality1 + local bestEdit = edit + local bestEquality2 = equality2 + local bestScore = _diff_cleanupSemanticScore(equality1, edit) + _diff_cleanupSemanticScore(edit, equality2) + + while strbyte(edit, 1) == strbyte(equality2, 1) do + equality1 = equality1 .. strsub(edit, 1, 1) + edit = strsub(edit, 2) .. strsub(equality2, 1, 1) + equality2 = strsub(equality2, 2) + local score = _diff_cleanupSemanticScore(equality1, edit) + _diff_cleanupSemanticScore(edit, equality2) + -- The >= encourages trailing rather than leading whitespace on edits. + if score >= bestScore then + bestScore = score + bestEquality1 = equality1 + bestEdit = edit + bestEquality2 = equality2 + end + end + if prevDiff[2] ~= bestEquality1 then + -- We have an improvement, save it back to the diff. + if #bestEquality1 > 0 then + diffs[pointer - 1][2] = bestEquality1 + else + tremove(diffs, pointer - 1) + pointer = pointer - 1 + end + diffs[pointer][2] = bestEdit + if #bestEquality2 > 0 then + diffs[pointer + 1][2] = bestEquality2 + else + tremove(diffs, pointer + 1, 1) + pointer = pointer - 1 + end + end + end + pointer = pointer + 1 + end +end + +--[[ +* Reorder and merge like edit sections. Merge equalities. +* Any edit section can move as long as it doesn't cross an equality. +* @param {Array.>} diffs Array of diff tuples. +--]] +function _diff_cleanupMerge(diffs) + diffs[#diffs + 1] = { DIFF_EQUAL, '' } -- Add a dummy entry at the end. 
+ local pointer = 1 + local count_delete, count_insert = 0, 0 + local text_delete, text_insert = '', '' + local commonlength + while diffs[pointer] do + local diff_type = diffs[pointer][1] + if diff_type == DIFF_INSERT then + count_insert = count_insert + 1 + text_insert = text_insert .. diffs[pointer][2] + pointer = pointer + 1 + elseif diff_type == DIFF_DELETE then + count_delete = count_delete + 1 + text_delete = text_delete .. diffs[pointer][2] + pointer = pointer + 1 + elseif diff_type == DIFF_EQUAL then + -- Upon reaching an equality, check for prior redundancies. + if count_delete + count_insert > 1 then + if (count_delete > 0) and (count_insert > 0) then + -- Factor out any common prefixies. + commonlength = _diff_commonPrefix(text_insert, text_delete) + if commonlength > 0 then + local back_pointer = pointer - count_delete - count_insert + if (back_pointer > 1) and (diffs[back_pointer - 1][1] == DIFF_EQUAL) then + diffs[back_pointer - 1][2] = diffs[back_pointer - 1][2] .. strsub(text_insert, 1, commonlength) + else + tinsert(diffs, 1, { DIFF_EQUAL, strsub(text_insert, 1, commonlength) }) + pointer = pointer + 1 + end + text_insert = strsub(text_insert, commonlength + 1) + text_delete = strsub(text_delete, commonlength + 1) + end + -- Factor out any common suffixies. + commonlength = _diff_commonSuffix(text_insert, text_delete) + if commonlength ~= 0 then + diffs[pointer][2] = strsub(text_insert, -commonlength) .. diffs[pointer][2] + text_insert = strsub(text_insert, 1, -commonlength - 1) + text_delete = strsub(text_delete, 1, -commonlength - 1) + end + end + -- Delete the offending records and add the merged ones. 
+ pointer = pointer - count_delete - count_insert + for i = 1, count_delete + count_insert do + tremove(diffs, pointer) + end + if #text_delete > 0 then + tinsert(diffs, pointer, { DIFF_DELETE, text_delete }) + pointer = pointer + 1 + end + if #text_insert > 0 then + tinsert(diffs, pointer, { DIFF_INSERT, text_insert }) + pointer = pointer + 1 + end + pointer = pointer + 1 + elseif (pointer > 1) and (diffs[pointer - 1][1] == DIFF_EQUAL) then + -- Merge this equality with the previous one. + diffs[pointer - 1][2] = diffs[pointer - 1][2] .. diffs[pointer][2] + tremove(diffs, pointer) + else + pointer = pointer + 1 + end + count_insert, count_delete = 0, 0 + text_delete, text_insert = '', '' + end + end + if diffs[#diffs][2] == '' then + diffs[#diffs] = nil -- Remove the dummy entry at the end. + end + + -- Second pass: look for single edits surrounded on both sides by equalities + -- which can be shifted sideways to eliminate an equality. + -- e.g: ABAC -> ABAC + local changes = false + pointer = 2 + -- Intentionally ignore the first and last element (don't need checking). + while pointer < #diffs do + local prevDiff, nextDiff = diffs[pointer - 1], diffs[pointer + 1] + if (prevDiff[1] == DIFF_EQUAL) and (nextDiff[1] == DIFF_EQUAL) then + -- This is a single edit surrounded by equalities. + local diff = diffs[pointer] + local currentText = diff[2] + local prevText = prevDiff[2] + local nextText = nextDiff[2] + if #prevText == 0 then + tremove(diffs, pointer - 1) + changes = true + elseif strsub(currentText, -#prevText) == prevText then + -- Shift the edit over the previous equality. + diff[2] = prevText .. strsub(currentText, 1, -#prevText - 1) + nextDiff[2] = prevText .. nextDiff[2] + tremove(diffs, pointer - 1) + changes = true + elseif strsub(currentText, 1, #nextText) == nextText then + -- Shift the edit over the next equality. + prevDiff[2] = prevText .. nextText + diff[2] = strsub(currentText, #nextText + 1) .. 
nextText + tremove(diffs, pointer + 1) + changes = true + end + end + pointer = pointer + 1 + end + -- If shifts were made, the diff needs reordering and another shift sweep. + if changes then + -- LUANOTE: no return value, but necessary to use 'return' to get + -- tail calls. + return _diff_cleanupMerge(diffs) + end +end + +--[[ +* loc is a location in text1, compute and return the equivalent location in +* text2. +* e.g. 'The cat' vs 'The big cat', 1->1, 5->8 +* @param {Array.>} diffs Array of diff tuples. +* @param {number} loc Location within text1. +* @return {number} Location within text2. +--]] +function _diff_xIndex(diffs, loc) + local chars1 = 1 + local chars2 = 1 + local last_chars1 = 1 + local last_chars2 = 1 + local x + for _x, diff in ipairs(diffs) do + x = _x + if diff[1] ~= DIFF_INSERT then -- Equality or deletion. + chars1 = chars1 + #diff[2] + end + if diff[1] ~= DIFF_DELETE then -- Equality or insertion. + chars2 = chars2 + #diff[2] + end + if chars1 > loc then -- Overshot the location. + break + end + last_chars1 = chars1 + last_chars2 = chars2 + end + -- Was the location deleted? + if diffs[x + 1] and (diffs[x][1] == DIFF_DELETE) then + return last_chars2 + end + -- Add the remaining character length. + return last_chars2 + (loc - last_chars1) +end + +--[[ +* Compute and return the source text (all equalities and deletions). +* @param {Array.>} diffs Array of diff tuples. +* @return {string} Source text. +--]] +function _diff_text1(diffs) + local text = {} + for x, diff in ipairs(diffs) do + if diff[1] ~= DIFF_INSERT then + text[#text + 1] = diff[2] + end + end + return tconcat(text) +end + +--[[ +* Compute and return the destination text (all equalities and insertions). +* @param {Array.>} diffs Array of diff tuples. +* @return {string} Destination text. 
--]]
function _diff_text2(diffs)
  -- Collect every piece that survives into the destination text.
  local pieces = {}
  for _, entry in ipairs(diffs) do
    local op = entry[1]
    if op ~= DIFF_DELETE then
      pieces[#pieces + 1] = entry[2]
    end
  end
  return tconcat(pieces)
end

--[[
* Encode the diff as a compact, tab-separated delta string describing how
* to turn text1 into text2.
* E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
* Kept and deleted runs are written as lengths; inserted text is escaped
* through percentEncode_pattern / percentEncode_replace.
* @param {table} diffs Array of diff tuples, each of the form {op, text}.
* @return {string} Delta text.
--]]
function _diff_toDelta(diffs)
  local tokens = {}
  for index, entry in ipairs(diffs) do
    local kind, payload = entry[1], entry[2]
    local token
    if kind == DIFF_EQUAL then
      token = '=' .. #payload
    elseif kind == DIFF_DELETE then
      token = '-' .. #payload
    elseif kind == DIFF_INSERT then
      -- Only the first result of gsub (the escaped string) is wanted.
      local escaped = gsub(payload, percentEncode_pattern, percentEncode_replace)
      token = '+' .. escaped
    end
    tokens[index] = token
  end
  return tconcat(tokens, '\t')
end

--[[
* Rebuild the full diff from the original text1 and a delta string that
* describes the operations required to transform text1 into text2.
* @param {string} text1 Source string for the diff.
* @param {string} delta Delta text.
* @return {table} Array of diff tuples, each of the form {op, text}.
* @throws error on a malformed escape, an invalid count, an unknown
*     operation character, or when the delta does not consume exactly the
*     whole of text1.
--]]
function _diff_fromDelta(text1, delta)
  local result = {}
  local count = 0 -- Number of tuples stored in result so far.
  local cursor = 1 -- Next unread position in text1.
  for token in gmatch(delta, '[^\t]+') do
    -- The first character selects the operation; the rest is its argument.
    local op = strsub(token, 1, 1)
    local param = strsub(token, 2)
    if op == '+' then
      local malformed = false
      -- Turn one %xx escape back into its character; anything that is not
      -- exactly two hex digits marks the whole token as malformed.
      local function unescape(hex)
        local code = tonumber(hex, 16)
        if (#hex == 2) and (code ~= nil) then
          return strchar(code)
        end
        malformed = true
        return ''
      end
      local decoded = gsub(param, '%%(.?.?)', unescape)
      if malformed then
        -- Malformed URI sequence.
        error('Illegal escape in _diff_fromDelta: ' .. param)
      end
      count = count + 1
      result[count] = { DIFF_INSERT, decoded }
    elseif (op == '-') or (op == '=') then
      local n = tonumber(param)
      if (n == nil) or (n < 0) then
        error('Invalid number in _diff_fromDelta: ' .. param)
      end
      local chunk = strsub(text1, cursor, cursor + n - 1)
      cursor = cursor + n
      local kind = DIFF_DELETE
      if op == '=' then
        kind = DIFF_EQUAL
      end
      count = count + 1
      result[count] = { kind, chunk }
    else
      error('Invalid diff operation in _diff_fromDelta: ' .. token)
    end
  end
  if cursor ~= #text1 + 1 then
    error('Delta length (' .. (cursor - 1) .. ') does not equal source text length (' .. #text1 .. ').')
  end
  return result
end

-- ---------------------------------------------------------------------------
--  MATCH API
-- ---------------------------------------------------------------------------

local _match_bitap, _match_alphabet

--[[
* Locate the best instance of 'pattern' in 'text' near 'loc'.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
--]]
function match_main(text, pattern, loc)
  -- Check for null inputs.
  if text == nil or pattern == nil or loc == nil then
    error('Null inputs. (match_main)')
  end

  if text == pattern then
    -- Shortcut (potentially not guaranteed by the algorithm)
    return 1
  elseif #text == 0 then
    -- Nothing to match.
    return -1
  end
  loc = max(1, min(loc, #text))
  if strsub(text, loc, loc + #pattern - 1) == pattern then
    -- Perfect match at the perfect spot!  (Includes case of null pattern)
    return loc
  else
    -- Do a fuzzy compare.
-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE MATCH FUNCTIONS
-- ---------------------------------------------------------------------------

--[[
* Initialise the alphabet for the Bitap algorithm.
* For every character of the pattern, builds a bitmask in which the bit
* (#pattern - i - 1) is set for each 0-based offset i where that character
* occurs in the pattern.
* @param {string} pattern The text to encode.
* @return {Object} Hash of character locations (character -> bitmask).
* @private
--]]
function _match_alphabet(pattern)
  local s = {}
  local i = 0
  for c in gmatch(pattern, '.') do
    s[c] = bor(s[c] or 0, lshift(1, #pattern - i - 1))
    i = i + 1
  end
  return s
end

--[[
* Locate the best instance of 'pattern' in 'text' near 'loc' using the
* Bitap algorithm.
* @param {string} text The text to search.
* @param {string} pattern The pattern to search for.
* @param {number} loc The location to search around.
* @return {number} Best match index or -1.
* @throws {Error} If the pattern is longer than Match_MaxBits.
* @private
--]]
function _match_bitap(text, pattern, loc)
  if #pattern > Match_MaxBits then
    error('Pattern too long.')
  end

  -- Initialise the alphabet.
  local s = _match_alphabet(pattern)

  --[[
  * Compute and return the score for a match with e errors and x location.
  * Accesses loc and pattern through being a closure.
  * @param {number} e Number of errors in match.
  * @param {number} x Location of match.
  * @return {number} Overall score for match (0.0 = good, 1.0 = bad).
  * @private
  --]]
  local function _match_bitapScore(e, x)
    local accuracy = e / #pattern
    local proximity = abs(loc - x)
    if Match_Distance == 0 then
      -- Dodge divide by zero error.  With no distance tolerance, a match
      -- exactly at loc is judged on accuracy alone and any other location
      -- gets the worst possible score.
      if proximity == 0 then
        return accuracy
      end
      return 1
    end
    return accuracy + (proximity / Match_Distance)
  end

  -- Highest score beyond which we give up.
  local score_threshold = Match_Threshold
  -- Is there a nearby exact match? (speedup)
  local best_loc = indexOf(text, pattern, loc)
  if best_loc then
    score_threshold = min(_match_bitapScore(0, best_loc), score_threshold)
    -- LUANOTE: Ideally we'd also check from the other direction, but Lua
    -- doesn't have an efficient lastIndexOf function.
  end

  -- Initialise the bit arrays.
  local matchmask = lshift(1, #pattern - 1)
  best_loc = -1

  local bin_min, bin_mid
  local bin_max = #pattern + #text
  local last_rd
  for d = 0, #pattern - 1, 1 do
    -- Scan for the best match; each iteration allows for one more error.
    -- Run a binary search to determine how far from 'loc' we can stray at this
    -- error level.
    bin_min = 0
    bin_mid = bin_max
    while bin_min < bin_mid do
      if _match_bitapScore(d, loc + bin_mid) <= score_threshold then
        bin_min = bin_mid
      else
        bin_max = bin_mid
      end
      bin_mid = floor(bin_min + (bin_max - bin_min) / 2)
    end
    -- Use the result from this iteration as the maximum for the next.
    bin_max = bin_mid
    local start = max(1, loc - bin_mid + 1)
    local finish = min(loc + bin_mid, #text) + #pattern

    local rd = {}
    for j = start, finish do
      rd[j] = 0
    end
    rd[finish + 1] = lshift(1, d) - 1
    for j = finish, start, -1 do
      local charMatch = s[strsub(text, j - 1, j - 1)] or 0
      if d == 0 then -- First pass: exact match.
        rd[j] = band(bor((rd[j + 1] * 2), 1), charMatch)
      else
        -- Subsequent passes: fuzzy match.
        -- Functions instead of operators make this hella messy.
        rd[j] = bor(
          band(bor(lshift(rd[j + 1], 1), 1), charMatch),
          bor(bor(lshift(bor(last_rd[j + 1], last_rd[j]), 1), 1), last_rd[j + 1])
        )
      end
      if band(rd[j], matchmask) ~= 0 then
        local score = _match_bitapScore(d, j - 1)
        -- This match will almost certainly be better than any existing match.
        -- But check anyway.
        if score <= score_threshold then
          -- Told you so.
          score_threshold = score
          best_loc = j - 1
          if best_loc > loc then
            -- When passing loc, don't exceed our current distance from loc.
            -- NOTE: a numeric for loop evaluates its limit once, so this
            -- assignment does not shorten the scan already in progress.
            start = max(1, loc * 2 - best_loc)
          else
            -- Already passed loc, downhill from here on in.
            break
          end
        end
      end
    end
    -- No hope for a (better) match at greater error levels.
    if _match_bitapScore(d + 1, loc) > score_threshold then
      break
    end
    last_rd = rd
  end
  return best_loc
end
+ diffs = a + text1 = _diff_text1(diffs) + elseif (type_a == 'string') and (type_b == 'table') and (type_c == 'nil') then + -- Method 3: text1, diffs + text1 = a + diffs = opt_b + elseif (type_a == 'string') and (type_b == 'string') and (type_c == 'table') then + -- Method 4: text1, text2, diffs + -- text2 is not used. + text1 = a + diffs = opt_c + else + error('Unknown call format to patch_make.') + end + + if diffs[1] == nil then + return {} -- Get rid of the null case. + end + + local patches = {} + local patch = _new_patch_obj() + local patchDiffLength = 0 -- Keeping our own length var is faster. + local char_count1 = 0 -- Number of characters into the text1 string. + local char_count2 = 0 -- Number of characters into the text2 string. + -- Start with text1 (prepatch_text) and apply the diffs until we arrive at + -- text2 (postpatch_text). We recreate the patches one by one to determine + -- context info. + local prepatch_text, postpatch_text = text1, text1 + for x, diff in ipairs(diffs) do + local diff_type, diff_text = diff[1], diff[2] + + if (patchDiffLength == 0) and (diff_type ~= DIFF_EQUAL) then + -- A new patch starts here. + patch.start1 = char_count1 + 1 + patch.start2 = char_count2 + 1 + end + + if diff_type == DIFF_INSERT then + patchDiffLength = patchDiffLength + 1 + patch.diffs[patchDiffLength] = diff + patch.length2 = patch.length2 + #diff_text + postpatch_text = strsub(postpatch_text, 1, char_count2) .. diff_text .. strsub(postpatch_text, char_count2 + 1) + elseif diff_type == DIFF_DELETE then + patch.length1 = patch.length1 + #diff_text + patchDiffLength = patchDiffLength + 1 + patch.diffs[patchDiffLength] = diff + postpatch_text = strsub(postpatch_text, 1, char_count2) .. strsub(postpatch_text, char_count2 + #diff_text + 1) + elseif diff_type == DIFF_EQUAL then + if (#diff_text <= Patch_Margin * 2) and (patchDiffLength ~= 0) and (#diffs ~= x) then + -- Small equality inside a patch. 
--[[
* Merge a set of patches onto the text.  Return a patched text, as well
* as a list of true/false values indicating which patches were applied.
* The caller's patch objects are not modified; the work is done on a deep
* copy that is padded and split before matching.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @param {string} text Old text.
* @return {Array.<string|Array.<boolean>>} Two return values, the
*     new text and an array of boolean values.
--]]
function patch_apply(patches, text)
  if patches[1] == nil then
    return text, {}
  end

  -- Deep copy the patches so that no changes are made to originals.
  patches = _patch_deepCopy(patches)

  local nullPadding = _patch_addPadding(patches)
  text = nullPadding .. text .. nullPadding

  _patch_splitMax(patches)
  -- delta keeps track of the offset between the expected and actual location
  -- of the previous patch.  If there are patches expected at positions 10 and
  -- 20, but the first patch was found at 12, delta is 2 and the second patch
  -- has an effective expected position of 22.
  local delta = 0
  local results = {}
  for x, patch in ipairs(patches) do
    local expected_loc = patch.start2 + delta
    local text1 = _diff_text1(patch.diffs)
    local start_loc
    local end_loc = -1
    if #text1 > Match_MaxBits then
      -- _patch_splitMax will only provide an oversized pattern in
      -- the case of a monster delete.
      start_loc = match_main(text, strsub(text1, 1, Match_MaxBits), expected_loc)
      if start_loc ~= -1 then
        end_loc = match_main(text, strsub(text1, -Match_MaxBits), expected_loc + #text1 - Match_MaxBits)
        if end_loc == -1 or start_loc >= end_loc then
          -- Can't find valid trailing context.  Drop this patch.
          start_loc = -1
        end
      end
    else
      start_loc = match_main(text, text1, expected_loc)
    end
    if start_loc == -1 then
      -- No match found.  :(
      results[x] = false
      -- Subtract the delta for this failed patch from subsequent patches.
      -- The patch would have changed the text length by
      -- (length2 - length1); the parentheses make sure the whole
      -- difference is subtracted rather than both lengths.
      delta = delta - (patch.length2 - patch.length1)
    else
      -- Found a match.  :)
      results[x] = true
      delta = start_loc - expected_loc
      local text2
      if end_loc == -1 then
        text2 = strsub(text, start_loc, start_loc + #text1 - 1)
      else
        text2 = strsub(text, start_loc, end_loc + Match_MaxBits - 1)
      end
      if text1 == text2 then
        -- Perfect match, just shove the replacement text in.
        text = strsub(text, 1, start_loc - 1) .. _diff_text2(patch.diffs) .. strsub(text, start_loc + #text1)
      else
        -- Imperfect match.  Run a diff to get a framework of equivalent
        -- indices.
        local diffs = diff_main(text1, text2, false)
        if (#text1 > Match_MaxBits) and (diff_levenshtein(diffs) / #text1 > Patch_DeleteThreshold) then
          -- The end points match, but the content is unacceptably bad.
          results[x] = false
        else
          _diff_cleanupSemanticLossless(diffs)
          local index1 = 1
          local index2
          for _, mod in ipairs(patch.diffs) do
            if mod[1] ~= DIFF_EQUAL then
              index2 = _diff_xIndex(diffs, index1)
            end
            if mod[1] == DIFF_INSERT then
              text = strsub(text, 1, start_loc + index2 - 2) .. mod[2] .. strsub(text, start_loc + index2 - 1)
            elseif mod[1] == DIFF_DELETE then
              text = strsub(text, 1, start_loc + index2 - 2)
                .. strsub(text, start_loc + _diff_xIndex(diffs, index1 + #mod[2] - 1))
            end
            if mod[1] ~= DIFF_DELETE then
              index1 = index1 + #mod[2]
            end
          end
        end
      end
    end
  end
  -- Strip the padding off.
  text = strsub(text, #nullPadding + 1, -#nullPadding - 1)
  return text, results
end
-- ---------------------------------------------------------------------------
-- UNOFFICIAL/PRIVATE PATCH FUNCTIONS
-- ---------------------------------------------------------------------------

-- Metatable shared by every patch object: tostring(patch) yields the same
-- text that _patch_appendText produces for that single patch.
local patch_meta = {}
patch_meta.__tostring = function(patch)
  local pieces = {}
  _patch_appendText(patch, pieces)
  return tconcat(pieces)
end

--[[
* Class representing one patch operation.
* Fields of a fresh object:
*   diffs   {Array.<Array.<number|string>>} empty list of diff tuples
*   start1  {?number} 1
*   start2  {?number} 1
*   length1 {number} 0
*   length2 {number} 0
* @constructor
--]]
function _new_patch_obj()
  local obj = {
    diffs = {},
    start1 = 1, -- nil;
    start2 = 1, -- nil;
    length1 = 0,
    length2 = 0,
  }
  return setmetatable(obj, patch_meta)
end
--[[
* Given an array of patches, return another array that is identical.
* Every patch object and every diff tuple is newly created, so changes to
* the copy never reach the originals.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
* @return {Array.<_new_patch_obj>} Array of patch objects.
--]]
function _patch_deepCopy(patches)
  local clones = {}
  for idx, source in ipairs(patches) do
    local clone = _new_patch_obj()

    -- Fresh { op, text } tuples; the strings themselves are immutable.
    local tuples = {}
    for pos, tuple in ipairs(source.diffs) do
      tuples[pos] = { tuple[1], tuple[2] }
    end
    clone.diffs = tuples

    clone.start1, clone.start2 = source.start1, source.start2
    clone.length1, clone.length2 = source.length1, source.length2
    clones[idx] = clone
  end
  return clones
end
--[[
* Look through the patches and break up any which are longer than the maximum
* limit of the match algorithm.
* Intended to be called only from within patch_apply.
* The list is edited in place: each oversized patch is removed and the
* smaller pieces that contain a change are inserted where it stood.
* @param {Array.<_new_patch_obj>} patches Array of patch objects.
--]]
function _patch_splitMax(patches)
  local limit = Match_MaxBits
  local idx = 1
  while patches[idx] ~= nil do
    local oversized = patches[idx]
    if oversized.length1 > limit then
      -- Remove the big old patch; its pieces are inserted below.
      tremove(patches, idx)
      idx = idx - 1
      local queue = oversized.diffs -- diffs still waiting to be handed out
      local start1 = oversized.start1
      local start2 = oversized.start2
      local carry = '' -- trailing context of the previous piece
      while queue[1] do
        -- Create one of several smaller patches.
        local piece = _new_patch_obj()
        local out = piece.diffs
        local changed = false -- true once the piece holds an insert/delete
        piece.start1 = start1 - #carry
        piece.start2 = start2 - #carry
        if carry ~= '' then
          piece.length1, piece.length2 = #carry, #carry
          out[#out + 1] = { DIFF_EQUAL, carry }
        end
        while queue[1] and (piece.length1 < limit - Patch_Margin) do
          local head = queue[1]
          local op, chunk = head[1], head[2]
          if op == DIFF_INSERT then
            -- Insertions are harmless.
            piece.length2 = piece.length2 + #chunk
            start2 = start2 + #chunk
            out[#out + 1] = head
            tremove(queue, 1)
            changed = true
          elseif (op == DIFF_DELETE) and (#out == 1) and (out[1][1] == DIFF_EQUAL) and (#chunk > 2 * limit) then
            -- This is a large deletion.  Let it pass in one chunk.
            piece.length1 = piece.length1 + #chunk
            start1 = start1 + #chunk
            changed = true
            out[#out + 1] = { op, chunk }
            tremove(queue, 1)
          else
            -- Deletion or equality.
            -- Only take as much as we can stomach.
            local taken = strsub(chunk, 1, limit - piece.length1 - Patch_Margin)
            piece.length1 = piece.length1 + #taken
            start1 = start1 + #taken
            if op == DIFF_EQUAL then
              piece.length2 = piece.length2 + #taken
              start2 = start2 + #taken
            else
              changed = true
            end
            out[#out + 1] = { op, taken }
            if taken == chunk then
              -- The whole diff was consumed.
              tremove(queue, 1)
            else
              -- Leave the unconsumed remainder at the head of the queue.
              head[2] = strsub(chunk, #taken + 1)
            end
          end
        end
        -- Compute the head context for the next patch.
        carry = strsub(_diff_text2(out), -Patch_Margin)
        -- Append the end context for this patch.
        local trailing = strsub(_diff_text1(queue), 1, Patch_Margin)
        if trailing ~= '' then
          piece.length1 = piece.length1 + #trailing
          piece.length2 = piece.length2 + #trailing
          local last = out[#out]
          if out[1] and (last[1] == DIFF_EQUAL) then
            last[2] = last[2] .. trailing
          else
            out[#out + 1] = { DIFF_EQUAL, trailing }
          end
        end
        if changed then
          idx = idx + 1
          tinsert(patches, idx, piece)
        end
      end
    end
    idx = idx + 1
  end
end
-- Expose the API
local _M = {
  -- Diff operation constants.
  DIFF_DELETE = DIFF_DELETE,
  DIFF_INSERT = DIFF_INSERT,
  DIFF_EQUAL = DIFF_EQUAL,

  -- Diff functions.
  diff_main = diff_main,
  diff_cleanupSemantic = diff_cleanupSemantic,
  diff_cleanupEfficiency = diff_cleanupEfficiency,
  diff_levenshtein = diff_levenshtein,
  diff_prettyHtml = diff_prettyHtml,

  -- Match functions.
  match_main = match_main,

  -- Patch functions.
  patch_make = patch_make,
  patch_toText = patch_toText,
  patch_fromText = patch_fromText,
  patch_apply = patch_apply,

  -- Expose some non-API functions as well, for testing purposes etc.
  diff_commonPrefix = _diff_commonPrefix,
  diff_commonSuffix = _diff_commonSuffix,
  diff_commonOverlap = _diff_commonOverlap,
  diff_halfMatch = _diff_halfMatch,
  diff_bisect = _diff_bisect,
  diff_cleanupMerge = _diff_cleanupMerge,
  diff_cleanupSemanticLossless = _diff_cleanupSemanticLossless,
  diff_text1 = _diff_text1,
  diff_text2 = _diff_text2,
  diff_toDelta = _diff_toDelta,
  diff_fromDelta = _diff_fromDelta,
  diff_xIndex = _diff_xIndex,
  match_alphabet = _match_alphabet,
  match_bitap = _match_bitap,
  new_patch_obj = _new_patch_obj,
  patch_addContext = _patch_addContext,
  patch_splitMax = _patch_splitMax,
  patch_addPadding = _patch_addPadding,
  settings = settings,
}

return _M
@@ - context line --old line -+new line -]] - local file_path, hunks = diff.parse_unified_diff(diff_text) - assert.equals('b/foo.txt', file_path) - assert.equals('context line', hunks[1].context[1]) - assert.equals('old line', hunks[1].minus[1]) - assert.equals('new line', hunks[1].plus[1]) - end) - it('applies unified diff', function() local diff_text = [[ --- a/foo.txt @@ -27,26 +11,12 @@ describe('CopilotChat.utils.diff', function() +new ]] local original = { 'context', 'old', 'other' } - local result, applied = diff.apply_unified_diff(diff_text, original) + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) assert.is_true(applied) assert.are.same({ 'context', 'new', 'other' }, result) end) - it('gets unified diff region', function() - local diff_text = [[ ---- a/foo.txt -+++ b/foo.txt -@@ ... @@ - context --old -+new -]] - local original = { 'context', 'old', 'other' } - local first, last = diff.get_unified_diff_region(diff_text, original) - assert.equals(2, first) - assert.equals(2, last) - end) - it('applies unified diff with no context', function() local diff_text = [[ --- a/foo.txt @@ -56,7 +26,8 @@ describe('CopilotChat.utils.diff', function() +new ]] local original = { 'old', 'other' } - local result, applied = diff.apply_unified_diff(diff_text, original) + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) assert.is_true(applied) assert.are.same({ 'new', 'other' }, result) end) @@ -81,7 +52,8 @@ describe('CopilotChat.utils.diff', function() 'context3', 'other', } - local result, applied = diff.apply_unified_diff(diff_text, original) + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) assert.is_true(applied) assert.are.same({ 'context1', @@ -93,7 +65,7 @@ describe('CopilotChat.utils.diff', function() }, result) 
end) - it('does not apply ambiguous edit', function() + it('gets unified diff region', function() local diff_text = [[ --- a/foo.txt +++ b/foo.txt @@ -102,10 +74,159 @@ describe('CopilotChat.utils.diff', function() -old +new ]] - local original = { 'context', 'old', 'context', 'old' } - local result, applied = diff.apply_unified_diff(diff_text, original) - -- Should not apply because there are two possible matches - assert.is_false(applied) - assert.are.same({ 'context', 'old', 'context', 'old' }, result) + local original = { 'context', 'old', 'other' } + local original_content = table.concat(original, '\n') + local _, _, first, last = diff.apply_unified_diff(diff_text, original_content) + assert.equals(2, first) + assert.equals(2, last) + end) + + it('applies unified diff with only additions', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... @@ + context ++added1 ++added2 +]] + local original = { 'context', 'other' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ 'context', 'added1', 'added2', 'other' }, result) + end) + + it('applies unified diff with only deletions', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... @@ + context +-old1 +-old2 +]] + local original = { 'context', 'old1', 'old2', 'other' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ 'context', 'other' }, result) + end) + + it('applies unified diff with changes at start and end', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... 
@@ +-oldstart ++newstart + context +-oldend ++newend +]] + local original = { 'oldstart', 'context', 'oldend' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ 'newstart', 'context', 'newend' }, result) + end) + + it('applies unified diff with multiple hunks', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... @@ + context1 +-old1 ++new1 +@@ ... @@ + context2 +-old2 ++new2 +]] + local original = { 'context1', 'old1', 'context2', 'old2', 'other' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ 'context1', 'new1', 'context2', 'new2', 'other' }, result) + end) + + it('applies unified diff with no changes', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... @@ + context + unchanged +]] + local original = { 'context', 'unchanged' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same(original, result) + end) + + it('applies unified diff with all lines deleted', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... @@ +-old1 +-old2 +-old3 +]] + local original = { 'old1', 'old2', 'old3' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ '' }, result) + end) + + it('applies unified diff with all lines added to empty file', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... 
@@ ++new1 ++new2 ++new3 +]] + local original = {} + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ 'new1', 'new2', 'new3' }, result) + end) + + it('applies unified diff with changes at end of file', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... @@ + context +-oldend ++newend +]] + local original = { 'context', 'oldend' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ 'context', 'newend' }, result) + end) + + it('applies unified diff with changes at start of file', function() + local diff_text = [[ +--- a/foo.txt ++++ b/foo.txt +@@ ... @@ +-oldstart ++newstart + context +]] + local original = { 'oldstart', 'context' } + local original_content = table.concat(original, '\n') + local result, applied = diff.apply_unified_diff(diff_text, original_content) + assert.is_true(applied) + assert.are.same({ 'newstart', 'context' }, result) end) end)