ddmp.patch 0/296(0%) line coverage

      
10
20
30
40
50
60
70
80
90
100
110
120
130
140
150
160
170
180
190
200
210
220
230
240
250
260
270
280
290
300
310
320
330
340
350
360
370
380
390
400
410
420
430
440
450
460
470
480
490
500
510
520
530
540
550
560
570
580
590
600
610
620
630
640
650
660
670
680
690
700
710
720
730
740
750
760
770
780
790
800
810
820
830
840
850
860
870
880
890
900
910
920
930
940
950
960
970
980
990
1000
1010
1020
1030
1040
1050
1060
1070
1080
1090
1100
1110
1120
1130
1140
1150
1160
1170
1180
1190
1200
1210
1220
1230
1240
1250
1260
1270
1280
1290
1300
1310
1320
1330
1340
1350
1360
1370
1380
1390
1400
1410
1420
1430
1440
1450
1460
1470
1480
1490
1500
1510
1520
1530
1540
1550
1560
1570
1580
1590
1600
1610
1620
1630
1640
1650
1660
1670
1680
1690
1700
1710
1720
1730
1740
1750
1760
1770
1780
1790
1800
1810
1820
1830
1840
1850
1860
1870
1880
1890
1900
1910
1920
1930
1940
1950
1960
1970
1980
1990
2000
2010
2020
2030
2040
2050
2060
2070
2080
2090
2100
2110
2120
2130
2140
2150
2160
2170
2180
2190
2200
2210
2220
2230
2240
2250
2260
2270
2280
2290
2300
2310
2320
2330
2340
2350
2360
2370
2380
2390
2400
2410
2420
2430
2440
2450
2460
2470
2480
2490
2500
2510
2520
2530
2540
2550
2560
2570
2580
2590
2600
2610
2620
2630
2640
2650
2660
2670
2680
2690
2700
2710
2720
2730
2740
2750
2760
2770
2780
2790
2800
2810
2820
2830
2840
2850
2860
2870
2880
2890
2900
2910
2920
2930
2940
2950
2960
2970
2980
2990
3000
3010
3020
3030
3040
3050
3060
3070
3080
3090
3100
3110
3120
3130
3140
3150
3160
3170
3180
3190
3200
3210
3220
3230
3240
3250
3260
3270
3280
3290
3300
3310
3320
3330
3340
3350
3360
3370
3380
3390
3400
3410
3420
3430
3440
3450
3460
3470
3480
3490
3500
3510
3520
3530
3540
3550
3560
3570
3580
3590
3600
3610
3620
3630
3640
3650
3660
3670
3680
3690
3700
3710
3720
3730
3740
3750
3760
3770
3780
3790
3800
3810
3820
3830
3840
3850
3860
3870
3880
3890
3900
3910
3920
3930
3940
3950
3960
3970
3980
3990
4000
4010
4020
4030
4040
4050
4060
4070
4080
4090
4100
4110
4120
4130
4140
4150
4160
4170
4180
4190
4200
4210
4220
4230
4240
4250
4260
4270
4280
4290
4300
4310
4320
4330
4340
4350
4360
4370
4380
4390
4400
4410
4420
4430
4440
4450
4460
4470
4480
4490
4500
4510
4520
4530
4540
4550
4560
4570
4580
4590
4600
4610
4620
4630
4640
4650
4660
4670
4680
4690
4700
4710
4720
4730
4740
4750
4760
4770
4780
4790
4800
4810
4820
4830
4840
4850
4860
4870
4880
4890
4900
4910
4920
4930
4940
4950
4960
4970
4980
4990
5000
5010
5020
5030
5040
5050
5060
5070
5080
5090
5100
5110
5120
5130
5140
5150
5160
5170
5180
5190
5200
5210
5220
5230
5240
5250
5260
5270
5280
5290
5300
5310
5320
5330
5340
5350
5360
5370
5380
5390
5400
5410
5420
5430
5440
5450
5460
5470
5480
5490
5500
5510
5520
5530
5540
5550
5560
5570
5580
5590
5600
5610
5620
5630
5640
5650
5660
5670
5680
5690
5700
5710
5720
5730
5740
5750
5760
5770
5780
5790
5800
5810
5820
5830
5840
5850
5860
5870
5880
5890
/*
 * Copyright 2008 Google Inc. All Rights Reserved.
 * Copyright 2013-2014 Jan Krüger. All Rights Reserved.
 * Author: fraser@google.com (Neil Fraser)
 * Author: anteru@developer.shelter13.net (Matthaeus G. Chajdas)
 * Author: jan@jandoe.de (Jan Krüger)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Diff Match and Patch
 * http://code.google.com/p/google-diff-match-patch/
 */
module ddmp.patch;

import std.algorithm : min, max;
import std.array;
import std.conv;
import std.exception : enforce;
import std.string:lastIndexOf;

import ddmp.diff;
import ddmp.match;
import ddmp.util;

// Tunables read by the patch routines below.
int MATCH_MAXBITS = 32;               // Patterns longer than this are split / matched by their ends.
int PATCH_MARGIN = 4;                 // Chunk size used when growing context around a patch.
float PATCH_DELETE_THRESHOLD = 0.5f;  // Max levenshtein/length ratio accepted for an oversized delete.

/**
 * A single patch: a list of diffs plus the start offsets and lengths of the
 * region it covers in the source (1) and destination (2) texts.
 */
struct Patch {
    Diff[] diffs;
    sizediff_t start1;
    sizediff_t start2;
    sizediff_t length1;
    sizediff_t length2;

    /**
     * Render the patch as a "@@ -a,b +c,d @@" header followed by one line
     * per diff, prefixed with '+', '-' or ' '.
     * @return Text representation of this patch.
     */
    string toString() const
    {
        import std.uri : encode;

        auto app = appender!string();

        // Writes "start,0", "start+1" or "start+1,length" depending on length.
        void putCoords(sizediff_t start, sizediff_t length)
        {
            if( length == 0 ){
                app.put(to!string(start));
                app.put(",0");
            } else if( length == 1 ){
                app.put(to!string(start + 1));
            } else {
                app.put(to!string(start + 1));
                app.put(",");
                app.put(to!string(length));
            }
        }

        app.put("@@ -");
        putCoords(start1, length1);
        app.put(" +");
        putCoords(start2, length2);
        app.put(" @@\n");

        foreach( d ; diffs ){
            final switch( d.operation ){
                case Operation.INSERT:
                    app.put("+");
                    break;
                case Operation.DELETE:
                    app.put("-");
                    break;
                case Operation.EQUAL:
                    app.put(" ");
                    break;
            }
            // Spaces are kept literal; everything else stays URI-encoded.
            app.put(encode(d.text).replace("%20", " "));
            app.put("\n");
        }

        return unescapeForEncodeUriCompatibility(app.data());
    }
}

/**
 * Increase the context until it is unique,
 * but don't let the pattern expand beyond MATCH_MAXBITS.
 * @param patch The patch to grow.
 * @param text Source text.
 */
void addContext(ref Patch patch, string text)
{
    if( text.length == 0 ) return;

    auto pattern = text.substr(patch.start2, patch.length1);
    sizediff_t padding = 0;

    // Look for the first and last matches of pattern in text. If two
    // different matches are found, increase the pattern length.
    while( text.indexOfAlt(pattern) != text.lastIndexOf(pattern)
            && pattern.length < MATCH_MAXBITS - PATCH_MARGIN - PATCH_MARGIN ){
        padding += PATCH_MARGIN;
        pattern = text[max(0, patch.start2 - padding)
                       .. min(text.length, patch.start2 + patch.length1 + padding)];
    }
    // Add one chunk for good luck.
    padding += PATCH_MARGIN;

    // Add the prefix.
    auto prefix = text[max(0, patch.start2 - padding) .. patch.start2];
    if( prefix.length != 0 ){
        patch.diffs.insert(0, [Diff(Operation.EQUAL, prefix)]);
    }

    // Add the suffix.
    auto suffix = text[patch.start2 + patch.length1
                       .. min(text.length, patch.start2 + patch.length1 + padding)];
    if( suffix.length != 0 ){
        patch.diffs ~= Diff(Operation.EQUAL, suffix);
    }

    // Roll back the start points.
    patch.start1 -= prefix.length;
    patch.start2 -= prefix.length;
    // Extend the lengths.
    patch.length1 += prefix.length + suffix.length;
    patch.length2 += prefix.length + suffix.length;
}

/**
 * Compute a list of patches to turn text1 into text2.
 * A set of diffs will be computed.
 * @param text1 Old text.
 * @param text2 New text.
 * @return List of Patch objects.
 */
Patch[] patch_make(string text1, string text2)
{
    // No diffs provided, compute our own.
    auto diffs = diff_main(text1, text2, true);
    if (diffs.length > 2) {
        cleanupSemantic(diffs);
        cleanupEfficiency(diffs);
    }
    return patch_make(text1, diffs);
}

/**
 * Compute a list of patches to turn text1 into text2.
 * text1 will be derived from the provided diffs.
 * @param diffs Array of Diff objects for text1 to text2.
 * @return List of Patch objects.
 */
Patch[] patch_make(Diff[] diffs)
{
    // No origin string provided, compute our own.
    auto text1 = diff_text1(diffs);
    return patch_make(text1, diffs);
}

/**
 * Compute a list of patches to turn text1 into text2.
 * text2 is not provided, diffs are the delta between text1 and text2.
 * @param text1 Old text.
 * @param diffs Array of Diff objects for text1 to text2.
 * @return List of Patch objects.
 */
Patch[] patch_make(string text1, Diff[] diffs)
{
    Patch[] patches;
    if( diffs.length == 0 ) return patches;

    Patch patch;
    sizediff_t char_count1 = 0;  // Number of characters into the text1 string.
    sizediff_t char_count2 = 0;  // Number of characters into the text2 string.
    // Start with text1 (prepatch_text) and apply the diffs until we arrive at
    // text2 (postpatch_text). We recreate the patches one by one to determine
    // context info.
    auto prepatch_text = text1;
    auto postpatch_text = text1;

    foreach( i, diff ; diffs ){
        // Identify the final diff by position: comparing by value would also
        // match an earlier diff that happens to carry the same text.
        const bool isLastDiff = (i + 1 == diffs.length);

        if( patch.diffs.length == 0 && diff.operation != Operation.EQUAL ){
            // A new patch starts here.
            patch.start1 = char_count1;
            patch.start2 = char_count2;
        }

        final switch(diff.operation){
            case Operation.INSERT:
                patch.diffs ~= diff;
                patch.length2 += diff.text.length;
                postpatch_text.insert(char_count2, diff.text);
                break;
            case Operation.DELETE:
                // A deletion consumes source text, so it grows length1.
                patch.length1 += diff.text.length;
                patch.diffs ~= diff;
                postpatch_text.remove(char_count2, diff.text.length);
                break;
            case Operation.EQUAL:
                if( diff.text.length <= 2 * PATCH_MARGIN
                        && patch.diffs.length != 0 && !isLastDiff ){
                    // Small equality inside a patch.
                    patch.diffs ~= diff;
                    patch.length1 += diff.text.length;
                    patch.length2 += diff.text.length;
                }
                if( diff.text.length >= 2 * PATCH_MARGIN ){
                    // Time for a new patch.
                    if( patch.diffs.length != 0 ){
                        addContext(patch, prepatch_text);
                        patches ~= patch;
                        patch = Patch();
                        // The context rolls forward: later patches are
                        // computed against the already-patched text.
                        prepatch_text = postpatch_text;
                        char_count1 = char_count2;
                    }
                }
                break;
        }

        // Update the current character count.
        if (diff.operation != Operation.INSERT) {
            char_count1 += diff.text.length;
        }
        if (diff.operation != Operation.DELETE) {
            char_count2 += diff.text.length;
        }
    }

    // Pick up the leftover patch if not empty.
    if( !patch.diffs.empty ){
        addContext(patch, prepatch_text);
        patches ~= patch;
    }

    return patches;
}

/**
 * Result of apply(): the patched text and, per applied patch, whether it
 * could be placed.
 */
struct PatchApplyResult {
    string text;
    bool[] patchesApplied;
}

/**
 * Merge a set of patches onto the text. Return a patched text, as well
 * as an array of true/false values indicating which patches were applied.
 * The caller's patches are not modified.
 * @param patches Array of Patch objects
 * @param text Old text.
 * @return PatchApplyResult holding the new text and an array of bool values
 *         (one per patch after oversized patches have been split).
 */
PatchApplyResult apply(Patch[] patches, string text)
{
    PatchApplyResult result;
    if( patches.length == 0 ){
        // Nothing to apply: the text is returned unchanged.
        result.text = text;
        return result;
    }

    // Copy the patches (and their diff arrays) so that padding and splitting
    // below make no changes to the caller's originals.
    patches = patches.dup;
    foreach( ref p ; patches ){
        p.diffs = p.diffs.dup;
    }

    auto nullPadding = addPadding(patches);
    text = nullPadding ~ text ~ nullPadding;
    splitMax(patches);

    result.patchesApplied.length = patches.length; // init patchesApplied array
    // delta keeps track of the offset between the expected and actual
    // location of the previous patch. If there are patches expected at
    // positions 10 and 20, but the first patch was found at 12, delta is 2
    // and the second patch has an effective expected position of 22.
    sizediff_t delta = 0;
    foreach( x, patch ; patches ){
        auto expected_loc = patch.start2 + delta;
        auto text1 = diff_text1(patch.diffs);
        sizediff_t start_loc;
        sizediff_t end_loc = -1;
        if( text1.length > MATCH_MAXBITS ){
            // splitMax will only provide an oversized pattern
            // in the case of a monster delete.
            start_loc = match_main(text, text1.substr(0, MATCH_MAXBITS), expected_loc);
            if( start_loc != -1 ){
                end_loc = match_main(text,
                    text1.substr(text1.length - MATCH_MAXBITS),
                    expected_loc + text1.length - MATCH_MAXBITS);
                if( end_loc == -1 || start_loc >= end_loc ){
                    // Can't find valid trailing context. Drop this patch.
                    start_loc = -1;
                }
            }
        } else {
            start_loc = match_main(text, text1, expected_loc);
        }

        if( start_loc == -1 ){
            // No match found. :(
            result.patchesApplied[x] = false;
            // Subtract the delta for this failed patch from subsequent patches.
            delta -= patch.length2 - patch.length1;
        } else {
            // Found a match. :)
            result.patchesApplied[x] = true;
            delta = start_loc - expected_loc;
            string text2;
            if( end_loc == -1 ){
                text2 = text[ start_loc .. min(start_loc + text1.length, text.length) ];
            } else {
                text2 = text[ start_loc .. min(end_loc + MATCH_MAXBITS, text.length) ];
            }
            if( text1 == text2 ) {
                // Perfect match, just shove the replacement text in.
                text = text.substr(0, start_loc) ~ diff_text2(patch.diffs)
                     ~ text.substr(start_loc + text1.length);
            } else {
                // Imperfect match. Run a diff to get a framework of equivalent indices.
                auto diffs = diff_main(text1, text2, false);
                if( text1.length > MATCH_MAXBITS
                        && levenshtein(diffs) / cast(float)text1.length > PATCH_DELETE_THRESHOLD ){
                    // The end points match, but the content is unacceptably bad.
                    result.patchesApplied[x] = false;
                } else {
                    cleanupSemanticLossless(diffs);
                    auto index1 = 0;
                    foreach( diff; patch.diffs ){
                        if( diff.operation != Operation.EQUAL ){
                            auto index2 = xIndex(diffs, index1);
                            if( diff.operation == Operation.INSERT ){
                                // Insertion
                                text.insert(start_loc + index2, diff.text);
                            } else if( diff.operation == Operation.DELETE ){
                                // Deletion
                                text.remove(start_loc + index2,
                                    xIndex(diffs, index1 + diff.text.length) - index2);
                            }
                        }
                        if( diff.operation != Operation.DELETE ){
                            index1 += diff.text.length;
                        }
                    }
                }
            }
        }
    }
    // Strip the padding off.
    result.text = text.substr(nullPadding.length, text.length - 2 * nullPadding.length);
    return result;
}

/**
 * Add some padding on text start and end so that edges can match something.
 * Intended to be called only from within apply. The elements of `patches`
 * are modified in place.
 * @param patches Array of Patch objects.
 * @return The padding string added to each side.
 */
string addPadding(Patch[] patches)
{
    auto paddingLength = PATCH_MARGIN;
    string nullPadding;
    for(sizediff_t x = 1; x <= paddingLength; x++){
        nullPadding ~= cast(char)x;
    }
    if( patches.length == 0 ) return nullPadding;

    // Bump all the patches forward. `ref` is required: Patch is a struct,
    // so a by-value loop variable would only update a temporary copy.
    foreach( ref patch; patches ){
        patch.start1 += paddingLength;
        patch.start2 += paddingLength;
    }

    // Add some padding on start of first diff.
    Patch* first = &patches[0];
    if( first.diffs.length == 0 || first.diffs[0].operation != Operation.EQUAL ){
        // Add nullPadding equality.
        first.diffs = Diff(Operation.EQUAL, nullPadding) ~ first.diffs;
        first.start1 -= paddingLength;  // Should be 0.
        first.start2 -= paddingLength;  // Should be 0.
        first.length1 += paddingLength;
        first.length2 += paddingLength;
    } else if (paddingLength > first.diffs[0].text.length) {
        // Grow first equality.
        auto extraLength = paddingLength - first.diffs[0].text.length;
        first.diffs[0].text = nullPadding.substr(first.diffs[0].text.length)
                            ~ first.diffs[0].text;
        first.start1 -= extraLength;
        first.start2 -= extraLength;
        first.length1 += extraLength;
        first.length2 += extraLength;
    }

    // Add some padding on end of last diff.
    Patch* last = &patches[$-1];
    if( last.diffs.length == 0 || last.diffs[$-1].operation != Operation.EQUAL ){
        // Add nullPadding equality.
        last.diffs ~= Diff(Operation.EQUAL, nullPadding);
        last.length1 += paddingLength;
        last.length2 += paddingLength;
    } else if (paddingLength > last.diffs[$-1].text.length) {
        // Grow last equality.
        auto extraLength = paddingLength - last.diffs[$-1].text.length;
        last.diffs[$-1].text ~= nullPadding.substr(0, extraLength);
        last.length1 += extraLength;
        last.length2 += extraLength;
    }
    return nullPadding;
}

/**
 * Look through the patches and break up any which are longer than the
 * maximum limit of the match algorithm.
 * Intended to be called only from within apply.
 * @param patches List of Patch objects; taken by reference because the
 *        list itself grows and shrinks while patches are split.
 */
void splitMax(ref Patch[] patches)
{
    auto patch_size = MATCH_MAXBITS;
    for( auto x = 0; x < patches.length; x++ ){
        if( patches[x].length1 <= patch_size ) continue;
        Patch bigpatch = patches[x];
        // Remove the big old patch.
        patches.splice(x--, 1);
        auto start1 = bigpatch.start1;
        auto start2 = bigpatch.start2;
        string precontext;
        while( bigpatch.diffs.length != 0 ){
            // Create one of several smaller patches.
            Patch patch;
            bool empty = true;
            patch.start1 = start1 - precontext.length;
            patch.start2 = start2 - precontext.length;
            if( precontext.length != 0 ){
                patch.length1 = patch.length2 = precontext.length;
                patch.diffs ~= Diff(Operation.EQUAL, precontext);
            }
            while( bigpatch.diffs.length != 0
                    && patch.length1 < patch_size - PATCH_MARGIN ){
                Operation diff_type = bigpatch.diffs[0].operation;
                auto diff_text = bigpatch.diffs[0].text;
                if( diff_type == Operation.INSERT ){
                    // Insertions are harmless.
                    patch.length2 += diff_text.length;
                    start2 += diff_text.length;
                    patch.diffs ~= bigpatch.diffs[0];
                    bigpatch.diffs.remove(0);
                    empty = false;
                } else if( diff_type == Operation.DELETE && patch.diffs.length == 1
                        && patch.diffs[0].operation == Operation.EQUAL
                        && diff_text.length > 2 * patch_size ){
                    // This is a large deletion. Let it pass in one chunk.
                    patch.length1 += diff_text.length;
                    start1 += diff_text.length;
                    empty = false;
                    patch.diffs ~= Diff(diff_type, diff_text);
                    bigpatch.diffs.remove(0);
                } else {
                    // Deletion or equality. Only takes as much as we can stomach.
                    diff_text = diff_text.substr(0,
                        min(diff_text.length, patch_size - patch.length1 - PATCH_MARGIN));
                    patch.length1 += diff_text.length;
                    start1 += diff_text.length;
                    if( diff_type == Operation.EQUAL ){
                        patch.length2 += diff_text.length;
                        start2 += diff_text.length;
                    } else {
                        empty = false;
                    }
                    patch.diffs ~= Diff(diff_type, diff_text);
                    if( diff_text == bigpatch.diffs[0].text ){
                        bigpatch.diffs.remove(0);
                    } else {
                        bigpatch.diffs[0].text =
                            bigpatch.diffs[0].text.substr(diff_text.length);
                    }
                }
            }
            // Compute the head context for the next patch: the last
            // PATCH_MARGIN characters. The length is compared before slicing
            // because `length - PATCH_MARGIN` is unsigned and would wrap
            // around for short strings.
            precontext = diff_text2(patch.diffs);
            if( precontext.length > PATCH_MARGIN ){
                precontext = precontext[$ - PATCH_MARGIN .. $];
            }

            // Append the end context for this patch.
            auto postcontext = diff_text1(bigpatch.diffs);
            if( postcontext.length > PATCH_MARGIN ){
                postcontext = postcontext.substr(0, PATCH_MARGIN);
            }

            if( postcontext.length != 0 ){
                patch.length1 += postcontext.length;
                patch.length2 += postcontext.length;
                if( patch.diffs.length != 0
                        && patch.diffs[$ - 1].operation == Operation.EQUAL ){
                    // Extend the trailing equality (the last element is $ - 1).
                    patch.diffs[$ - 1].text ~= postcontext;
                } else {
                    patch.diffs ~= Diff(Operation.EQUAL, postcontext);
                }
            }
            if( !empty ){
                patches.splice(++x, 0, [patch]);
            }
        }
    }
}

/**
 * Take a list of patches and return a textual representation.
 * @param patches List of Patch objects.
 * @return Text representation of patches.
 */
public string patch_toText(in Patch[] patches)
{
    auto text = appender!string();
    foreach (aPatch; patches)
        text ~= aPatch.toString();
    return text.data;
}

/**
 * Parse a textual representation of patches and return a List of Patch
 * objects.
 * @param textline Text representation of patches.
 * @return List of Patch objects (null for empty input).
 * @throws Exception If a patch header or a line prefix is invalid.
 */
public Patch[] patch_fromText(string textline)
{
    import std.regex : regex, matchFirst;
    import std.string : format, split;

    auto patches = appender!(Patch[])();
    if (textline.length == 0) return null;

    auto text = textline.split("\n");
    sizediff_t textPointer = 0;
    auto patchHeader = regex("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$");
    char sign;
    string line;
    while (textPointer < text.length) {
        auto m = matchFirst(text[textPointer], patchHeader);
        enforce (m, "Invalid patch string: " ~ text[textPointer]);

        // Header coordinates are 1-based unless the length is 0.
        Patch patch;
        patch.start1 = m[1].to!sizediff_t;
        if (m[2].length == 0) {
            patch.start1--;
            patch.length1 = 1;
        } else if (m[2] == "0") {
            patch.length1 = 0;
        } else {
            patch.start1--;
            patch.length1 = m[2].to!sizediff_t;
        }

        patch.start2 = m[3].to!sizediff_t;
        if (m[4].length == 0) {
            patch.start2--;
            patch.length2 = 1;
        } else if (m[4] == "0") {
            patch.length2 = 0;
        } else {
            patch.start2--;
            patch.length2 = m[4].to!sizediff_t;
        }
        textPointer++;

        while (textPointer < text.length) {
            import std.uri : decodeComponent;
            if (textPointer >= text.length || !text[textPointer].length) {
                // Blank line?  Whatever.
                textPointer++;
                continue;
            }
            sign = text[textPointer][0];
            line = text[textPointer][1 .. $];
            // Protect literal '+' before URI-decoding.
            line = line.replace("+", "%2b");
            line = decodeComponent(line);
            if (sign == '-') {
                // Deletion.
                patch.diffs ~= Diff(Operation.DELETE, line);
            } else if (sign == '+') {
                // Insertion.
                patch.diffs ~= Diff(Operation.INSERT, line);
            } else if (sign == ' ') {
                // Minor equality.
                patch.diffs ~= Diff(Operation.EQUAL, line);
            } else if (sign == '@') {
                // Start of next patch.
                break;
            } else {
                // WTF?
                throw new Exception(format("Invalid patch mode '%s' in: %s", sign, line));
            }
            textPointer++;
        }

        patches ~= patch;
    }
    return patches.data;
}