Mercurial > jhg
annotate src/org/tmatesoft/hg/internal/PatchGenerator.java @ 533:e6f72c9829a6
Generate patches using diff algorithm
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> | 
|---|---|
| date | Wed, 30 Jan 2013 15:48:36 +0100 | 
| parents | |
| children | dd4f6311af52 | 
| rev | line source | 
|---|---|
| 
533
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
1 /* | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
2 * Copyright (c) 2013 TMate Software Ltd | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
3 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
4 * This program is free software; you can redistribute it and/or modify | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
5 * it under the terms of the GNU General Public License as published by | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
6 * the Free Software Foundation; version 2 of the License. | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
7 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
8 * This program is distributed in the hope that it will be useful, | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
11 * GNU General Public License for more details. | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
12 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
13 * For information on how to redistribute this software under | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
14 * the terms of a license other than GNU General Public License | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
15 * contact TMate Software at support@hg4j.com | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
16 */ | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
17 package org.tmatesoft.hg.internal; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
18 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
19 import java.util.ArrayList; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
20 import java.util.HashMap; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
21 import java.util.Map; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
22 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
23 import org.tmatesoft.hg.repo.HgDataFile; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
24 import org.tmatesoft.hg.repo.HgLookup; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
25 import org.tmatesoft.hg.repo.HgRepository; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
26 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
27 /** | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
28 * Mercurial cares about changes only up to the line level, e.g. a simple file version bump in manifest looks like (RevlogDump output): | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
29 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
30 * 522: 233748 0 103 17438 433 522 521 -1 756073cf2321df44d3ed0585f2a5754bc8a1b2f6 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
31 * <PATCH>: | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
32 * 3487..3578, 91:src/org/tmatesoft/hg/core/HgIterateDirection.java\00add61a8a665c5d8f092210767f812fe0d335ac8 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
33 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
34 * I.e. for the {fname}{revision} entry format of manifest, not only {revision} is changed, but the whole line, with unchanged {fname} is recorded | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
35 * in the patch. | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
36 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
37 * Mercurial paper describes reasons for choosing this approach to delta generation, too. | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
38 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
39 * | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
40 * @author Artem Tikhomirov | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
41 * @author TMate Software Ltd. | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
42 */ | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
43 public class PatchGenerator { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
44 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
45 private Map<ChunkSequence.ByteChain, IntVector> chunk2UseIndex; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
46 private ChunkSequence seq1, seq2; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
47 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
48 // get filled by #longestMatch, track start of common sequence in seq1 and seq2, respectively | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
49 private int matchStartS1, matchStartS2; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
50 // get filled by #findMatchingBlocks, track start of changed/unknown sequence in seq1 and seq2 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
51 private int changeStartS1, changeStartS2; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
52 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
53 public void init(byte[] data1, byte[] data2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
54 seq1 = new ChunkSequence(data1); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
55 seq1.splitByNewlines(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
56 seq2 = new ChunkSequence(data2); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
57 seq2.splitByNewlines(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
58 prepare(seq2); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
59 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
60 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
61 private void prepare(ChunkSequence s2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
62 chunk2UseIndex = new HashMap<ChunkSequence.ByteChain, IntVector>(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
63 for (int i = 0, len = s2.chunkCount(); i < len; i++) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
64 ChunkSequence.ByteChain bc = s2.chunk(i); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
65 IntVector loc = chunk2UseIndex.get(bc); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
66 if (loc == null) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
67 chunk2UseIndex.put(bc, loc = new IntVector()); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
68 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
69 loc.add(i); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
70 // bc.registerUseIn(i) - BEWARE, use of bc here is incorrect | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
71 // in this case need to find the only ByteChain to keep indexes | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
72 // i.e. when there are few equal ByteChain instances, notion of "usedIn" shall be either shared (reference same vector) | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
73 // or kept within only one of them | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
74 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
75 // for (ChunkSequence.ByteChain bc : chunk2UseIndex.keySet()) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
76 // System.out.printf("%s: {", new String(bc.data())); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
77 // for (int x : chunk2UseIndex.get(bc).toArray()) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
78 // System.out.printf(" %d,", x); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
79 // } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
80 // System.out.println("}"); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
81 // } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
82 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
83 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
84 public void findMatchingBlocks() { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
85 changeStartS1 = changeStartS2 = 0; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
86 findMatchingBlocks(0, seq1.chunkCount(), 0, seq2.chunkCount()); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
87 if (changeStartS1 < seq1.chunkCount() || changeStartS2 < seq2.chunkCount()) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
88 reportDeltaElement(seq1.chunkCount(), seq2.chunkCount()); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
89 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
90 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
91 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
92 /** | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
93 * implementation based on Python's difflib.py and SequenceMatcher | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
94 */ | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
95 public int longestMatch(int startS1, int endS1, int startS2, int endS2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
96 matchStartS1 = matchStartS2 = 0; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
97 int maxLength = 0; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
98 IntMap<Integer> chunkIndex2MatchCount = new IntMap<Integer>(8); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
99 for (int i = startS1; i < endS1; i++) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
100 ChunkSequence.ByteChain bc = seq1.chunk(i); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
101 IntMap<Integer> newChunkIndex2MatchCount = new IntMap<Integer>(8); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
102 IntVector occurencesInS2 = chunk2UseIndex.get(bc); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
103 if (occurencesInS2 == null) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
104 // chunkIndex2MatchCount.clear(); // TODO need clear instead of new instance | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
105 chunkIndex2MatchCount = newChunkIndex2MatchCount; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
106 continue; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
107 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
108 for (int j : occurencesInS2.toArray()) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
109 // s1[i] == s2[j] | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
110 if (j < startS2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
111 continue; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
112 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
113 if (j >= endS2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
114 break; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
115 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
116 int prevChunkMatches = chunkIndex2MatchCount.containsKey(j-1) ? chunkIndex2MatchCount.get(j-1) : 0; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
117 int k = prevChunkMatches + 1; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
118 newChunkIndex2MatchCount.put(j, k); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
119 if (k > maxLength) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
120 matchStartS1 = i-k+1; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
121 matchStartS2 = j-k+1; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
122 maxLength = k; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
123 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
124 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
125 chunkIndex2MatchCount = newChunkIndex2MatchCount; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
126 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
127 return maxLength; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
128 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
129 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
130 public void findMatchingBlocks(int startS1, int endS1, int startS2, int endS2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
131 int matchLength = longestMatch(startS1, endS1, startS2, endS2); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
132 if (matchLength > 0) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
133 final int saveStartS1 = matchStartS1, saveStartS2 = matchStartS2; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
134 if (startS1 < matchStartS1 && startS2 < matchStartS2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
135 findMatchingBlocks(startS1, matchStartS1, startS2, matchStartS2); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
136 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
137 reportDeltaElement(saveStartS1, saveStartS2); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
138 changeStartS1 = saveStartS1 + matchLength; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
139 changeStartS2 = saveStartS2 + matchLength; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
140 // System.out.printf("match: from line #%d and line #%d of length %d\n", saveStartS1, saveStartS2, matchLength); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
141 if (saveStartS1+matchLength < endS1 && saveStartS2+matchLength < endS2) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
142 findMatchingBlocks(saveStartS1 + matchLength, endS1, saveStartS2 + matchLength, endS2); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
143 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
144 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
145 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
146 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
147 private Patch deltaCollector; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
148 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
149 private void reportDeltaElement(int i, int j) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
150 if (changeStartS1 < i) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
151 if (changeStartS2 < j) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
152 System.out.printf("changed [%d..%d) with [%d..%d)\n", changeStartS1, i, changeStartS2, j); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
153 } else { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
154 assert changeStartS2 == j; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
155 System.out.printf("deleted [%d..%d)\n", changeStartS1, i); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
156 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
157 if (deltaCollector != null) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
158 int from = seq1.chunk(changeStartS1).getOffset(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
159 int to = seq1.chunk(i).getOffset(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
160 byte[] data = seq2.data(changeStartS2, j); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
161 deltaCollector.add(from, to, data); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
162 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
163 } else { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
164 assert changeStartS1 == i; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
165 if(changeStartS2 < j) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
166 System.out.printf("added [%d..%d)\n", changeStartS2, j); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
167 } else { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
168 assert changeStartS2 == j; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
169 System.out.printf("adjustent equal blocks %d, %d and %d,%d\n", changeStartS1, i, changeStartS2, j); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
170 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
171 if (deltaCollector != null) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
172 int insPoint = seq1.chunk(changeStartS1).getOffset(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
173 byte[] data = seq2.data(changeStartS2, j); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
174 deltaCollector.add(insPoint, insPoint, data); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
175 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
176 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
177 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
178 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
179 public static void main(String[] args) throws Exception { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
180 HgRepository repo = new HgLookup().detectFromWorkingDir(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
181 HgDataFile df = repo.getFileNode("cmdline/org/tmatesoft/hg/console/Main.java"); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
182 ByteArrayChannel bac1, bac2; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
183 df.content(80, bac1 = new ByteArrayChannel()); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
184 df.content(81, bac2 = new ByteArrayChannel()); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
185 // String s1 = "line 1\nline 2\r\nline 3\n\nline 1\nline 2"; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
186 // String s2 = "abc\ncdef\r\nline 2\r\nline 3\nline 2"; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
187 PatchGenerator pg = new PatchGenerator(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
188 pg.init(bac1.toArray(), bac2.toArray()); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
189 pg.findMatchingBlocks(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
190 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
191 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
192 public Patch delta(byte[] prev, byte[] content) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
193 deltaCollector = new Patch(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
194 init(prev, content); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
195 findMatchingBlocks(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
196 return deltaCollector; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
197 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
198 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
199 private static class ChunkSequence { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
200 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
201 private final byte[] input; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
202 private ArrayList<ByteChain> lines; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
203 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
204 public ChunkSequence(byte[] data) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
205 input = data; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
206 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
207 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
208 public void splitByNewlines() { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
209 lines = new ArrayList<ByteChain>(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
210 int lastStart = 0; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
211 for (int i = 0; i < input.length; i++) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
212 if (input[i] == '\n') { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
213 lines.add(new ByteChain(lastStart, i+1)); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
214 lastStart = i+1; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
215 } else if (input[i] == '\r') { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
216 if (i+1 < input.length && input[i+1] == '\n') { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
217 i++; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
218 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
219 lines.add(new ByteChain(lastStart, i+1)); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
220 lastStart = i+1; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
221 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
222 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
223 if (lastStart < input.length) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
224 lines.add(new ByteChain(lastStart, input.length)); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
225 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
226 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
227 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
228 public ByteChain chunk(int index) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
229 return lines.get(index); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
230 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
231 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
232 public int chunkCount() { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
233 return lines.size(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
234 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
235 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
236 public byte[] data(int chunkFrom, int chunkTo) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
237 if (chunkFrom == chunkTo) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
238 return new byte[0]; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
239 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
240 int from = chunk(chunkFrom).getOffset(), to = chunk(chunkTo).getOffset(); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
241 byte[] rv = new byte[to - from]; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
242 System.arraycopy(input, from, rv, 0, rv.length); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
243 return rv; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
244 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
245 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
246 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
247 final class ByteChain { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
248 private final int start, end; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
249 private final int hash; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
250 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
251 ByteChain(int s, int e) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
252 start = s; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
253 end = e; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
254 hash = calcHash(input, s, e); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
255 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
256 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
257 /** | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
258 * byte offset of the this ByteChain inside ChainSequence | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
259 */ | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
260 public int getOffset() { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
261 return start; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
262 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
263 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
264 public byte[] data() { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
265 byte[] rv = new byte[end - start]; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
266 System.arraycopy(input, start, rv, 0, rv.length); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
267 return rv; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
268 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
269 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
270 @Override | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
271 public boolean equals(Object obj) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
272 if (obj == null || obj.getClass() != ByteChain.class) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
273 return false; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
274 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
275 ByteChain other = (ByteChain) obj; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
276 if (other.hash != hash || other.end - other.start != end - start) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
277 return false; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
278 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
279 return other.match(input, start); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
280 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
281 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
282 private boolean match(byte[] oi, int from) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
283 for (int i = start, j = from; i < end; i++, j++) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
284 if (ChunkSequence.this.input[i] != oi[j]) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
285 return false; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
286 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
287 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
288 return true; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
289 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
290 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
291 @Override | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
292 public int hashCode() { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
293 return hash; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
294 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
295 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
296 @Override | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
297 public String toString() { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
298 return String.format("[@%d\"%s\"]", start, new String(data())); | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
299 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
300 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
301 | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
302 // same as Arrays.hashCode(byte[]), just for a slice of a bigger array | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
303 static int calcHash(byte[] data, int from, int to) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
304 int result = 1; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
305 for (int i = from; i < to; i++) { | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
306 result = 31 * result + data[i]; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
307 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
308 return result; | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
309 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
310 } | 
| 
 
e6f72c9829a6
Generate patches using diff algorithm
 
Artem Tikhomirov <tikhomirov.artem@gmail.com> 
parents:  
diff
changeset
 | 
311 } | 
