Mercurial > hg4j
changeset 656:a937e63b6e02
Performance: rebuild information about branches takes too long (my improvement: 3 times, 11-15 s to less than 4 sec)
| author | Artem Tikhomirov <tikhomirov.artem@gmail.com> | 
|---|---|
| date | Thu, 04 Jul 2013 18:40:03 +0200 | 
| parents | bcbcc318f250 | 
| children | 6334b0267103 | 
| files | cmdline/org/tmatesoft/hg/console/Main.java src/org/tmatesoft/hg/internal/IntMap.java src/org/tmatesoft/hg/repo/HgBranches.java src/org/tmatesoft/hg/repo/HgChangelog.java src/org/tmatesoft/hg/repo/HgParentChildMap.java test/org/tmatesoft/hg/test/TestRevisionMaps.java | 
| diffstat | 6 files changed, 114 insertions(+), 104 deletions(-) [+] | 
line wrap: on
 line diff
--- a/cmdline/org/tmatesoft/hg/console/Main.java Thu Jul 04 18:36:38 2013 +0200 +++ b/cmdline/org/tmatesoft/hg/console/Main.java Thu Jul 04 18:40:03 2013 +0200 @@ -103,7 +103,7 @@ public static void main(String[] args) throws Exception { Main m = new Main(args); - m.checkFileSneakerPerformance(); +// m.checkFileSneakerPerformance(); // m.testRevert(); // m.testCheckout(); // m.tryExtensions(); @@ -119,7 +119,7 @@ // m.testEffectiveFileLog(); // m.testMergeState(); // m.testFileStatus(); -// m.dumpBranches(); + m.dumpBranches(); // m.inflaterLengthException(); // m.dumpIgnored(); // m.dumpDirstate();
--- a/src/org/tmatesoft/hg/internal/IntMap.java Thu Jul 04 18:36:38 2013 +0200 +++ b/src/org/tmatesoft/hg/internal/IntMap.java Thu Jul 04 18:40:03 2013 +0200 @@ -17,6 +17,7 @@ package org.tmatesoft.hg.internal; import java.util.Arrays; +import java.util.Collection; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; @@ -216,6 +217,13 @@ } return map; } + + public Collection<V> values() { + @SuppressWarnings("unchecked") + V[] rv = (V[]) new Object[size]; + System.arraycopy(values, 0, rv, 0, size); + return Arrays.<V>asList(rv); + } // copy of Arrays.binarySearch, with upper search limit as argument private static int binarySearch(int[] a, int high, int key) {
--- a/src/org/tmatesoft/hg/repo/HgBranches.java Thu Jul 04 18:36:38 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgBranches.java Thu Jul 04 18:40:03 2013 +0200 @@ -27,8 +27,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.LinkedList; @@ -121,7 +121,7 @@ } return -1; // deliberately not lastInCache, to avoid anything but -1 when 1st line was read and there's error is in lines 2..end } - + void collect(final ProgressSupport ps) throws HgRuntimeException { branches.clear(); final HgRepository repo = internalRepo.getRepo(); @@ -133,91 +133,68 @@ final HgParentChildMap<HgChangelog> pw = new HgParentChildMap<HgChangelog>(repo.getChangelog()); pw.init(); ps.worked(repo.getChangelog().getRevisionCount()); + // // first revision branch found at final HashMap<String, Nodeid> branchStart = new HashMap<String, Nodeid>(); - // last revision seen for the branch - final HashMap<String, Nodeid> branchLastSeen = new HashMap<String, Nodeid>(); // revisions from the branch that have no children at all final HashMap<String, List<Nodeid>> branchHeads = new HashMap<String, List<Nodeid>>(); - // revisions that are immediate children of a node from a given branch - // after iteration, there are some revisions left in this map (children of a branch last revision - // that doesn't belong to the branch. No use of this now, perhaps can deduce isInactive (e.g.those - // branches that have non-empty candidates are inactive if all their heads are roots for those left) - final HashMap<String, List<Nodeid>> branchHeadCandidates = new HashMap<String, List<Nodeid>>(); HgChangelog.Inspector insp = new HgChangelog.Inspector() { + private final ArrayList<Nodeid> parents = new ArrayList<Nodeid>(3); + public void next(int revisionNumber, Nodeid nodeid, RawChangeset cset) { String branchName = cset.branch(); + List<Nodeid> _branchHeads; + // there are chances (with --force key) branch can get more than one start + // revision. Neither BranchInfo nor this code support this scenario at the moment. if (!branchStart.containsKey(branchName)) { branchStart.put(branchName, nodeid); - branchHeads.put(branchName, new LinkedList<Nodeid>()); - branchHeadCandidates.put(branchName, new LinkedList<Nodeid>()); + branchHeads.put(branchName, _branchHeads = new LinkedList<Nodeid>()); } else { - final List<Nodeid> headCandidates = branchHeadCandidates.get(branchName); - if (headCandidates.remove(nodeid)) { - // likely we don't need to keep parent anymore, as we found at least 1 child thereof to be at the same branch - // however, it's possible the child we found is a result of an earlier fork, and revision in the - // branchLastSeen is 'parallel' head, which needs to be kept - Nodeid lastSeenInBranch = branchLastSeen.get(branchName); - // check if current revision is on descendant line. Seems direct parents check is enough - if (pw.safeFirstParent(nodeid).equals(lastSeenInBranch) || pw.safeSecondParent(nodeid).equals(lastSeenInBranch)) { - branchLastSeen.remove(branchName); + _branchHeads = branchHeads.get(branchName); + if (_branchHeads == null) { + branchHeads.put(branchName, _branchHeads = new LinkedList<Nodeid>()); + } + } + // so far present node is the best candidate for head + _branchHeads.add(nodeid); + parents.clear(); + // parents of this node, however, cease to be heads (if they are from this branch) + pw.appendParentsOf(nodeid, parents); + _branchHeads.removeAll(parents); + ps.worked(1); + } + }; + // XXX alternatively may iterate with pw.all().subList(lastCached) + // but need an effective way to find out branch of particular changeset + repo.getChangelog().range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp); + // + // build BranchInfo, based on found and cached + for (String bn : branchStart.keySet()) { + BranchInfo bi = branches.get(bn); + if (bi != null) { + // combine heads found so far with those cached + LinkedHashSet<Nodeid> oldHeads = new LinkedHashSet<Nodeid>(bi.getHeads()); + // expect size of both oldHeads and newHeads sets to be small, and for x for hence acceptable. + for (Nodeid newHead : branchHeads.get(bn)) { + for (Iterator<Nodeid> it = oldHeads.iterator(); it.hasNext();) { + if (pw.isChild(it.next(), newHead)) { + it.remove(); } } } - List<Nodeid> immediateChildren = pw.directChildren(nodeid); - if (immediateChildren.size() > 0) { - // 1) children may be in another branch - // and unless we later came across another element from this branch, - // we need to record all these as potential heads - // - // 2) head1 with children in different branch, and head2 in this branch without children - branchLastSeen.put(branchName, nodeid); - branchHeadCandidates.get(branchName).addAll(immediateChildren); - } else { - // no more children known for this node, it's (one of the) head of the branch - branchHeads.get(branchName).add(nodeid); - } - ps.worked(1); - } - }; - repo.getChangelog().range(lastCached == -1 ? 0 : lastCached+1, HgRepository.TIP, insp); - // those last seen revisions from the branch that had no children from the same branch are heads. - for (String bn : branchLastSeen.keySet()) { - // these are inactive branches? - there were children, but not from the same branch? - branchHeads.get(bn).add(branchLastSeen.get(bn)); - } - for (String bn : branchStart.keySet()) { - BranchInfo bi = branches.get(bn); - if (bi != null) { - // although heads from cache shall not intersect with heads after lastCached, - // use of LHS doesn't hurt (and makes sense e.g. if cache is not completely correct in my tests) - LinkedHashSet<Nodeid> heads = new LinkedHashSet<Nodeid>(bi.getHeads()); - for (Nodeid oldHead : bi.getHeads()) { - // XXX perhaps, need pw.canReach(Nodeid from, Collection<Nodeid> to) - List<Nodeid> newChildren = pw.childrenOf(Collections.singletonList(oldHead)); - if (!newChildren.isEmpty()) { - // likely not a head any longer, - // check if any new head can be reached from old one, and, if yes, - // do not consider that old head as head. - for (Nodeid newHead : branchHeads.get(bn)) { - if (newChildren.contains(newHead)) { - heads.remove(oldHead); - break; - } - } - } // else - oldHead still head for the branch - } - heads.addAll(branchHeads.get(bn)); - bi = new BranchInfo(bn, bi.getStart(), heads.toArray(new Nodeid[0])); + oldHeads.addAll(branchHeads.get(bn)); + assert oldHeads.size() > 0; + bi = new BranchInfo(bn, bi.getStart(), oldHeads.toArray(new Nodeid[oldHeads.size()])); } else { Nodeid[] heads = branchHeads.get(bn).toArray(new Nodeid[0]); bi = new BranchInfo(bn, branchStart.get(bn), heads); } branches.put(bn, bi); } - } + } // !cacheActual final HgChangelog clog = repo.getChangelog(); + /// FIXME use HgParentChildMap if available (need to decide how to get HgRevisionMap and HgParentChildMap to a common denominator) final HgRevisionMap<HgChangelog> rmap = new HgRevisionMap<HgChangelog>(clog).init(); for (BranchInfo bi : branches.values()) { bi.validate(clog, rmap); @@ -388,6 +365,7 @@ } // public Nodeid getTip() { // } + // XXX Not public as there are chances for few possible branch starts, and I need to decide how to handle that /*public*/ Nodeid getStart() { // first node where branch appears return start;
--- a/src/org/tmatesoft/hg/repo/HgChangelog.java Thu Jul 04 18:36:38 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgChangelog.java Thu Jul 04 18:40:03 2013 +0200 @@ -305,23 +305,7 @@ // unixTime is local time, and timezone records difference of the local time to UTC. Date _time = new Date(unixTime * 1000); String _extras = space2 < _timeString.length() ? _timeString.substring(space2 + 1) : null; - Map<String, String> _extrasMap; - final String extras_branch_key = "branch"; - if (_extras == null || _extras.trim().length() == 0) { - _extrasMap = Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME); - } else { - _extrasMap = new HashMap<String, String>(); - for (String pair : _extras.split("\00")) { - pair = decode(pair); - int eq = pair.indexOf(':'); - _extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1)); - } - if (!_extrasMap.containsKey(extras_branch_key)) { - _extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME); - } - _extrasMap = Collections.unmodifiableMap(_extrasMap); - } - + Map<String, String> _extrasMap = parseExtras(_extras); // int lastStart = breakIndex3 + 1; int breakIndex4 = indexOf(data, lineBreak, lastStart, bufferEndIndex); @@ -329,6 +313,8 @@ if (breakIndex4 > lastStart) { // if breakIndex4 == lastStart, we already found \n\n and hence there are no files (e.g. merge revision) _files = new ArrayList<String>(5); + // TODO pool file names + // TODO encoding of filenames? while (breakIndex4 != -1 && breakIndex4 + 1 < bufferEndIndex) { _files.add(new String(data, lastStart, breakIndex4 - lastStart)); lastStart = breakIndex4 + 1; @@ -364,6 +350,34 @@ this.extras = _extrasMap; } + private Map<String, String> parseExtras(String _extras) { + final String extras_branch_key = "branch"; + _extras = _extras == null ? null : _extras.trim(); + if (_extras == null || _extras.length() == 0) { + return Collections.singletonMap(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME); + } + Map<String, String> _extrasMap = new HashMap<String, String>(); + int lastIndex = 0; + do { + String pair; + int sp = _extras.indexOf('\0', lastIndex); + if (sp == -1) { + sp = _extras.length(); + } + if (sp > lastIndex) { + pair = _extras.substring(lastIndex, sp); + pair = decode(pair); + int eq = pair.indexOf(':'); + _extrasMap.put(pair.substring(0, eq), pair.substring(eq + 1)); + lastIndex = sp + 1; + } + } while (lastIndex < _extras.length()); + if (!_extrasMap.containsKey(extras_branch_key)) { + _extrasMap.put(extras_branch_key, HgRepository.DEFAULT_BRANCH_NAME); + } + return Collections.unmodifiableMap(_extrasMap); + } + private static int indexOf(byte[] src, byte what, int startOffset, int endIndex) { for (int i = startOffset; i < endIndex; i++) { if (src[i] == what) {
--- a/src/org/tmatesoft/hg/repo/HgParentChildMap.java Thu Jul 04 18:36:38 2013 +0200 +++ b/src/org/tmatesoft/hg/repo/HgParentChildMap.java Thu Jul 04 18:40:03 2013 +0200 @@ -28,6 +28,7 @@ import java.util.List; import org.tmatesoft.hg.core.Nodeid; +import org.tmatesoft.hg.internal.IntMap; import org.tmatesoft.hg.repo.Revlog.ParentInspector; /** @@ -67,7 +68,8 @@ private Nodeid[] firstParent; // parents by natural order (i.e. firstParent[A] is parent of revision with index A) private Nodeid[] secondParent; private final T revlog; - private BitSet heads; // 1 indicates revision got children + private IntMap<Nodeid> heads; + private BitSet headsBitSet; // 1 indicates revision got children, != null only during init; public HgParentChildMap(T owner) { @@ -86,11 +88,11 @@ sequential[ix] = sorted[ix] = revision; if (parent1Revision != -1) { firstParent[ix] = sequential[parent1Revision]; - heads.set(parent1Revision); + headsBitSet.set(parent1Revision); } if (parent2Revision != -1) { // revlog of DataAccess.java has p2 set when p1 is -1 secondParent[ix] = sequential[parent2Revision]; - heads.set(parent2Revision); + headsBitSet.set(parent2Revision); } } @@ -104,12 +106,13 @@ firstParent = new Nodeid[revisionCount]; // TODO [post 1.1] Branches/merges are less frequent, and most of secondParent would be -1/null, hence // IntMap might be better alternative here, but need to carefully analyze (test) whether this brings - // real improvement (IntMap has 2n capacity, and element lookup is log(n) instead of array's constant) + // real improvement (IntMap has 2n capacity, and element lookup is log(n) instead of array's constant). + // FWIW: in cpython's repo, with 70k+ revisions, there are 2618 values in secondParent secondParent = new Nodeid[revisionCount]; // sequential = new Nodeid[revisionCount]; sorted = new Nodeid[revisionCount]; - heads = new BitSet(revisionCount); + headsBitSet = new BitSet(revisionCount); revlog.indexWalk(0, TIP, this); Arrays.sort(sorted); // FIXME use ArrayHelper instead @@ -120,6 +123,19 @@ assertSortedIndex(x); sorted2natural[x] = i; } + // no reason to keep BitSet, number of heads is usually small + IntMap<Nodeid> _heads = new IntMap<Nodeid>(headsBitSet.size() - headsBitSet.cardinality()); + int index = 0; + while (index < sequential.length) { + index = headsBitSet.nextClearBit(index); + // nextClearBit(length-1) gives length when bit is set, + // however, last revision can't be a parent of any other, and + // the last bit would be always 0, and no AIOOBE + _heads.put(index, sequential[index]); + index++; + } + headsBitSet = null; + heads = _heads; } private void assertSortedIndex(int x) { @@ -215,6 +231,8 @@ return result; } + public long AAA = 0; + /** * @return revisions that have supplied revision as their immediate parent */ @@ -226,8 +244,9 @@ if (!hasChildren(start)) { return Collections.emptyList(); } - LinkedList<Nodeid> result = new LinkedList<Nodeid>(); + ArrayList<Nodeid> result = new ArrayList<Nodeid>(5); for (int i = start + 1; i < sequential.length; i++) { + AAA++; if (nid == firstParent[i] || nid == secondParent[i]) { result.add(sequential[i]); } @@ -291,20 +310,10 @@ * @return elements of this map that do not have a child recorded therein. */ public Collection<Nodeid> heads() { - ArrayList<Nodeid> result = new ArrayList<Nodeid>(); - int index = 0; - do { - index = heads.nextClearBit(index); - // nextClearBit(length-1) gives length when bit is set, - // however, last revision can't be a parent of any other, and - // the last bit would be always 0, and no AIOOBE - result.add(sequential[index]); - index++; - } while (index < sequential.length); - return result; + return heads.values(); } private boolean hasChildren(int sequentialIndex) { - return heads.get(sequentialIndex); + return !heads.containsKey(sequentialIndex); } } \ No newline at end of file
--- a/test/org/tmatesoft/hg/test/TestRevisionMaps.java Thu Jul 04 18:36:38 2013 +0200 +++ b/test/org/tmatesoft/hg/test/TestRevisionMaps.java Thu Jul 04 18:40:03 2013 +0200 @@ -16,6 +16,7 @@ */ package org.tmatesoft.hg.test; +import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashSet; @@ -84,7 +85,7 @@ } // heads - errorCollector.assertEquals(Arrays.asList(allRevs[7], allRevs[9]), parentHelper.heads()); + errorCollector.assertEquals(Arrays.asList(allRevs[7], allRevs[9]), new ArrayList<Nodeid>(parentHelper.heads())); // isChild errorCollector.assertTrue(parentHelper.isChild(allRevs[1], allRevs[9])); errorCollector.assertTrue(parentHelper.isChild(allRevs[0], allRevs[7]));
