Merge lp:~ian-clatworthy/bzr/faster-log-file into lp:~bzr/bzr/trunk-old

Proposed by Ian Clatworthy
Status: Work in progress
Proposed branch: lp:~ian-clatworthy/bzr/faster-log-file
Merge into: lp:~bzr/bzr/trunk-old
Diff against target: 220 lines (has conflicts)
Text conflict in NEWS
To merge this branch: bzr merge lp:~ian-clatworthy/bzr/faster-log-file
Reviewer: Martin Pool
Status: Needs Information
Review via email: mp+7535@code.launchpad.net

This proposal supersedes a proposal from 2009-05-27.

Ian Clatworthy (ian-clatworthy) wrote : Posted in a previous version of this proposal

This patch speeds up 'bzr log FILE' on flat-ish histories, as commonly found after an import from svn, cvs and other central VCS tools. On OOo, it drops the time taken to log a typical file from 29 seconds to 1.5 seconds.

The key to this improvement is starting with the per-file graph and searching the mainline until the revisions of interest are found. That works very well when the history of a project is flat or mostly flat, because it avoids the 27 seconds required to calculate the full revision graph. In a nutshell, the algorithm changes from O(full-history) to O(file-life).
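
In rough pseudocode, the idea is something like this (an illustrative sketch only, not the patch's code; it assumes the per-file revision ids and a newest-first mainline iterator are already available):

def file_revisions_on_mainline(mainline_newest_first, per_file_rev_ids):
    """Collect the mainline revisions that touched the file.

    The scan stops as soon as every revision in the per-file graph has
    been seen, so on a flat history the cost is bounded by the file's
    life rather than by the size of the whole revision graph.
    """
    wanted = set(per_file_rev_ids)
    hits = []
    for rev_id in mainline_newest_first:
        if rev_id in wanted:
            hits.append(rev_id)
            wanted.discard(rev_id)
            if not wanted:
                break  # found them all; no need to walk further back
    return hits

On a completely flat history every per-file revision sits on the mainline, so the loop stops at the file's oldest revision instead of walking the project's entire history.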

There's certainly room for further smarts here, but I feel this is a useful step forward as it stands.

John A Meinel (jameinel) wrote : Posted in a previous version of this proposal

So I see that you avoided "incorrect" results on non-linear ancestries by including checks for this case. However:

1) I'm not sure that the checks are complete. For example, it doesn't matter whether the per-file graph has merges or not, as to how the 'include-merges' flag should be handled. Consider the case:

  :
  A
  |\
  | B # Mod foo
  |/
  C # Merge B's changes

In that case we want to see both revisions B and C in the "bzr log foo" output. Even though the per-file graph in this case looks simply like:

 :
 B # Mod foo
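
A toy version of this, with plain dicts standing in for bzrlib's graph objects, shows why scanning the mainline for per-file graph members alone cannot surface C:

# Toy data for the history sketched above: C merges B, and only B
# modified 'foo'.
revision_parents = {'A': [], 'B': ['A'], 'C': ['A', 'B']}
mainline = ['C', 'A']            # left-hand (top-level) history, newest first
foo_file_revisions = {'B'}       # the per-file graph for 'foo'

# Filtering the mainline by per-file graph membership alone finds nothing:
assert [r for r in mainline if r in foo_file_revisions] == []

# A merge-aware filter must credit C, whose merged (non left-hand) parents
# reach a revision that touched foo. (Real code would walk the whole
# merged ancestry, not just the direct parents.)
def touched_via_merge(rev_id):
    merged_parents = set(revision_parents[rev_id][1:])
    return bool(merged_parents & foo_file_revisions)

assert [r for r in mainline
        if r in foo_file_revisions or touched_via_merge(r)] == ['C']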

2) I'm a bit concerned that we do all of this work with _linear_view_revisions which in the common case for OOo will have to walk the *entire* history (assuming 'bzr log foo' with no -r specified), which we then throw away.

At least, I'm expecting that once a project like OOo changes to a DVCS, they will actually start including merges. Which means that they'll still have 200k revisions in the mainline, but then *also* have all sorts of merge revisions after that 200k...

I guess, I'm mostly worried that while this makes some bits much faster for your OOo testing, it will actually cause regressions in a lot of other cases.

Consider 'bzr log bzrlib/builtins.py', how much time will be spent in this code, just to have it end up deciding to return None?

review: Needs Information
Ian Clatworthy (ian-clatworthy) wrote : Posted in a previous version of this proposal

> 1) I'm not sure that the checks are complete. For example, it doesn't matter
> whether the per-file graph has merges or not, as to how the 'include-merges'
> flag should be handled. Consider the case:
>
> :
> A
> |\
> | B # Mod foo
> |/
> C # Merge B's changes
>
> In that case we want to see both revisions B and C in the "bzr log foo"
> output. Even though the per-file graph in this case looks simply like:
>
> :
> B # Mod foo

I think the code handles this ok. If log --include-merges (or -n0) is given, it uses the old algorithm immediately. Otherwise, it will just show B.

> 2) I'm a bit concerned that we do all of this work with _linear_view_revisions
> which in the common case for OOo will have to walk the *entire* history
> (assuming 'bzr log foo' with no -r specified), which we then throw away.
>
> At least, I'm expecting that once a project like OOo changes to a DVCS, they
> will actually start including merges. Which means that they'll still have 200k
> revisions in the mainline, but then *also* have all sorts of merge revisions
> after that 200k...
>
> I guess, I'm mostly worried that while this makes some bits much faster for
> your OOo testing, it will actually cause regressions in a lot of other cases.
>
> Consider 'bzr log bzrlib/builtins.py', how much time will be spent in this
> code, just to have it end up deciding to return None?

Good points. I ran the benchmark you suggested and it did indeed indicate a problem. I'll push an updated patch.

Ian Clatworthy (ian-clatworthy) wrote :

This patch speeds up log FILE on flat-ish histories: 29 secs => 1 sec for OOo. As noted in the previous merge proposal, it does this by starting from the per-file graph and looking for the revisions of interest along the mainline, instead of always assuming that the full revision graph is required. This win is achieved for files whose history is flat or mostly flat between their creation and their last edit.

In response to John's feedback on the earlier proposal, this version now has some additional checks to ensure that we don't walk the whole mainline only to come up short. In particular, it periodically uses revision timestamps as a sanity check. It also bails out if the revision history is clearly dense during the file's edit lifetime. So even as flat histories mutate into dense histories, the win will remain for most files.

Collectively, these new checks maintain the original win while keeping the overhead on dense histories acceptably small (IMO): roughly 10%. For example, 'bzr log NEWS' on Bazaar's trunk goes from 2.7 to 3.0 seconds, while 'bzr log bzrlib/builtins.py' goes from 2.4 to 2.6 seconds. That 0.2-0.3 second increase isn't noticeable in practice, while the reduction from 29 to 1 second obviously is.
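
Condensed, the two bail-out heuristics look roughly like this (the real implementation is _calc_view_revisions_for_file in the diff below; the 100-merge and 500-revision thresholds come from the patch, while the callables here are illustrative stand-ins for the bzrlib calls):

def scan_mainline(candidates, wanted_rev_ids, has_merges, get_timestamp,
                  created_rev_id, merge_limit=100, check_every=500):
    """Return matching (rev_id, revno, depth) tuples, newest first, or
    None if the fast path looks unprofitable and the caller should fall
    back to the full-graph algorithm.
    """
    result = []
    missing = set(wanted_rev_ids)
    merges_seen = 0
    created_timestamp = None
    for index, (rev_id, revno, depth) in enumerate(candidates):
        if rev_id in missing:
            result.append((rev_id, revno, depth))
            missing.discard(rev_id)
            if not missing:
                break
        else:
            if has_merges(rev_id):
                merges_seen += 1
                if merges_seen > merge_limit:
                    return None  # dense history: the fast path won't pay off
            if index and index % check_every == 0:
                # Periodic timestamp sanity check: if we have walked back
                # past the revision that created the file, the file most
                # likely originated in a merge and won't be found on the
                # mainline at all.
                if created_timestamp is None:
                    created_timestamp = get_timestamp(created_rev_id)
                if created_timestamp > get_timestamp(rev_id):
                    return None
    return result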

Martin Pool (mbp) wrote :

I think you should test this on some files from MySQL or Launchpad,
which, I expect, have a longer but still very bushy history.

This looks fairly plausible to me, but I think there should be a unit
test for the fairly nontrivial function you've added. The approach here
of falling back to the old code means that any bugs here may not
actually be exercised by the test suite.
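
A minimal test along those lines might look something like this (only a sketch: it assumes the helper names and return shape from this branch and has not been run against it):

from bzrlib import log
from bzrlib.tests import TestCaseWithTransport


class TestCalcViewRevisionsForFile(TestCaseWithTransport):

    def test_linear_file_history(self):
        # Two commits touching 'foo' on an otherwise flat mainline.
        tree = self.make_branch_and_tree('.')
        self.build_tree_contents([('foo', 'one\n')])
        tree.add(['foo'], ['foo-id'])
        tree.commit('add foo', rev_id='rev-1')
        self.build_tree_contents([('foo', 'two\n')])
        tree.commit('change foo', rev_id='rev-2')
        b = tree.branch
        b.lock_read()
        self.addCleanup(b.unlock)
        file_graph, tip = log._per_file_graph(b, 'foo-id', None)
        revs = log._calc_view_revisions_for_file(
            b, file_graph, tip, None, None, 'reverse', False)
        self.assertEqual(['rev-2', 'rev-1'],
                         [rev_id for rev_id, revno, depth in revs])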

=== modified file 'NEWS'
--- NEWS 2009-06-16 09:05:34 +0000
+++ NEWS 2009-06-17 05:48:37 +0000
@@ -29,6 +29,10 @@
 Improvements
 ************
 
+* ``bzr log FILE`` is now substantially faster on flat-ish histories.
+  On OpenOffice.org for example, logging a typical file now takes
+  a second or so instead of 29 seconds. (Ian Clatworthy)
+
 * Resolving a revno to a revision id on a branch accessed via ``bzr://``
   or ``bzr+ssh://`` is now much faster and involves no VFS operations.
   This speeds up commands like ``bzr pull -r 123``. (Andrew Bennetts)

=== modified file 'bzrlib/builtins.py'
--- bzrlib/builtins.py 2009-06-15 06:47:14 +0000
+++ bzrlib/builtins.py 2009-06-16 12:46:50 +0000
@@ -2237,16 +2237,14 @@
             # the underlying repository format is faster at generating
             # deltas or can provide everything we need from the indices.
             # The default algorithm - match-using-deltas - works for
-            # multiple files and directories and is faster for small
-            # amounts of history (200 revisions say). However, it's too
+            # multiple files and directories. However, it's too
             # slow for logging a single file in a repository with deep
             # history, i.e. > 10K revisions. In the spirit of "do no
             # evil when adding features", we continue to use the
             # original algorithm - per-file-graph - for the "single
             # file that isn't a directory without showing a delta" case.
-            partial_history = revision and b.repository._format.supports_chks
             match_using_deltas = (len(file_ids) != 1 or filter_by_dir
-                or delta_type or partial_history)
+                or delta_type)
 
             # Build the LogRequest and execute it
             if len(file_ids) == 0:

=== modified file 'bzrlib/log.py'
--- bzrlib/log.py 2009-06-10 03:56:49 +0000
+++ bzrlib/log.py 2009-06-17 05:35:10 +0000
@@ -70,6 +70,7 @@
     diff,
     errors,
     foreign,
+    graph,
     repository as _mod_repository,
     revision as _mod_revision,
     revisionspec,
@@ -460,21 +461,150 @@
             direction=rqst.get('direction'))
 
     def _log_revision_iterator_using_per_file_graph(self):
-        # Get the base revisions, filtering by the revision range.
-        # Note that we always generate the merge revisions because
-        # filter_revisions_touching_file_id() requires them ...
         rqst = self.rqst
-        view_revisions = _calc_view_revisions(self.branch, self.start_rev_id,
-            self.end_rev_id, rqst.get('direction'), True)
-        if not isinstance(view_revisions, list):
-            view_revisions = list(view_revisions)
-        view_revisions = _filter_revisions_touching_file_id(self.branch,
-            rqst.get('specific_fileids')[0], view_revisio...


review: Needs Information
John A Meinel (jameinel) wrote :

What is the status of this submission? How was it impacted by my improvements to "bzr log DIR"?

It seems a bit stale, so I'd like to either mark it as "Work in Progress" or "Rejected" or heck, even "Approved" so long as we get some motion here.

Parth Malwankar (parthm) wrote :

I just ran a basic benchmark for this branch on the emacs trunk. 'bzr log FILE' takes ~11s with trunk and ~24s with this branch. Maybe something has changed, as this branch is quite old, or perhaps the emacs development model is not flat-ish.
'bzr log' performance is about the same.
Maybe someone familiar with log can comment, or mark this as rejected to keep the WIP queue size down?

[emacs-bzr]% time ~/src/bzr.dev/faster-log-file/bzr --no-plugins log > /dev/null
~/src/bzr.dev/faster-log-file/bzr --no-plugins log > /dev/null 34.97s user 1.76s system 97% cpu 37.860 total
[emacs-bzr]% time ~/src/bzr.dev/faster-log-file/bzr --no-plugins log > /dev/null
~/src/bzr.dev/faster-log-file/bzr --no-plugins log > /dev/null 35.11s user 1.66s system 99% cpu 36.997 total
[emacs-bzr]% time bzr --no-plugins log > /dev/null
bzr --no-plugins log > /dev/null 35.19s user 1.72s system 99% cpu 37.082 total
[emacs-bzr]% time bzr log autogen.sh > /dev/null
bzr log autogen.sh > /dev/null 11.26s user 0.25s system 96% cpu 11.884 total
[emacs-bzr]% time bzr log autogen.sh > /dev/null
bzr log autogen.sh > /dev/null 11.07s user 0.24s system 100% cpu 11.305 total
[emacs-bzr]% time ~/src/bzr.dev/faster-log-file/bzr --no-plugins log autogen.sh > /dev/null
~/src/bzr.dev/faster-log-file/bzr --no-plugins log autogen.sh > /dev/null 23.66s user 0.22s system 99% cpu 24.075 total
[emacs-bzr]% time ~/src/bzr.dev/faster-log-file/bzr --no-plugins log autogen.sh > /dev/null
~/src/bzr.dev/faster-log-file/bzr --no-plugins log autogen.sh > /dev/null 25.39s user 0.28s system 99% cpu 25.756 total
[emacs-bzr]%

John A Meinel (jameinel) wrote :

Parth Malwankar wrote:
> I just ran a basic benchmark for this branch on the emacs trunk. 'bzr log FILE' takes ~11s with trunk and ~24s with this branch. Maybe something has changed, as this branch is quite old, or perhaps the emacs development model is not flat-ish.
> 'bzr log' performance is about the same.
> Maybe someone familiar with log can comment, or mark this as rejected to keep the WIP queue size down?
>
emacs used to be almost completely flat, and I think Ian's work was
trying to tune for that. However, they now have a few merge revisions,
and this may get tripped up. I seem to remember that it did the work
twice: once trying to be 'fast' assuming there are no merges, and then
falling back to a slower path. So if there are merges it was 2x slower
because it did the work twice.

John
=:->


Robert Collins (lifeless) wrote :

So, is this really in progress, or halted()? Perhaps MPs should have an 'idle' state for not-moving, not-rejected. Or something.

Unmerged revisions

4387. By Ian Clatworthy

add NEWS item

4386. By Ian Clatworthy

avoid looking back too far for files created in merge revisions

4385. By Ian Clatworthy

merge bzr.dev r4446

4384. By Ian Clatworthy

faster log file -n0 for flat file history

4383. By Ian Clatworthy

speed up log file on flat-ish histories

4382. By Canonical.com Patch Queue Manager <email address hidden>

(vila) Fix blatant performance regression for annotate in gc repos

4381. By Canonical.com Patch Queue Manager <email address hidden>

(Jelmer) Add registry for the 'bzr serve' protocol.

4380. By Canonical.com Patch Queue Manager <email address hidden>

(igc) two simple log dotted revno tests (Marius Kruger)

4379. By Canonical.com Patch Queue Manager <email address hidden>

(tanner) merge 1.15final back to trunk

4378. By Canonical.com Patch Queue Manager <email address hidden>

(igc) faster branch in a shared repo for dev6rr format (Ian Clatworthy)

Preview Diff

=== modified file 'NEWS'
--- NEWS 2009-08-30 23:51:10 +0000
+++ NEWS 2009-08-31 04:37:43 +0000
@@ -771,6 +771,7 @@
 Improvements
 ************
 
+<<<<<<< TREE
 * ``bzr annotate`` can now be significantly faster. The time for
   ``bzr annotate NEWS`` is down to 7s from 22s in 1.16. Files with long
   histories and lots of 'duplicate insertions' will be improved more than
@@ -786,6 +787,12 @@
 * Initial commit performance in ``--2a`` repositories has been improved by
   making it cheaper to build the initial CHKMap. (John Arbash Meinel)
 
+=======
+* ``bzr log FILE`` is now substantially faster on flat-ish histories.
+  On OpenOffice.org for example, logging a typical file now takes
+  a second or so instead of 29 seconds. (Ian Clatworthy)
+
+>>>>>>> MERGE-SOURCE
 * Resolving a revno to a revision id on a branch accessed via ``bzr://``
   or ``bzr+ssh://`` is now much faster and involves no VFS operations.
   This speeds up commands like ``bzr pull -r 123``. (Andrew Bennetts)
 

=== modified file 'bzrlib/builtins.py'
--- bzrlib/builtins.py 2009-08-28 05:00:33 +0000
+++ bzrlib/builtins.py 2009-08-31 04:37:44 +0000
@@ -2332,16 +2332,14 @@
             # the underlying repository format is faster at generating
             # deltas or can provide everything we need from the indices.
             # The default algorithm - match-using-deltas - works for
-            # multiple files and directories and is faster for small
-            # amounts of history (200 revisions say). However, it's too
+            # multiple files and directories. However, it's too
             # slow for logging a single file in a repository with deep
             # history, i.e. > 10K revisions. In the spirit of "do no
             # evil when adding features", we continue to use the
             # original algorithm - per-file-graph - for the "single
             # file that isn't a directory without showing a delta" case.
-            partial_history = revision and b.repository._format.supports_chks
             match_using_deltas = (len(file_ids) != 1 or filter_by_dir
-                or delta_type or partial_history)
+                or delta_type)
 
             # Build the LogRequest and execute it
             if len(file_ids) == 0:
 

=== modified file 'bzrlib/log.py'
--- bzrlib/log.py 2009-06-10 03:56:49 +0000
+++ bzrlib/log.py 2009-08-31 04:37:44 +0000
@@ -70,6 +70,7 @@
     diff,
     errors,
     foreign,
+    graph,
     repository as _mod_repository,
     revision as _mod_revision,
     revisionspec,
@@ -460,21 +461,150 @@
             direction=rqst.get('direction'))
 
     def _log_revision_iterator_using_per_file_graph(self):
-        # Get the base revisions, filtering by the revision range.
-        # Note that we always generate the merge revisions because
-        # filter_revisions_touching_file_id() requires them ...
         rqst = self.rqst
-        view_revisions = _calc_view_revisions(self.branch, self.start_rev_id,
-            self.end_rev_id, rqst.get('direction'), True)
-        if not isinstance(view_revisions, list):
-            view_revisions = list(view_revisions)
-        view_revisions = _filter_revisions_touching_file_id(self.branch,
-            rqst.get('specific_fileids')[0], view_revisions,
-            include_merges=rqst.get('levels') != 1)
+        direction = rqst.get('direction')
+        file_id = rqst.get('specific_fileids')[0]
+        multi_level = rqst.get('levels') != 1
+        try:
+            file_graph, graph_tip = _per_file_graph(self.branch, file_id,
+                self.end_rev_id)
+        except errors.NoSuchId:
+            # File doesn't exist at end of range - fall back to old algorithm
+            view_revisions = None
+        else:
+            # Try iterating over the revisions given by the per-file graph.
+            # This returns None if it fails.
+            view_revisions = _calc_view_revisions_for_file(self.branch,
+                file_graph, graph_tip, self.start_rev_id, self.end_rev_id,
+                direction, multi_level)
+
+        if view_revisions is None:
+            # Get the base revisions, filtering by the revision range.
+            # Note that we always generate the merge revisions because
+            # filter_revisions_touching_file_id() requires them ...
+            view_revisions = _calc_view_revisions(self.branch,
+                self.start_rev_id, self.end_rev_id, direction, True)
+            if not isinstance(view_revisions, list):
+                view_revisions = list(view_revisions)
+            # TODO: pass in the already calculated file graph and re-use it
+            view_revisions = _filter_revisions_touching_file_id(self.branch,
+                file_id, view_revisions, include_merges=multi_level)
         return make_log_rev_iterator(self.branch, view_revisions,
             rqst.get('delta_type'), rqst.get('message_search'))
 
 
+def _per_file_graph(branch, file_id, end_rev_id):
+    """Get the per file graph.
+
+    :param end_rev_id: the last interesting revision-id or None to use
+      the basis tree. If non-None, the file must exist in that revision
+      or NoSuchId will be raised.
+    :return: graph, tip where
+      graph is a Graph with (file_id,rev_id) tuple keys and
+      tip is the graph tip
+    """
+    # Find when the file was last modified
+    if end_rev_id is None:
+        rev_tree = branch.basis_tree()
+    else:
+        rev_tree = branch.repository.revision_tree(end_rev_id)
+    last_modified = rev_tree.inventory[file_id].revision
+
+    # Return the result
+    tip = (file_id, last_modified)
+    return graph.Graph(branch.repository.texts), tip
+
+
+def _calc_view_revisions_for_file(branch, file_graph, graph_tip, start_rev_id,
+        end_rev_id, direction, include_merges):
+    """Calculate the revisions to view for a file.
+
+    :param file_graph: the per-file graph
+    :param graph_tip: the tip of the per-file graph
+    :param include_merges: if True, include all revisions, not just the top
+      level
+    :return: An list of (revision_id, dotted_revno, merge_depth) tuples OR
+      None if the algorithm fails (and another one should be used).
+    """
+    br_revno, br_rev_id = branch.last_revision_info()
+    if br_revno == 0:
+        return []
+
+    # Find the revisions where the file was changed and merged
+    file_rev_ids = []
+    file_merges = []
+    for (_, rev_id), parents in file_graph.iter_ancestry([graph_tip]):
+        file_rev_ids.append(rev_id)
+        if len(parents) > 1:
+            file_merges.append(rev_id)
+
+    # Handle the simple cases
+    if len(file_rev_ids) == 1:
+        return _generate_one_revision(branch, file_rev_ids[0], br_rev_id,
+            br_revno)
+    elif len(file_rev_ids) == 0:
+        # Should this ever happen?
+        return []
+    elif file_merges and include_merges:
+        # Fall back to the old algorithm for now
+        return None
+
+    # Find all the revisions we can using a linear search
+    result = []
+    missing = set(file_rev_ids)
+    merges_to_search = 0
+    created_timestamp = None
+    try:
+        candidates = _linear_view_revisions(branch, start_rev_id, end_rev_id)
+        for index, (rev_id, revno, depth) in enumerate(candidates):
+            if rev_id in missing:
+                result.append((rev_id, revno, depth))
+                missing.remove(rev_id)
+                if len(missing) == 0:
+                    break
+            else:
+                if _has_merges(branch, rev_id):
+                    merges_to_search += 1
+                    # If this is a dense tree, this optimisation is unlikely
+                    # to result in a net win - fall back to old algorithm.
+                    if merges_to_search > 100:
+                        return None
+                # Check the timestamp to avoid going back too far on the
+                # mainline for files created in merge revisions. We don't
+                # do this every revision, just regularly, to minimise the
+                # number of revisions that we load at this point.
+                if index and index % 500 == 0:
+                    if created_timestamp is None:
+                        created_rev = branch.repository.get_revision(
+                            file_rev_ids[-1])
+                        created_timestamp = created_rev.timestamp
+                    rev = branch.repository.get_revision(rev_id)
+                    if created_timestamp > rev.timestamp:
+                        return None
+
+    except _StartNotLinearAncestor:
+        raise errors.BzrCommandError('Start revision not found in'
+            ' left-hand history of end revision.')
+
+    # If no merges were found in the revision range, then we can be
+    # certain that we've found all the revisions we care about.
+    if missing and merges_to_search:
+        # TODO: search the deltas of the merges, splicing successful
+        # matches into their rightful spots. That should work well on
+        # chk repositories for typical histories but we need to benchmark
+        # it to confirm. There's most likely a sweet spot above which
+        # the O(history) traditional way - generating the full graph of
+        # history and post-filtering - remains the best performer.
+        trace.mutter("log file fastpath failed to find %d revisions" %
+            len(missing))
+        return None
+
+    # We came, we saw, we walked away victorious ...
+    if direction == 'forward':
+        result = reversed(result)
+    return result
+
+
 def _calc_view_revisions(branch, start_rev_id, end_rev_id, direction,
                          generate_merge_revisions, delayed_graph_generation=False):
     """Calculate the revisions to view.