Merge lp:~spiv/bzr/smarter-index-search into lp:bzr

Proposed by Andrew Bennetts
Status: Merged
Approved by: Andrew Bennetts
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~spiv/bzr/smarter-index-search
Merge into: lp:bzr
Diff against target: 365 lines (+176/-29)
5 files modified
NEWS (+6/-0)
bzrlib/index.py (+95/-7)
bzrlib/repofmt/pack_repo.py (+14/-22)
bzrlib/tests/per_pack_repository.py (+17/-0)
bzrlib/tests/test_index.py (+44/-0)
To merge this branch: bzr merge lp:~spiv/bzr/smarter-index-search
Reviewer Review Type Date Requested Status
Robert Collins (community) Approve
Martin Packman (community) Approve
John A Meinel Pending
Review via email: mp+21615@code.launchpad.net

This proposal supersedes a proposal from 2010-03-12.

Description of the change

Optimise index lookups in repositories with many pack files.

First, the headline: this greatly improves "bzr pull" of one new revision of grub from savannah by HTTP (as reported on the mailing list, which has further analysis):

bzr.dev: 2424kB (50.2kB/s r:2395kB w:30kB)
this patch: 1034kB (43.3kB/s r:1022kB w:12kB)

Given that the pack data transferred is 701266 bytes (which itself seems quite large for such a small change...), that brings the index-searching overhead from 2.42x to 0.45x of the bytes read. It also halves the wall-clock time :)
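
A quick sanity check of those ratios (my reading, not spelled out in the post: overhead = (bytes read − pack bytes) / pack bytes, taking the r: figures as total bytes read and 1 kB as 1000 bytes):

```python
# Rough reproduction of the 2.42x / 0.45x overhead figures quoted above.
pack_bytes = 701266  # pack data actually needed for the fetch

overhead_before = (2395 * 1000 - pack_bytes) / float(pack_bytes)
overhead_after = (1022 * 1000 - pack_bytes) / float(pack_bytes)
# overhead_before comes out around 2.42; overhead_after around 0.46
# (the 0.45 in the text presumably reflects slightly different rounding).
```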

That repo has, I think, 14 packs, and bzr.dev tries 11 indices for each of rix, iix, etc. before finding the data it needs for that fetch.

There are two parts to this change:

 1) When a CombinedGraphIndex performs a lookup, it shuffles the index or indices that contained the records to the front of self._indices, on the assumption that future lookups should try those first.
 2) It propagates that reordering to the other CombinedGraphIndex objects from the same pack collection. This is done by: a) associating a name (the pack name) with the elements of CombinedGraphIndex; b) linking the revisions/inventories/etc. CombinedGraphIndex objects belonging to a single pack collection by setting a _sibling_indices attribute on them; and c) using those links and names to apply the same reordering to those sibling indices.
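
Part 1 can be sketched roughly like this (an illustrative standalone helper, not bzrlib's actual code; the real logic is in the diff's _move_to_front_by_index):

```python
# Sketch of the stable move-to-front reordering: indices that produced
# results move to the front; everything else keeps its relative order.

def move_to_front(indices, names, hit_indices):
    hit_info, unhit_info, hit_names = [], [], []
    for name, idx in zip(names, indices):
        if idx in hit_indices:
            hit_info.append((name, idx))
            hit_names.append(name)  # names let siblings mirror the reorder
        else:
            unhit_info.append((name, idx))
    final = hit_info + unhit_info
    return [i for _, i in final], [n for n, _ in final], hit_names

# Stand-in strings for GraphIndex objects and pack names:
indices = ['rix-1', 'rix-2', 'rix-3']
names = ['pack-1', 'pack-2', 'pack-3']
new_indices, new_names, hits = move_to_front(indices, names, ['rix-3'])
# new_indices == ['rix-3', 'rix-1', 'rix-2']; hits == ['pack-3']
```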

I've been pretty conservative with API changes: the new behaviour is only activated by optional keyword arguments, so existing uses of CombinedGraphIndex should see no change of behaviour (including no improvement). This is to make it as easy as possible to backport this change to 2.1 and 2.0 if we choose to.

I think this change needs some tests before it's truly ready to merge, but it's getting to the end of my work week and I think this code is ready for feedback, so here it is!
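
The sibling propagation in part 2 can be sketched as follows (simplified illustrative classes, not bzrlib's actual API; the real code uses set_sibling_indices and _move_to_front_by_name):

```python
# Sketch: a reorder in one combined index is mirrored, by pack name,
# in its sibling combined indices from the same pack collection.

class SketchIndex(object):
    def __init__(self, names):
        self.names = list(names)
        self.siblings = []

    def _move_to_front_by_name(self, hit_names):
        # Stable partition: hit names first, relative order preserved.
        self.names = ([n for n in self.names if n in hit_names] +
                      [n for n in self.names if n not in hit_names])

    def record_hits(self, hit_names):
        self._move_to_front_by_name(hit_names)
        for sib in self.siblings:
            sib._move_to_front_by_name(hit_names)

rix = SketchIndex(['pack-1', 'pack-2', 'pack-3'])
iix = SketchIndex(['pack-1', 'pack-2', 'pack-3'])
rix.siblings = [iix]
rix.record_hits(['pack-3'])
# Both rix and iix now try pack-3 first.
```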

Revision history for this message
John A Meinel (jameinel) wrote : Posted in a previous version of this proposal

Andrew Bennetts wrote:
> Andrew Bennetts has proposed merging lp:~spiv/bzr/smarter-index-search into lp:bzr.
> [...]

Thanks for doing this. I did the work in Packer, but it is nice to have
it here.

I would tend to set "auto_reorder" to always on. I don't see it ever
making things worse, and there should only be a small overhead for the
reordering. As such, I would also get rid of the constructor flag.

Doing it by names is ok, I think Index already has _name for most
indices. You could also order by "access_tuple()" which I believe is a
public API and returns (transport, name) to the original .pack file. The
*nice* thing about doing that is that you don't have to introduce any
new apis here. But if it isn't as clean, then don't worry too much about it.

It also avoids having to rebuild the mapping for every move call:
    +        indices_info = zip(self._index_names, self._indices)
    +        hit_indices_info = []
    +        hit_names = []
    +        unhit_indices_info = []
    +        for name, idx in indices_info:
    +            if idx in hit_indices:
    +                info = hit_indices_info
    +                hit_names.append(name)
    +            else:
    +                info = unhit_indices_info
    +        ...


Revision history for this message
John A Meinel (jameinel) wrote : Posted in a previous version of this proposal


Andrew Bennetts wrote:
> Andrew Bennetts has proposed merging lp:~spiv/bzr/smarter-index-search into lp:bzr.
> [...]

I forgot to vote.

  review: needs_fixing

Because it certainly should have some amount of testing. If only to
prevent future regressions (like we already have :). Though you don't
have to test it at the "get_stream()" level.

John
=:->

review: Needs Fixing
Revision history for this message
Andrew Bennetts (spiv) wrote : Posted in a previous version of this proposal

John A Meinel wrote:
[...]
> Thanks for doing this. I did the work in Packer, but it is nice to have
> it here.

Ah! I thought we had something like this. I'll see what code and ideas
I can reuse.

> I would tend to set "auto_reorder" to always on. I don't see it ever
> making things worse, and there should only be a small overhead for the
> reordering. As such, I would also get rid of the constructor flag.

Hmm, ok. If I was just targeting bzr.dev I would have done this
already, but if you're confident it won't cause unexpected issues I'm
happy to make it unconditional (including in backports).

> Doing it by names is ok, I think Index already has _name for most
> indices. You could also order by "access_tuple()" which I believe is a
> public API and returns (transport, name) to the original .pack file. The
> *nice* thing about doing that is that you don't have to introduce any
> new apis here. But if it isn't as clean, then don't worry too much about it.

_name is really a path for _transport, and as such includes the suffix.
So I don't think it's as clean either code-wise or conceptually to rely
on that. And as you noticed later access_tuple is on AggregateIndex, so
it's not readily available either.

> If it isn't too much overhead, you might consider:
>
>     keys_remove = keys.remove
>     # Be careful, if keys starts empty, we would break, and get a name error
>     index_hit_count = 0
>     for index in self._indices:
>         if not keys:
>             break
>         index_hit_count = 0
>         for node in index.iter_entries(keys):
>             keys_remove(node[1])
>             yield node
>             index_hit_count += 1
>
>         if index_hit_count:
>             hit_indices.append((index_hit_count, index))
>
>     hit_indices.sort(reverse=True)
>
>     self._move_index_order(hit_indices)
>
>
> The nice thing about it, is that it is adaptive. In that if you start a
> search, and hit it in index 1, then you keep looking there, but if the
> keys start showing up in index 2, then you'll switch. The only other bit
> of logic that I could think of is:

I don't quite follow you here. Wouldn't my logic switch to preferring
index 2 anyway?

It's not clear to me that ordering the hit_indices by hit count is
better.

> if index_hit_count == index.key_count():
> index_hit_count = 0 (or maybe 1)
>
> I realize to do this correctly, you'd need to track hit count between
> calls, which isn't realistic. However, that basic check will work for
> pack files that only have 1 or 2 keys that are found quickly. (And
> commit creates indexes with only 1 entry, so it is fairly common.) Just
> a thought, though.

This is an interesting idea. Although perhaps those lookups will
already fail very quickly, because by that point the entire index has
already been read and cached? I don't think I'll do this in this patch,
as I think it'd need more investigation and measurement to justify, but
it might make a good incremental improvement later.

> +        # Tell all the CombinedGraphIndex objects about each other, so they
> +        # can share hints about which pack names to search first.
> +        all_combined = [agg_idx.combined_index for agg_idx in all_indices]
> +        for combined_idx in all_combined:
> +        ...


Revision history for this message
Andrew Bennetts (spiv) wrote : Posted in a previous version of this proposal

Andrew Bennetts wrote:
> John A Meinel wrote:
[...]
> > The nice thing about it, is that it is adaptive. In that if you start a
> > search, and hit it in index 1, then you keep looking there, but if the
> > keys start showing up in index 2, then you'll switch. The only other bit
> > of logic that I could think of is:
>
> I don't quite follow you here. Wouldn't my logic switch to preferring
> index 2 anyway?

On reflection, I *think* what you mean is if a call to iter_entries (via
get_parent_map or otherwise) queries for [key_from_1, key_from_2,
key_from_2], my logic will keep the index order as [1, 2], but it might
be better to switch them to [2, 1]. (I initially thought that you were
talking about a search conducted over multiple calls to iter_entries,
not just one.)

That could be true, but I'm not sure it would really make a big
difference in practice. How about writing a patch for my patch and
doing some measurements? I'll happily review it :)

Or do you already have some analysis that would support this from when
you changed the Packer code?

-Andrew.

Revision history for this message
John A Meinel (jameinel) wrote : Posted in a previous version of this proposal

Andrew Bennetts wrote:
> John A Meinel wrote:
[...]
>> The nice thing about it, is that it is adaptive. In that if you start a
>> search, and hit it in index 1, then you keep looking there, but if the
>> keys start showing up in index 2, then you'll switch.
>
> I don't quite follow you here. Wouldn't my logic switch to preferring
> index 2 anyway?
>
> It's not clear to me that ordering the hit_indices by hit count is
> better.
>

The point is that if the first index in the list gets 1 key hit, but the
second gets 10, and the third gets 5, then it will get sorted into
(index2, index3, index1) rather than (index1, index2, index3). At least,
what I saw with your code was that it is just 'preserve existing order,
but move the ones hit to the front'. Put another way, say the hits are:

5 index1
10 index2
0 index3
4 index5
0 index6

your version would sort to

index1
index2
index5
index3
index6

mine would sort to

index2
index1
index5
index3
index6

I think the latter is slightly preferred. I don't have any numbers to
back that up, just a feeling. (Most likely in this case index2 is a
'bigger' index, and thus has more to *be* hit.)
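
The two orderings being contrasted can be reproduced directly from the hypothetical hit counts above:

```python
# Hit counts from the example above (illustrative data only).
hits = {'index1': 5, 'index2': 10, 'index3': 0, 'index5': 4, 'index6': 0}
order = ['index1', 'index2', 'index3', 'index5', 'index6']

# Andrew's patch: move hit indices to the front, preserving their order.
mtf = ([i for i in order if hits[i]] +
       [i for i in order if not hits[i]])
# -> ['index1', 'index2', 'index5', 'index3', 'index6']

# John's suggestion: sort the hit indices by descending hit count.
by_count = (sorted((i for i in order if hits[i]), key=lambda i: -hits[i]) +
            [i for i in order if not hits[i]])
# -> ['index2', 'index1', 'index5', 'index3', 'index6']
```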

As for small indices getting rejected quickly, it is true they won't
encounter any I/O overhead, which may be sufficient. It is a function
call, but th...


Revision history for this message
Andrew Bennetts (spiv) wrote : Posted in a previous version of this proposal

John A Meinel wrote:
[...]
> I think the latter is slightly preferred. I don't have any numbers to
> back that up, just a feeling. (Most likely in this case index2 is a
> 'bigger' index, and thus has more to *be* hit.)

I agree it probably helps, a little, but I'm not going to do it without
some measurements to confirm it does, because of the risk of
counter-intuitive results from untested optimisations. I know you've done
more exploring in this area, so I trust your intuitions a bit more than
mine, but I'm still reluctant, as the bang for buck seems low.

Also, even my simple logic will tend to keep the most frequently hit
indices closer to the front than others after more and more queries,
just not quite as quickly as it would with hit counts. And this way we
don't need to worry about whether we should be keeping and aging hit
counts between queries, etc.

> As for small indices getting rejected quickly, it is true they won't
> encounter any I/O overhead, which may be sufficient. It is a function
> call, but that is probably tiny overhead, and they'll get sorted late
> quickly anyway.

Right. (FWIW, it's I/O overhead I'm most worried about.)

Revision history for this message
Andrew Bennetts (spiv) wrote :

I think all review comments have been addressed, but I'm sure John will tell me if I've missed something :)

Revision history for this message
Martin Packman (gz) wrote :

Don't know the code well enough to do a thorough review, but looks sane to me, tests pass, and it'll save me bandwidth.

review: Approve
Revision history for this message
Robert Collins (lifeless) wrote :

Andrew, when you and John are happy, I think you should just land this.

On the 'always reorder' side: if you do that, please check that the end-of-transaction code that pops off the in-progress indices won't be affected; back in the dim past it used [0] to 'know' that the first index was the one being written.

review: Approve
Revision history for this message
Andrew Bennetts (spiv) wrote :

Robert: Thanks for suggesting that. I've re-read a bunch of code and grepped, and as far as I can see, nothing is cheating by assuming [0] is the in-progress index that is being written. The test suite seems to pass too :)

So no changes needed there. I'm going to land this now before it goes stale.

Revision history for this message
John A Meinel (jameinel) wrote :


Andrew Bennetts wrote:
> Andrew Bennetts has proposed merging lp:~spiv/bzr/smarter-index-search into lp:bzr.
> [...]

This doesn't seem to be as cheap as I expected it to be. I'm not 100%
sure who is at fault here, but I'm doing some profiling of loggerhead +
bzr-history-db, and I came across this:

       25765 3.4509 1.8229 bzrlib.index:1425(_move_to_front_by_index)
     +489715 0.8904 0.8904 +bzrlib.btree_index:689(__eq__)
     +541075 0.6721 0.6721 +<method 'append' of 'list' objects>
      +25765 0.0654 0.0654 +<zip>
      489809 0.8906 0.8906 bzrlib.btree_index:689(__eq__)
      590334 0.7374 0.7374 <method 'append' of 'list' objects>
         153 0.2031 0.2031 <method 'write' of 'file' objects>
       20612 3.0278 0.1986 bzrlib.index:1451(_move_to_front_by_name)
      +20612 2.7483 1.4505 +bzrlib.index:1425(_move_to_front_by_index)

I'm guessing the issue is something walking over the ancestry (like bzr log).

5153 3.8017 0.0712 bzrlib.index:1410(_move_to_front)

Note that only 5k calls to _move_to_front gets increased to 25k calls to
_move_to_front_by_index. (probably because there are
rix/iix/tix/cix/six, so a 5:1 factor).

However, we must be missing something for move to be that expensive.

I'm guessing this is:
https://bugs.launchpad.net/bugs/562429
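
One plausible reading of this profile (my guess; not confirmed in this thread): _move_to_front_by_index tests `idx in hit_indices` against a plain list, so every test calls __eq__ once per element, which would match the ~490k BTreeGraphIndex.__eq__ calls above. A hypothetical variant keyed on id() makes each membership test a hash lookup:

```python
# Hypothetical cheaper variant of the move-to-front partition:
# `idx in hit_indices` on a list is a linear scan invoking __eq__;
# a set of id()s gives O(1) identity-based membership tests instead.

def move_to_front_by_index(names, indices, hit_indices):
    hit_ids = set(id(idx) for idx in hit_indices)
    hit_info, unhit_info = [], []
    for name, idx in zip(names, indices):
        if id(idx) in hit_ids:  # no __eq__ calls here
            hit_info.append((name, idx))
        else:
            unhit_info.append((name, idx))
    final = hit_info + unhit_info
    return [n for n, _ in final], [i for _, i in final]
```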

John
=:->

Preview Diff

=== modified file 'NEWS'
--- NEWS 2010-04-08 04:34:03 +0000
+++ NEWS 2010-04-08 07:11:22 +0000
@@ -38,6 +38,12 @@
38 generated by a template and not edited by the user.38 generated by a template and not edited by the user.
39 (Robert Collins, #530265)39 (Robert Collins, #530265)
4040
41* Index lookups in pack repositories search recently hit pack files first.
42 In repositories with many pack files this can greatly reduce the
43 number of files accessed, the number of bytes read, and the number of
44 read calls. An incremental pull via plain HTTP takes half the time and
45 bytes for a moderately large repository. (Andrew Bennetts)
46
41* Less code is loaded at startup. (Cold-cache start time is about 10-20%47* Less code is loaded at startup. (Cold-cache start time is about 10-20%
42 less.)48 less.)
43 (Martin Pool, #553017)49 (Martin Pool, #553017)
4450
=== modified file 'bzrlib/index.py'
--- bzrlib/index.py 2010-03-05 17:56:55 +0000
+++ bzrlib/index.py 2010-04-08 07:11:22 +0000
@@ -1245,10 +1245,15 @@
1245 static data.1245 static data.
12461246
1247 Queries against the combined index will be made against the first index,1247 Queries against the combined index will be made against the first index,
1248 and then the second and so on. The order of index's can thus influence1248 and then the second and so on. The order of indices can thus influence
1249 performance significantly. For example, if one index is on local disk and a1249 performance significantly. For example, if one index is on local disk and a
1250 second on a remote server, the local disk index should be before the other1250 second on a remote server, the local disk index should be before the other
1251 in the index list.1251 in the index list.
1252
1253 Also, queries tend to need results from the same indices as previous
1254 queries. So the indices will be reordered after every query to put the
1255 indices that had the result(s) of that query first (while otherwise
1256 preserving the relative ordering).
1252 """1257 """
12531258
1254 def __init__(self, indices, reload_func=None):1259 def __init__(self, indices, reload_func=None):
@@ -1261,6 +1266,13 @@
1261 """1266 """
1262 self._indices = indices1267 self._indices = indices
1263 self._reload_func = reload_func1268 self._reload_func = reload_func
1269 # Sibling indices are other CombinedGraphIndex that we should call
1270 # _move_to_front_by_name on when we auto-reorder ourself.
1271 self._sibling_indices = []
1272 # A list of names that corresponds to the instances in self._indices,
1273 # so _index_names[0] is always the name for _indices[0], etc. Sibling
1274 # indices must all use the same set of names as each other.
1275 self._index_names = [None] * len(self._indices)
12641276
1265 def __repr__(self):1277 def __repr__(self):
1266 return "%s(%s)" % (1278 return "%s(%s)" % (
@@ -1289,13 +1301,17 @@
12891301
1290 has_key = _has_key_from_parent_map1302 has_key = _has_key_from_parent_map
12911303
1292 def insert_index(self, pos, index):1304 def insert_index(self, pos, index, name=None):
1293 """Insert a new index in the list of indices to query.1305 """Insert a new index in the list of indices to query.
12941306
1295 :param pos: The position to insert the index.1307 :param pos: The position to insert the index.
1296 :param index: The index to insert.1308 :param index: The index to insert.
1309 :param name: a name for this index, e.g. a pack name. These names can
1310 be used to reflect index reorderings to related CombinedGraphIndex
1311 instances that use the same names. (see set_sibling_indices)
1297 """1312 """
1298 self._indices.insert(pos, index)1313 self._indices.insert(pos, index)
1314 self._index_names.insert(pos, name)
12991315
1300 def iter_all_entries(self):1316 def iter_all_entries(self):
1301 """Iterate over all keys within the index1317 """Iterate over all keys within the index
@@ -1326,22 +1342,28 @@
1326 value and are only reported once.1342 value and are only reported once.
13271343
1328 :param keys: An iterable providing the keys to be retrieved.1344 :param keys: An iterable providing the keys to be retrieved.
1329 :return: An iterable of (index, key, reference_lists, value). There is no1345 :return: An iterable of (index, key, reference_lists, value). There is
1330 defined order for the result iteration - it will be in the most1346 no defined order for the result iteration - it will be in the most
1331 efficient order for the index.1347 efficient order for the index.
1332 """1348 """
1333 keys = set(keys)1349 keys = set(keys)
1350 hit_indices = []
1334 while True:1351 while True:
1335 try:1352 try:
1336 for index in self._indices:1353 for index in self._indices:
1337 if not keys:1354 if not keys:
1338 return1355 break
1356 index_hit = False
1339 for node in index.iter_entries(keys):1357 for node in index.iter_entries(keys):
1340 keys.remove(node[1])1358 keys.remove(node[1])
1341 yield node1359 yield node
1342 return1360 index_hit = True
1361 if index_hit:
1362 hit_indices.append(index)
1363 break
1343 except errors.NoSuchFile:1364 except errors.NoSuchFile:
1344 self._reload_or_raise()1365 self._reload_or_raise()
1366 self._move_to_front(hit_indices)
13451367
1346 def iter_entries_prefix(self, keys):1368 def iter_entries_prefix(self, keys):
1347 """Iterate over keys within the index using prefix matching.1369 """Iterate over keys within the index using prefix matching.
@@ -1367,17 +1389,77 @@
1367 if not keys:1389 if not keys:
1368 return1390 return
1369 seen_keys = set()1391 seen_keys = set()
1392 hit_indices = []
1370 while True:1393 while True:
1371 try:1394 try:
1372 for index in self._indices:1395 for index in self._indices:
1396 index_hit = False
1373 for node in index.iter_entries_prefix(keys):1397 for node in index.iter_entries_prefix(keys):
1374 if node[1] in seen_keys:1398 if node[1] in seen_keys:
1375 continue1399 continue
1376 seen_keys.add(node[1])1400 seen_keys.add(node[1])
1377 yield node1401 yield node
1378 return1402 index_hit = True
1403 if index_hit:
1404 hit_indices.append(index)
1405 break
1379 except errors.NoSuchFile:1406 except errors.NoSuchFile:
1380 self._reload_or_raise()1407 self._reload_or_raise()
1408 self._move_to_front(hit_indices)
1409
1410 def _move_to_front(self, hit_indices):
1411 """Rearrange self._indices so that hit_indices are first.
1412
1413 Order is maintained as much as possible, e.g. the first unhit index
1414 will be the first index in _indices after the hit_indices, and the
1415 hit_indices will be present in exactly the order they are passed to
1416 _move_to_front.
1417
1418 _move_to_front propagates to all objects in self._sibling_indices by
1419 calling _move_to_front_by_name.
1420 """
1421 hit_names = self._move_to_front_by_index(hit_indices)
1422 for sibling_idx in self._sibling_indices:
1423 sibling_idx._move_to_front_by_name(hit_names)
1424
1425 def _move_to_front_by_index(self, hit_indices):
1426 """Core logic for _move_to_front.
1427
1428 Returns a list of names corresponding to the hit_indices param.
1429 """
1430 indices_info = zip(self._index_names, self._indices)
1431 if 'index' in debug.debug_flags:
1432 mutter('CombinedGraphIndex reordering: currently %r, promoting %r',
1433 indices_info, hit_indices)
1434 hit_indices_info = []
1435 hit_names = []
1436 unhit_indices_info = []
1437 for name, idx in indices_info:
1438 if idx in hit_indices:
1439 info = hit_indices_info
1440 hit_names.append(name)
1441 else:
1442 info = unhit_indices_info
1443 info.append((name, idx))
1444 final_info = hit_indices_info + unhit_indices_info
1445 self._indices = [idx for (name, idx) in final_info]
1446 self._index_names = [name for (name, idx) in final_info]
1447 if 'index' in debug.debug_flags:
1448 mutter('CombinedGraphIndex reordered: %r', self._indices)
1449 return hit_names
1450
1451 def _move_to_front_by_name(self, hit_names):
1452 """Moves indices named by 'hit_names' to front of the search order, as
1453 described in _move_to_front.
1454 """
1455 # Translate names to index instances, and then call
1456 # _move_to_front_by_index.
1457 indices_info = zip(self._index_names, self._indices)
1458 hit_indices = []
1459 for name, idx in indices_info:
1460 if name in hit_names:
1461 hit_indices.append(idx)
1462 self._move_to_front_by_index(hit_indices)
13811463
1382 def find_ancestry(self, keys, ref_list_num):1464 def find_ancestry(self, keys, ref_list_num):
1383 """Find the complete ancestry for the given set of keys.1465 """Find the complete ancestry for the given set of keys.
@@ -1390,6 +1472,7 @@
1390 we care about.1472 we care about.
1391 :return: (parent_map, missing_keys)1473 :return: (parent_map, missing_keys)
1392 """1474 """
1475 # XXX: make this call _move_to_front?
1393 missing_keys = set()1476 missing_keys = set()
1394 parent_map = {}1477 parent_map = {}
1395 keys_to_lookup = set(keys)1478 keys_to_lookup = set(keys)
@@ -1475,6 +1558,11 @@
1475 ' Raising original exception.')1558 ' Raising original exception.')
1476 raise exc_type, exc_value, exc_traceback1559 raise exc_type, exc_value, exc_traceback
14771560
1561 def set_sibling_indices(self, sibling_combined_graph_indices):
1562 """Set the CombinedGraphIndex objects to reorder after reordering self.
1563 """
1564 self._sibling_indices = sibling_combined_graph_indices
1565
1478 def validate(self):1566 def validate(self):
1479 """Validate that everything in the index can be accessed."""1567 """Validate that everything in the index can be accessed."""
1480 while True:1568 while True:
14811569
=== modified file 'bzrlib/repofmt/pack_repo.py'
--- bzrlib/repofmt/pack_repo.py 2010-02-12 11:58:21 +0000
+++ bzrlib/repofmt/pack_repo.py 2010-04-08 07:11:22 +0000
@@ -587,26 +587,6 @@
587 flush_func=flush_func)587 flush_func=flush_func)
588 self.add_callback = None588 self.add_callback = None
589589
590 def replace_indices(self, index_to_pack, indices):
591 """Replace the current mappings with fresh ones.
592
593 This should probably not be used eventually, rather incremental add and
594 removal of indices. It has been added during refactoring of existing
595 code.
596
597 :param index_to_pack: A mapping from index objects to
598 (transport, name) tuples for the pack file data.
599 :param indices: A list of indices.
600 """
601 # refresh the revision pack map dict without replacing the instance.
602 self.index_to_pack.clear()
603 self.index_to_pack.update(index_to_pack)
604 # XXX: API break - clearly a 'replace' method would be good?
605 self.combined_index._indices[:] = indices
606 # the current add nodes callback for the current writable index if
607 # there is one.
608 self.add_callback = None
609
610 def add_index(self, index, pack):590 def add_index(self, index, pack):
611 """Add index to the aggregate, which is an index for Pack pack.591 """Add index to the aggregate, which is an index for Pack pack.
612592
@@ -619,7 +599,7 @@
         # expose it to the index map
         self.index_to_pack[index] = pack.access_tuple()
         # put it at the front of the linear index list
-        self.combined_index.insert_index(0, index)
+        self.combined_index.insert_index(0, index, pack.name)
 
     def add_writable_index(self, index, pack):
         """Add an index which is able to have data added to it.
@@ -645,6 +625,7 @@
         self.data_access.set_writer(None, None, (None, None))
         self.index_to_pack.clear()
         del self.combined_index._indices[:]
+        del self.combined_index._index_names[:]
         self.add_callback = None
 
     def remove_index(self, index):
@@ -653,7 +634,9 @@
         :param index: An index from the pack parameter.
         """
         del self.index_to_pack[index]
-        self.combined_index._indices.remove(index)
+        pos = self.combined_index._indices.index(index)
+        del self.combined_index._indices[pos]
+        del self.combined_index._index_names[pos]
         if (self.add_callback is not None and
             getattr(index, 'add_nodes', None) == self.add_callback):
             self.add_callback = None
@@ -1415,11 +1398,20 @@
         self.inventory_index = AggregateIndex(self.reload_pack_names, flush)
         self.text_index = AggregateIndex(self.reload_pack_names, flush)
         self.signature_index = AggregateIndex(self.reload_pack_names, flush)
+        all_indices = [self.revision_index, self.inventory_index,
+            self.text_index, self.signature_index]
         if use_chk_index:
             self.chk_index = AggregateIndex(self.reload_pack_names, flush)
+            all_indices.append(self.chk_index)
         else:
             # used to determine if we're using a chk_index elsewhere.
             self.chk_index = None
+        # Tell all the CombinedGraphIndex objects about each other, so they can
+        # share hints about which pack names to search first.
+        all_combined = [agg_idx.combined_index for agg_idx in all_indices]
+        for combined_idx in all_combined:
+            combined_idx.set_sibling_indices(
+                set(all_combined).difference([combined_idx]))
         # resumed packs
         self._resumed_packs = []
 
 
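The wiring loop added to the pack collection's `__init__` above is a plain all-to-all pattern: each combined index gets the set of all the others as siblings. A minimal sketch of that pattern in isolation (the `Node` class is hypothetical; only the `set(...).difference(...)` idiom is taken from the diff):

```python
class Node(object):
    """Stand-in for a CombinedGraphIndex that can record its siblings."""

    def __init__(self, name):
        self.name = name
        self.siblings = set()

    def set_sibling_indices(self, siblings):
        self.siblings = set(siblings)

# Wire every node to all the others, mirroring the loop in the diff.
all_combined = [Node(n) for n in ['rev', 'inv', 'txt', 'sig']]
for node in all_combined:
    node.set_sibling_indices(set(all_combined).difference([node]))
```

The `difference([node])` call keeps a node out of its own sibling set, so propagating a reordering never loops back onto the index that triggered it.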
=== modified file 'bzrlib/tests/per_pack_repository.py'
--- bzrlib/tests/per_pack_repository.py 2010-02-23 07:43:11 +0000
+++ bzrlib/tests/per_pack_repository.py 2010-04-08 07:11:22 +0000
@@ -288,6 +288,23 @@
         repo._pack_collection._clear_obsolete_packs()
         self.assertTrue(repo_transport.has('obsolete_packs/.nfsblahblah'))
 
+    def test_pack_collection_sets_sibling_indices(self):
+        """The CombinedGraphIndex objects in the pack collection are all
+        siblings of each other, so that search-order reorderings will be copied
+        to each other.
+        """
+        repo = self.make_repository('repo')
+        pack_coll = repo._pack_collection
+        indices = set([pack_coll.revision_index, pack_coll.inventory_index,
+            pack_coll.text_index, pack_coll.signature_index])
+        if pack_coll.chk_index is not None:
+            indices.add(pack_coll.chk_index)
+        combined_indices = set(idx.combined_index for idx in indices)
+        for combined_index in combined_indices:
+            self.assertEqual(
+                combined_indices.difference([combined_index]),
+                combined_index._sibling_indices)
+
     def test_pack_after_two_commits_packs_everything(self):
         format = self.get_format()
         tree = self.make_branch_and_tree('.', format=format)
 
=== modified file 'bzrlib/tests/test_index.py'
--- bzrlib/tests/test_index.py 2010-03-05 17:56:55 +0000
+++ bzrlib/tests/test_index.py 2010-04-08 07:11:22 +0000
@@ -1380,6 +1380,50 @@
         self.assertListRaises(errors.NoSuchFile, index.iter_entries_prefix,
             [('1',)])
 
+
+    def make_index_with_simple_nodes(self, name, num_nodes=1):
+        """Make an index named after 'name', with keys named after 'name' too.
+
+        Nodes will have a value of '' and no references.
+        """
+        nodes = [
+            (('index-%s-key-%s' % (name, n),), '', ())
+            for n in range(1, num_nodes+1)]
+        return self.make_index('index-%s' % name, 0, nodes=nodes)
+
+    def test_reorder_after_iter_entries(self):
+        # Four indices with one key each: key1 in index1, key2 in index2,
+        # key3 in index3, key4 in index4.
+        index = CombinedGraphIndex([])
+        index.insert_index(0, self.make_index_with_simple_nodes('1'), '1')
+        index.insert_index(1, self.make_index_with_simple_nodes('2'), '2')
+        index.insert_index(2, self.make_index_with_simple_nodes('3'), '3')
+        index.insert_index(3, self.make_index_with_simple_nodes('4'), '4')
+        index1, index2, index3, index4 = index._indices
+        # Query a key from index4 and index2.
+        self.assertLength(2, list(index.iter_entries(
+            [('index-4-key-1',), ('index-2-key-1',)])))
+        # Now index2 and index4 should be moved to the front (and index1 should
+        # still be before index3).
+        self.assertEqual([index2, index4, index1, index3], index._indices)
+        self.assertEqual(['2', '4', '1', '3'], index._index_names)
+
+    def test_reorder_propagates_to_siblings(self):
+        # Two CombinedGraphIndex objects, with the same number of indices with
+        # matching names.
+        cgi1 = CombinedGraphIndex([])
+        cgi2 = CombinedGraphIndex([])
+        cgi1.insert_index(0, self.make_index_with_simple_nodes('1-1'), 'one')
+        cgi1.insert_index(1, self.make_index_with_simple_nodes('1-2'), 'two')
+        cgi2.insert_index(0, self.make_index_with_simple_nodes('2-1'), 'one')
+        cgi2.insert_index(1, self.make_index_with_simple_nodes('2-2'), 'two')
+        index2_1, index2_2 = cgi2._indices
+        cgi1.set_sibling_indices([cgi2])
+        # Trigger a reordering in cgi1. cgi2 will be reordered as well.
+        list(cgi1.iter_entries([('index-1-2-key-1',)]))
+        self.assertEqual([index2_2, index2_1], cgi2._indices)
+        self.assertEqual(['two', 'one'], cgi2._index_names)
+
     def test_validate_reloads(self):
         index, reload_counter = self.make_combined_index_with_missing()
         index.validate()
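The tests above pin down the behaviour that makes the grub pull cheaper: once an index answers a query, it is consulted first next time. A toy demonstration of the payoff, using counting stub indices (all names here are illustrative, and the reordering is simplified to a stable sort rather than bzrlib's implementation; the generator must be fully consumed for the reorder to take effect):

```python
class StubIndex(object):
    """Fake index that records how many times it was searched."""

    def __init__(self, keys):
        self.keys = set(keys)
        self.lookups = 0

    def iter_entries(self, keys):
        self.lookups += 1
        for key in keys:
            if key in self.keys:
                yield (self, key, '')

class Combined(object):
    """Searches stub indices in order, promoting the ones that answer."""

    def __init__(self, indices):
        self._indices = list(indices)

    def iter_entries(self, keys):
        keys = set(keys)
        hit = []
        for index in self._indices:
            if not keys:
                break  # all keys found; later indices are never touched
            for entry in index.iter_entries(list(keys)):
                keys.discard(entry[1])
                if index not in hit:
                    hit.append(index)
                yield entry
        # Stable sort: indices that answered move to the front.
        self._indices.sort(key=lambda idx: idx not in hit)
```

With one empty index ahead of one populated index, the first query touches both, but after the promotion a repeat query for the same pack stops at the first index, which is the effect the 2.42x-to-0.45x overhead numbers in the description come from.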