Merge lp:~gary/launchpad/bug562828 into lp:launchpad

Proposed by Gary Poster
Status: Merged
Approved by: Aaron Bentley
Approved revision: no longer in the source branch.
Merged at revision: not available
Proposed branch: lp:~gary/launchpad/bug562828
Merge into: lp:launchpad
Diff against target: 170 lines (+129/-2)
2 files modified
lib/canonical/librarian/ftests/test_gc.py (+31/-0)
lib/canonical/librarian/librariangc.py (+98/-2)
To merge this branch: bzr merge lp:~gary/launchpad/bug562828
Reviewer Review Type Date Requested Status
Aaron Bentley (community) Approve
Review via email: mp+24044@code.launchpad.net

Commit message

Switch lib/canonical/librarian/librariangc.py to use a copy of Python 2.6's os.walk in order to support symlinks in our librarian file directory.

Description of the change

This switches lib/canonical/librarian/librariangc.py to use a copy of Python 2.6's os.walk, per the discussion of bug 562828, in order to support symlinks in our librarian file directory.

To post a comment you must log in.
Revision history for this message
Gary Poster (gary) wrote :

Note that _walk in the diff is an almost-pure copy and paste from Python 2.6's os.walk. The only difference is that listdir became os.listdir.

Revision history for this message
Gary Poster (gary) wrote :

Note that I initialized content_id because it was failing when I did a "does my test fail without my fix" test. I don't know if it is a "real" problem but it seemed reasonable to do.

Revision history for this message
Aaron Bentley (abentley) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/canonical/librarian/ftests/test_gc.py'
--- lib/canonical/librarian/ftests/test_gc.py 2010-03-16 10:08:38 +0000
+++ lib/canonical/librarian/ftests/test_gc.py 2010-04-23 22:13:24 +0000
@@ -7,6 +7,7 @@
77
8import shutil8import shutil
9import sys9import sys
10import tempfile
10import os11import os
11from subprocess import Popen, PIPE, STDOUT12from subprocess import Popen, PIPE, STDOUT
12from cStringIO import StringIO13from cStringIO import StringIO
@@ -559,6 +560,36 @@
559 # This should cope.560 # This should cope.
560 librariangc.delete_unwanted_files(self.con)561 librariangc.delete_unwanted_files(self.con)
561562
563 def test_delete_unwanted_files_follows_symlinks(self):
564 # In production, our tree has symlinks in it now. We need to be able
565 # to cope.
566 # First, let's make sure we have some trash.
567 self.layer.switchDbUser(dbuser='testadmin')
568 content = 'foo'
569 self.client.addFile(
570 'foo.txt', len(content), StringIO(content), 'text/plain')
571 # Roll back the database changes, leaving the file on disk.
572 transaction.abort()
573
574 self.layer.switchDbUser(config.librarian_gc.dbuser)
575
576 # Now, we will move the directory containing the trash somewhere else
577 # and make a symlink to it.
578 original = os.path.join(config.librarian_server.root, '00', '00')
579 newdir = tempfile.mkdtemp()
580 alt = os.path.join(newdir, '00')
581 shutil.move(original, alt)
582 os.symlink(alt, original)
583
584 # Now we will do our thing. This is the actual test. It used to
585 # fail.
586 librariangc.delete_unwanted_files(self.con)
587
588 # Clean up.
589 os.remove(original)
590 shutil.move(alt, original)
591 shutil.rmtree(newdir)
592
562 def test_cronscript(self):593 def test_cronscript(self):
563 script_path = os.path.join(594 script_path = os.path.join(
564 config.root, 'cronscripts', 'librarian-gc.py'595 config.root, 'cronscripts', 'librarian-gc.py'
565596
=== modified file 'lib/canonical/librarian/librariangc.py'
--- lib/canonical/librarian/librariangc.py 2010-03-16 10:08:38 +0000
+++ lib/canonical/librarian/librariangc.py 2010-04-23 22:13:24 +0000
@@ -495,6 +495,99 @@
495 loop_tuner = DBLoopTuner(UnreferencedContentPruner(con), 5, log=log)495 loop_tuner = DBLoopTuner(UnreferencedContentPruner(con), 5, log=log)
496 loop_tuner.run()496 loop_tuner.run()
497497
498# XXX gary 2010-04-22 bug=569217
499# We should remove this and use Python 2.6's os.walk once we switch to
500# Python 2.6.
501def _walk(top, topdown=True, onerror=None, followlinks=False):
502 """Directory tree generator.
503
504 For each directory in the directory tree rooted at top (including top
505 itself, but excluding '.' and '..'), yields a 3-tuple
506
507 dirpath, dirnames, filenames
508
509 dirpath is a string, the path to the directory. dirnames is a list of
510 the names of the subdirectories in dirpath (excluding '.' and '..').
511 filenames is a list of the names of the non-directory files in dirpath.
512 Note that the names in the lists are just names, with no path components.
513 To get a full path (which begins with top) to a file or directory in
514 dirpath, do os.path.join(dirpath, name).
515
516 If optional arg 'topdown' is true or not specified, the triple for a
517 directory is generated before the triples for any of its subdirectories
518 (directories are generated top down). If topdown is false, the triple
519 for a directory is generated after the triples for all of its
520 subdirectories (directories are generated bottom up).
521
522 When topdown is true, the caller can modify the dirnames list in-place
523 (e.g., via del or slice assignment), and walk will only recurse into the
524 subdirectories whose names remain in dirnames; this can be used to prune
525 the search, or to impose a specific order of visiting. Modifying
526 dirnames when topdown is false is ineffective, since the directories in
527 dirnames have already been generated by the time dirnames itself is
528 generated.
529
530 By default errors from the os.listdir() call are ignored. If
531 optional arg 'onerror' is specified, it should be a function; it
532 will be called with one argument, an os.error instance. It can
533 report the error to continue with the walk, or raise the exception
534 to abort the walk. Note that the filename is available as the
535 filename attribute of the exception object.
536
537 By default, os.walk does not follow symbolic links to subdirectories on
538 systems that support them. In order to get this functionality, set the
539 optional argument 'followlinks' to true.
540
541 Caution: if you pass a relative pathname for top, don't change the
542 current working directory between resumptions of walk. walk never
543 changes the current directory, and assumes that the client doesn't
544 either.
545
546 Example:
547
548 import os
549 from os.path import join, getsize
550 for root, dirs, files in os.walk('python/Lib/email'):
551 print root, "consumes",
552 print sum([getsize(join(root, name)) for name in files]),
553 print "bytes in", len(files), "non-directory files"
554 if 'CVS' in dirs:
555 dirs.remove('CVS') # don't visit CVS directories
556 """
557
558 from os.path import join, isdir, islink
559
560 # We may not have read permission for top, in which case we can't
561 # get a list of the files the directory contains. os.path.walk
562 # always suppressed the exception then, rather than blow up for a
563 # minor reason when (say) a thousand readable directories are still
564 # left to visit. That logic is copied here.
565 try:
566 # Note that listdir and error are globals in this module due
567 # to earlier import-*.
568 names = os.listdir(top)
569 except error, err:
570 if onerror is not None:
571 onerror(err)
572 return
573
574 dirs, nondirs = [], []
575 for name in names:
576 if isdir(join(top, name)):
577 dirs.append(name)
578 else:
579 nondirs.append(name)
580
581 if topdown:
582 yield top, dirs, nondirs
583 for name in dirs:
584 path = join(top, name)
585 if followlinks or not islink(path):
586 for x in _walk(path, topdown, onerror, followlinks):
587 yield x
588 if not topdown:
589 yield top, dirs, nondirs
590
498591
499def delete_unwanted_files(con):592def delete_unwanted_files(con):
500 """Delete files found on disk that have no corresponding record in the593 """Delete files found on disk that have no corresponding record in the
@@ -524,12 +617,15 @@
524 return result[0]617 return result[0]
525618
526 removed_count = 0619 removed_count = 0
527 next_wanted_content_id = -1620 content_id = next_wanted_content_id = -1
528621
529 hex_content_id_re = re.compile('^[0-9a-f]{8}$')622 hex_content_id_re = re.compile('^[0-9a-f]{8}$')
530 ONE_DAY = 24 * 60 * 60623 ONE_DAY = 24 * 60 * 60
531624
532 for dirpath, dirnames, filenames in os.walk(get_storage_root()):625 # XXX gary 2010-04-22 bug=569217
626 # We should switch back to os.walk once we switch to Python 2.6.
627 for dirpath, dirnames, filenames in _walk(
628 get_storage_root(), followlinks=True):
533629
534 # Ignore known and harmless noise in the Librarian storage area.630 # Ignore known and harmless noise in the Librarian storage area.
535 if 'incoming' in dirnames:631 if 'incoming' in dirnames: