Merge lp:~jelmer/launchpad/617393-cleanup-tmp into lp:launchpad

Proposed by Jelmer Vernooij
Status: Merged
Approved by: Jelmer Vernooij
Approved revision: no longer in the source branch.
Merged at revision: 11423
Proposed branch: lp:~jelmer/launchpad/617393-cleanup-tmp
Merge into: lp:launchpad
Diff against target: 575 lines (+190/-156)
4 files modified
lib/lp/archiveuploader/dscfile.py (+131/-106)
lib/lp/archiveuploader/nascentupload.py (+0/-7)
lib/lp/archiveuploader/tests/test_dscfile.py (+58/-41)
lib/lp/archiveuploader/tests/test_nascentuploadfile.py (+1/-2)
To merge this branch: bzr merge lp:~jelmer/launchpad/617393-cleanup-tmp
Reviewer Review Type Date Requested Status
Abel Deuring (community) code Approve
Review via email: mp+33194@code.launchpad.net

Commit message

Clean up temporary directory after unpacking source package for changelog/copyright inspection.

Description of the change

This changes the handling of unpacking of source packages to extract copyright and changelog files.

Rather than keeping the unpacked sources around and cleaning them up afterwards, this makes us clean up the unpacked source directory immediately.

This does have the consequence that we keep the changelog file into memory before streaming it to the librarian rather than streaming it from disk. That seems like a reasonable tradeoff though, as there were already checks in place to make sure that the changelog file is never bigger than 10Mb; we also only keep a single changelog in memory.

To post a comment you must log in.
Revision history for this message
Jelmer Vernooij (jelmer) wrote :

Tests: ./bin/test lp.archiveuploader

QA: Run the process uploader and see if any files are left behind in /tmp

Revision history for this message
Abel Deuring (adeuring) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/archiveuploader/dscfile.py'
2--- lib/lp/archiveuploader/dscfile.py 2010-08-20 20:31:18 +0000
3+++ lib/lp/archiveuploader/dscfile.py 2010-08-24 08:53:41 +0000
4@@ -13,10 +13,11 @@
5 'SignableTagFile',
6 'DSCFile',
7 'DSCUploadedFile',
8- 'findChangelog',
9- 'findCopyright',
10+ 'find_changelog',
11+ 'find_copyright',
12 ]
13
14+from cStringIO import StringIO
15 import errno
16 import glob
17 import os
18@@ -73,6 +74,70 @@
19 from lp.soyuz.interfaces.sourcepackageformat import SourcePackageFormat
20
21
22+class DpkgSourceError(Exception):
23+
24+ _fmt = "Unable to unpack source package (%(result)s): %(output)s"
25+
26+ def __init__(self, output, result):
27+ Exception.__init__(
28+ self, self._fmt % {"output": output, "result": result})
29+ self.output = output
30+ self.result = result
31+
32+
33+def unpack_source(dsc_filepath):
34+ """Unpack a source package into a temporary directory
35+
36+ :param dsc_filepath: Path to the dsc file
37+ :return: Path to the temporary directory with the unpacked sources
38+ """
39+ # Get a temporary dir together.
40+ unpacked_dir = tempfile.mkdtemp()
41+ try:
42+ # chdir into it
43+ cwd = os.getcwd()
44+ os.chdir(unpacked_dir)
45+ try:
46+ args = ["dpkg-source", "-sn", "-x", dsc_filepath]
47+ dpkg_source = subprocess.Popen(args, stdout=subprocess.PIPE,
48+ stderr=subprocess.PIPE)
49+ output, unused = dpkg_source.communicate()
50+ result = dpkg_source.wait()
51+ finally:
52+ # When all is said and done, chdir out again so that we can
53+ # clean up the tree with shutil.rmtree without leaving the
54+ # process in a directory we're trying to remove.
55+ os.chdir(cwd)
56+
57+ if result != 0:
58+ dpkg_output = prefix_multi_line_string(output, " ")
59+ raise DpkgSourceError(result=result, output=dpkg_output)
60+ except:
61+ shutil.rmtree(unpacked_dir)
62+ raise
63+
64+ return unpacked_dir
65+
66+
67+def cleanup_unpacked_dir(unpacked_dir):
68+ """Remove the directory with an unpacked source package.
69+
70+ :param unpacked_dir: Path to the directory.
71+ """
72+ try:
73+ shutil.rmtree(unpacked_dir)
74+ except OSError, error:
75+ if errno.errorcode[error.errno] != 'EACCES':
76+ raise UploadError(
77+ "couldn't remove tmp dir %s: code %s" % (
78+ unpacked_dir, error.errno))
79+ else:
80+ result = os.system("chmod -R u+rwx " + unpacked_dir)
81+ if result != 0:
82+ raise UploadError("chmod failed with %s" % result)
83+ shutil.rmtree(unpacked_dir)
84+
85+
86 class SignableTagFile:
87 """Base class for signed file verification."""
88
89@@ -160,7 +225,7 @@
90 "rfc2047": rfc2047,
91 "name": name,
92 "email": email,
93- "person": person
94+ "person": person,
95 }
96
97
98@@ -177,9 +242,9 @@
99
100 # Note that files is actually only set inside verify().
101 files = None
102- # Copyright and changelog_path are only set inside unpackAndCheckSource().
103+ # Copyright and changelog are only set inside unpackAndCheckSource().
104 copyright = None
105- changelog_path = None
106+ changelog = None
107
108 def __init__(self, filepath, digest, size, component_and_section,
109 priority, package, version, changes, policy, logger):
110@@ -228,12 +293,9 @@
111 else:
112 self.processSignature()
113
114- self.unpacked_dir = None
115-
116 #
117 # Useful properties.
118 #
119-
120 @property
121 def source(self):
122 """Return the DSC source name."""
123@@ -267,12 +329,11 @@
124 #
125 # DSC file checks.
126 #
127-
128 def verify(self):
129 """Verify the uploaded .dsc file.
130
131- This method is an error generator, i.e, it returns an iterator over all
132- exceptions that are generated while processing DSC file checks.
133+ This method is an error generator, i.e, it returns an iterator over
134+ all exceptions that are generated while processing DSC file checks.
135 """
136
137 for error in SourceUploadFile.verify(self):
138@@ -508,82 +569,53 @@
139 self.logger.debug(
140 "Verifying uploaded source package by unpacking it.")
141
142- # Get a temporary dir together.
143- self.unpacked_dir = tempfile.mkdtemp()
144-
145- # chdir into it
146- cwd = os.getcwd()
147- os.chdir(self.unpacked_dir)
148- dsc_in_tmpdir = os.path.join(self.unpacked_dir, self.filename)
149-
150- package_files = self.files + [self]
151 try:
152- for source_file in package_files:
153- os.symlink(
154- source_file.filepath,
155- os.path.join(self.unpacked_dir, source_file.filename))
156- args = ["dpkg-source", "-sn", "-x", dsc_in_tmpdir]
157- dpkg_source = subprocess.Popen(args, stdout=subprocess.PIPE,
158- stderr=subprocess.PIPE)
159- output, unused = dpkg_source.communicate()
160- result = dpkg_source.wait()
161- finally:
162- # When all is said and done, chdir out again so that we can
163- # clean up the tree with shutil.rmtree without leaving the
164- # process in a directory we're trying to remove.
165- os.chdir(cwd)
166-
167- if result != 0:
168- dpkg_output = prefix_multi_line_string(output, " ")
169+ unpacked_dir = unpack_source(self.filepath)
170+ except DpkgSourceError, e:
171 yield UploadError(
172 "dpkg-source failed for %s [return: %s]\n"
173 "[dpkg-source output: %s]"
174- % (self.filename, result, dpkg_output))
175-
176- # Copy debian/copyright file content. It will be stored in the
177- # SourcePackageRelease records.
178-
179- # Check if 'dpkg-source' created only one directory.
180- temp_directories = [
181- dirname for dirname in os.listdir(self.unpacked_dir)
182- if os.path.isdir(dirname)]
183- if len(temp_directories) > 1:
184- yield UploadError(
185- 'Unpacked source contains more than one directory: %r'
186- % temp_directories)
187-
188- # XXX cprov 20070713: We should access only the expected directory
189- # name (<sourcename>-<no_epoch(no_revision(version))>).
190-
191- # Locate both the copyright and changelog files for later processing.
192- for error in findCopyright(self, self.unpacked_dir, self.logger):
193- yield error
194-
195- for error in findChangelog(self, self.unpacked_dir, self.logger):
196- yield error
197-
198- self.logger.debug("Cleaning up source tree.")
199+ % (self.filename, e.result, e.output))
200+ return
201+
202+ try:
203+ # Copy debian/copyright file content. It will be stored in the
204+ # SourcePackageRelease records.
205+
206+ # Check if 'dpkg-source' created only one directory.
207+ temp_directories = [
208+ dirname for dirname in os.listdir(unpacked_dir)
209+ if os.path.isdir(dirname)]
210+ if len(temp_directories) > 1:
211+ yield UploadError(
212+ 'Unpacked source contains more than one directory: %r'
213+ % temp_directories)
214+
215+ # XXX cprov 20070713: We should access only the expected directory
216+ # name (<sourcename>-<no_epoch(no_revision(version))>).
217+
218+ # Locate both the copyright and changelog files for later
219+ # processing.
220+ try:
221+ self.copyright = find_copyright(unpacked_dir, self.logger)
222+ except UploadError, error:
223+ yield error
224+ return
225+ except UploadWarning, warning:
226+ yield warning
227+
228+ try:
229+ self.changelog = find_changelog(unpacked_dir, self.logger)
230+ except UploadError, error:
231+ yield error
232+ return
233+ except UploadWarning, warning:
234+ yield warning
235+ finally:
236+ self.logger.debug("Cleaning up source tree.")
237+ cleanup_unpacked_dir(unpacked_dir)
238 self.logger.debug("Done")
239
240- def cleanUp(self):
241- if self.unpacked_dir is None:
242- return
243- try:
244- shutil.rmtree(self.unpacked_dir)
245- except OSError, error:
246- # XXX: dsilvers 2006-03-15: We currently lack a test for this.
247- if errno.errorcode[error.errno] != 'EACCES':
248- raise UploadError(
249- "%s: couldn't remove tmp dir %s: code %s" % (
250- self.filename, self.unpacked_dir, error.errno))
251- else:
252- result = os.system("chmod -R u+rwx " + self.unpacked_dir)
253- if result != 0:
254- raise UploadError("chmod failed with %s" % result)
255- shutil.rmtree(self.unpacked_dir)
256- self.unpacked_dir = None
257-
258-
259 def findBuild(self):
260 """Find and return the SourcePackageRecipeBuild, if one is specified.
261
262@@ -641,8 +673,8 @@
263
264 changelog_lfa = self.librarian.create(
265 "changelog",
266- os.stat(self.changelog_path).st_size,
267- open(self.changelog_path, "r"),
268+ len(self.changelog),
269+ StringIO(self.changelog),
270 "text/x-debian-source-changelog",
271 restricted=self.policy.archive.private)
272
273@@ -702,6 +734,7 @@
274 validation inside DSCFile.verify(); there is no
275 store_in_database() method.
276 """
277+
278 def __init__(self, filepath, digest, size, policy, logger):
279 component_and_section = priority = "--no-value--"
280 NascentUploadFile.__init__(
281@@ -721,7 +754,7 @@
282
283 :param source_file: The directory where the source was extracted
284 :param source_dir: The directory where the source was extracted.
285- :return fullpath: The full path of the file, else return None if the
286+ :return fullpath: The full path of the file, else return None if the
287 file is not found.
288 """
289 # Instead of trying to predict the unpacked source directory name,
290@@ -744,50 +777,42 @@
291 return fullpath
292 return None
293
294-def findCopyright(dsc_file, source_dir, logger):
295+
296+def find_copyright(source_dir, logger):
297 """Find and store any debian/copyright.
298
299- :param dsc_file: A DSCFile object where the copyright will be stored.
300 :param source_dir: The directory where the source was extracted.
301 :param logger: A logger object for debug output.
302+ :return: Contents of copyright file
303 """
304- try:
305- copyright_file = findFile(source_dir, 'debian/copyright')
306- except UploadError, error:
307- yield error
308- return
309+ copyright_file = findFile(source_dir, 'debian/copyright')
310 if copyright_file is None:
311- yield UploadWarning("No copyright file found.")
312- return
313+ raise UploadWarning("No copyright file found.")
314
315 logger.debug("Copying copyright contents.")
316- dsc_file.copyright = open(copyright_file).read().strip()
317-
318-
319-def findChangelog(dsc_file, source_dir, logger):
320+ return open(copyright_file).read().strip()
321+
322+
323+def find_changelog(source_dir, logger):
324 """Find and move any debian/changelog.
325
326 This function finds the changelog file within the source package. The
327 changelog file is later uploaded to the librarian by
328 DSCFile.storeInDatabase().
329
330- :param dsc_file: A DSCFile object where the copyright will be stored.
331 :param source_dir: The directory where the source was extracted.
332 :param logger: A logger object for debug output.
333+ :return: Changelog contents
334 """
335- try:
336- changelog_file = findFile(source_dir, 'debian/changelog')
337- except UploadError, error:
338- yield error
339- return
340+ changelog_file = findFile(source_dir, 'debian/changelog')
341 if changelog_file is None:
342 # Policy requires debian/changelog to always exist.
343- yield UploadError("No changelog file found.")
344- return
345+ raise UploadError("No changelog file found.")
346
347 # Move the changelog file out of the package direcotry
348 logger.debug("Found changelog")
349- dsc_file.changelog_path = changelog_file
350+ return open(changelog_file, 'r').read()
351+
352
353 def check_format_1_0_files(filename, file_type_counts, component_counts,
354 bzip2_count):
355
356=== modified file 'lib/lp/archiveuploader/nascentupload.py'
357--- lib/lp/archiveuploader/nascentupload.py 2010-08-20 20:31:18 +0000
358+++ lib/lp/archiveuploader/nascentupload.py 2010-08-24 08:53:41 +0000
359@@ -893,12 +893,6 @@
360 'Exception while accepting:\n %s' % e, exc_info=True)
361 self.do_reject(notify)
362 return False
363- else:
364- self.cleanUp()
365-
366- def cleanUp(self):
367- if self.changes.dsc is not None:
368- self.changes.dsc.cleanUp()
369
370 def do_reject(self, notify=True):
371 """Reject the current upload given the reason provided."""
372@@ -929,7 +923,6 @@
373 self.queue_root.notify(summary_text=self.rejection_message,
374 changes_file_object=changes_file_object, logger=self.logger)
375 changes_file_object.close()
376- self.cleanUp()
377
378 def _createQueueEntry(self):
379 """Return a PackageUpload object."""
380
381=== modified file 'lib/lp/archiveuploader/tests/test_dscfile.py'
382--- lib/lp/archiveuploader/tests/test_dscfile.py 2010-08-20 20:31:18 +0000
383+++ lib/lp/archiveuploader/tests/test_dscfile.py 2010-08-24 08:53:41 +0000
384@@ -10,10 +10,12 @@
385 from canonical.launchpad.scripts.logger import QuietFakeLogger
386 from canonical.testing.layers import LaunchpadZopelessLayer
387 from lp.archiveuploader.dscfile import (
388+ cleanup_unpacked_dir,
389 DSCFile,
390- findChangelog,
391- findCopyright,
392+ find_changelog,
393+ find_copyright,
394 format_to_file_checker_map,
395+ unpack_source,
396 )
397 from lp.archiveuploader.nascentuploadfile import UploadError
398 from lp.archiveuploader.tests import (
399@@ -37,9 +39,6 @@
400
401 class TestDscFile(TestCase):
402
403- class MockDSCFile:
404- copyright = None
405-
406 def setUp(self):
407 super(TestDscFile, self).setUp()
408 self.tmpdir = self.makeTemporaryDirectory()
409@@ -47,7 +46,6 @@
410 os.makedirs(self.dir_path)
411 self.copyright_path = os.path.join(self.dir_path, "copyright")
412 self.changelog_path = os.path.join(self.dir_path, "changelog")
413- self.dsc_file = self.MockDSCFile()
414
415 def testBadDebianCopyright(self):
416 """Test that a symlink as debian/copyright will fail.
417@@ -56,14 +54,10 @@
418 dangling symlink in an attempt to try and access files on the system
419 processing the source packages."""
420 os.symlink("/etc/passwd", self.copyright_path)
421- errors = list(findCopyright(
422- self.dsc_file, self.tmpdir, mock_logger_quiet))
423-
424- self.assertEqual(len(errors), 1)
425- self.assertIsInstance(errors[0], UploadError)
426+ error = self.assertRaises(
427+ UploadError, find_copyright, self.tmpdir, mock_logger_quiet)
428 self.assertEqual(
429- errors[0].args[0],
430- "Symbolic link for debian/copyright not allowed")
431+ error.args[0], "Symbolic link for debian/copyright not allowed")
432
433 def testGoodDebianCopyright(self):
434 """Test that a proper copyright file will be accepted"""
435@@ -72,11 +66,8 @@
436 file.write(copyright)
437 file.close()
438
439- errors = list(findCopyright(
440- self.dsc_file, self.tmpdir, mock_logger_quiet))
441-
442- self.assertEqual(len(errors), 0)
443- self.assertEqual(self.dsc_file.copyright, copyright)
444+ self.assertEquals(
445+ copyright, find_copyright(self.tmpdir, mock_logger_quiet))
446
447 def testBadDebianChangelog(self):
448 """Test that a symlink as debian/changelog will fail.
449@@ -85,14 +76,10 @@
450 dangling symlink in an attempt to try and access files on the system
451 processing the source packages."""
452 os.symlink("/etc/passwd", self.changelog_path)
453- errors = list(findChangelog(
454- self.dsc_file, self.tmpdir, mock_logger_quiet))
455-
456- self.assertEqual(len(errors), 1)
457- self.assertIsInstance(errors[0], UploadError)
458+ error = self.assertRaises(
459+ UploadError, find_changelog, self.tmpdir, mock_logger_quiet)
460 self.assertEqual(
461- errors[0].args[0],
462- "Symbolic link for debian/changelog not allowed")
463+ error.args[0], "Symbolic link for debian/changelog not allowed")
464
465 def testGoodDebianChangelog(self):
466 """Test that a proper changelog file will be accepted"""
467@@ -101,12 +88,8 @@
468 file.write(changelog)
469 file.close()
470
471- errors = list(findChangelog(
472- self.dsc_file, self.tmpdir, mock_logger_quiet))
473-
474- self.assertEqual(len(errors), 0)
475- self.assertEqual(self.dsc_file.changelog_path,
476- self.changelog_path)
477+ self.assertEquals(
478+ changelog, find_changelog(self.tmpdir, mock_logger_quiet))
479
480 def testOversizedFile(self):
481 """Test that a file larger than 10MiB will fail.
482@@ -125,13 +108,10 @@
483 file.write(empty_file)
484 file.close()
485
486- errors = list(findChangelog(
487- self.dsc_file, self.tmpdir, mock_logger_quiet))
488-
489- self.assertIsInstance(errors[0], UploadError)
490+ error = self.assertRaises(
491+ UploadError, find_changelog, self.tmpdir, mock_logger_quiet)
492 self.assertEqual(
493- errors[0].args[0],
494- "debian/changelog file too large, 10MiB max")
495+ error.args[0], "debian/changelog file too large, 10MiB max")
496
497
498 class TestDscFileLibrarian(TestCaseWithFactory):
499@@ -141,6 +121,7 @@
500
501 def getDscFile(self, name):
502 dsc_path = datadir(os.path.join('suite', name, name + '.dsc'))
503+
504 class Changes:
505 architectures = ['source']
506 logger = QuietFakeLogger()
507@@ -157,10 +138,7 @@
508 os.chmod(tempdir, 0555)
509 try:
510 dsc_file = self.getDscFile('bar_1.0-1')
511- try:
512- list(dsc_file.verify())
513- finally:
514- dsc_file.cleanUp()
515+ list(dsc_file.verify())
516 finally:
517 os.chmod(tempdir, 0755)
518
519@@ -292,3 +270,42 @@
520 # A 3.0 (native) source with component tarballs is invalid.
521 self.assertErrorsForFiles(
522 [self.wrong_files_error], {NATIVE_TARBALL: 1}, {'foo': 1})
523+
524+
525+class UnpackedDirTests(TestCase):
526+ """Tests for unpack_source and cleanup_unpacked_dir."""
527+
528+ def test_unpack_source(self):
529+ # unpack_source unpacks in a temporary directory and returns the
530+ # path.
531+ unpacked_dir = unpack_source(
532+ datadir(os.path.join('suite', 'bar_1.0-1', 'bar_1.0-1.dsc')))
533+ try:
534+ self.assertEquals(["bar-1.0"], os.listdir(unpacked_dir))
535+ self.assertContentEqual(
536+ ["THIS_IS_BAR", "debian"],
537+ os.listdir(os.path.join(unpacked_dir, "bar-1.0")))
538+ finally:
539+ cleanup_unpacked_dir(unpacked_dir)
540+
541+ def test_cleanup(self):
542+ # cleanup_dir removes the temporary directory and all files under it.
543+ temp_dir = self.makeTemporaryDirectory()
544+ unpacked_dir = os.path.join(temp_dir, "unpacked")
545+ os.mkdir(unpacked_dir)
546+ os.mkdir(os.path.join(unpacked_dir, "bar_1.0"))
547+ cleanup_unpacked_dir(unpacked_dir)
548+ self.assertFalse(os.path.exists(unpacked_dir))
549+
550+ def test_cleanup_invalid_mode(self):
551+ # cleanup_dir can remove a directory even if the mode does
552+ # not allow it.
553+ temp_dir = self.makeTemporaryDirectory()
554+ unpacked_dir = os.path.join(temp_dir, "unpacked")
555+ os.mkdir(unpacked_dir)
556+ bar_path = os.path.join(unpacked_dir, "bar_1.0")
557+ os.mkdir(bar_path)
558+ os.chmod(bar_path, 0600)
559+ os.chmod(unpacked_dir, 0600)
560+ cleanup_unpacked_dir(unpacked_dir)
561+ self.assertFalse(os.path.exists(unpacked_dir))
562
563=== modified file 'lib/lp/archiveuploader/tests/test_nascentuploadfile.py'
564--- lib/lp/archiveuploader/tests/test_nascentuploadfile.py 2010-08-20 20:31:18 +0000
565+++ lib/lp/archiveuploader/tests/test_nascentuploadfile.py 2010-08-24 08:53:41 +0000
566@@ -169,8 +169,7 @@
567 uploadfile = self.createDSCFile(
568 "foo.dsc", dsc, "main/net", "extra", "dulwich", "0.42",
569 self.createChangesFile("foo.changes", changes))
570- (uploadfile.changelog_path, changelog_digest, changelog_size) = (
571- self.writeUploadFile("changelog", "DUMMY"))
572+ uploadfile.changelog = "DUMMY"
573 uploadfile.files = []
574 release = uploadfile.storeInDatabase(None)
575 self.assertEquals("0.42", release.version)