Merge lp:~jameinel/udd/single-download-524123 into lp:udd

Proposed by John A Meinel
Status: Merged
Merged at revision: not available
Proposed branch: lp:~jameinel/udd/single-download-524123
Merge into: lp:udd
Diff against target: 103 lines (+49/-13)
1 file modified
import_package.py (+49/-13)
To merge this branch: bzr merge lp:~jameinel/udd/single-download-524123
Reviewer Review Type Date Requested Status
Ubuntu Distributed Development Developers Pending
Review via email: mp+19731@code.launchpad.net
To post a comment you must log in.
Revision history for this message
John A Meinel (jameinel) wrote :

This changes the dget code so that if a file exists locally, it computes the md5sum and only downloads the file if the hash does not match (bug #524123).

I have confirmed that in a single run of 'import_package gnome_panel' I'm getting cache hits (and thus not downloading the same file multiple times).

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'import_package.py'
--- import_package.py 2010-02-19 17:53:11 +0000
+++ import_package.py 2010-02-19 18:03:12 +0000
@@ -2,6 +2,10 @@
22
3import datetime3import datetime
4import errno4import errno
5try:
6 from hashlib import md5
7except ImportError:
8 from md5 import md5
5import operator9import operator
6import optparse10import optparse
7import os11import os
@@ -313,13 +317,11 @@
313 return "debian"317 return "debian"
314318
315319
316def grab_file(location, target_dir, possible_transports=None):320def grab_file(base_url, name, target_dir, possible_transports=None):
317 mutter("fetching %s" % location)321 mutter("fetching %s/%s" % (base_url, name))
318 location_base = urlutils.dirname(location)322 local_path = os.path.join(target_dir, name)
319 location_file = urlutils.basename(location)
320 local_path = os.path.join(target_dir, location_file)
321 def get_file(transport):323 def get_file(transport):
322 return transport.get(location_file)324 return transport.get(name)
323 def redirected(t, e, redirection_notice):325 def redirected(t, e, redirection_notice):
324 # _redirected_to has a bug that it doesn't support possible_transports,326 # _redirected_to has a bug that it doesn't support possible_transports,
325 # so we just call get_transport directly, we know we are just getting a327 # so we just call get_transport directly, we know we are just getting a
@@ -328,7 +330,9 @@
328 t2 = transport.get_transport(base,330 t2 = transport.get_transport(base,
329 possible_transports=possible_transports)331 possible_transports=possible_transports)
330 return t2332 return t2
331 t = transport.get_transport(location_base,333 # TODO: We could compute the md5sum while iterating the content, rather
334 # than re-reading the file.
335 t = transport.get_transport(base_url,
332 possible_transports=possible_transports)336 possible_transports=possible_transports)
333 location_f = transport.do_catching_redirections(get_file, t, redirected)337 location_f = transport.do_catching_redirections(get_file, t, redirected)
334 try:338 try:
@@ -339,21 +343,53 @@
339 local_f.close()343 local_f.close()
340 finally:344 finally:
341 location_f.close()345 location_f.close()
346 return local_path
347
348
349def _check_md5(target_path, expected_md5sum):
350 """See if the md5sum of the given file matches."""
351 if not os.path.exists(target_path):
352 return False
353 file_md5 = md5()
354 BUFSIZE = 128<<10
355 f = open(target_path, 'rb')
356 try:
357 while True:
358 b = f.read(BUFSIZE)
359 if not b:
360 break
361 file_md5.update(b)
362 finally:
363 f.close()
364 if file_md5.hexdigest() == expected_md5sum:
365 mutter('File at %s matched md5sum, reusing.' % (target_path,))
366 return True
367 mutter('File at %s did not match expected md5sum. Redownloading.'
368 % (target_path,))
369 return False
342370
343371
344def dget(dsc_location, target_dir, possible_transports=None):372def dget(dsc_location, target_dir, possible_transports=None):
345 grab_file(dsc_location, target_dir, possible_transports=possible_transports)373 base_url, dsc_name = urlutils.split(dsc_location)
346 local_dsc_path = os.path.join(target_dir,374 local_dsc_path = grab_file(base_url, dsc_name, target_dir,
347 urlutils.basename(dsc_location))375 possible_transports=possible_transports)
348 dsc_f = open(local_dsc_path)376 dsc_f = open(local_dsc_path)
349 try:377 try:
350 dsc = deb822.Dsc(dsc_f)378 dsc = deb822.Dsc(dsc_f)
351 files = dsc['files']379 files = dsc['files']
352 for file_info in files:380 for file_info in files:
353 name = file_info['name']381 name = file_info['name']
354 # TODO: md5 check382 target_path = os.path.join(target_dir, name)
355 grab_file(urlutils.join(urlutils.dirname(dsc_location), name),383 if _check_md5(target_path, file_info['md5sum']):
356 target_dir, possible_transports=possible_transports)384 # The file already exists, and the md5sum matches
385 continue
386 # We need to download the file
387 grab_file(base_url, name, target_dir,
388 possible_transports=possible_transports)
389 if not _check_md5(target_path, file_info['md5sum']):
390 raise ValueError('The downloaded content for %s did'
391 ' not match the md5sum in %s'
392 % (target_path, local_dsc_path))
357 finally:393 finally:
358 dsc_f.close()394 dsc_f.close()
359 return local_dsc_path395 return local_dsc_path

Subscribers

People subscribed via source and target branches