Merge lp:~jameinel/udd/single-download-524123 into lp:udd

Proposed by John A Meinel
Status: Merged
Merged at revision: not available
Proposed branch: lp:~jameinel/udd/single-download-524123
Merge into: lp:udd
Diff against target: 103 lines (+49/-13)
1 file modified
import_package.py (+49/-13)
To merge this branch: bzr merge lp:~jameinel/udd/single-download-524123
Reviewer: Ubuntu Distributed Development Developers (status: Pending)
Review via email: mp+19731@code.launchpad.net
Revision history for this message
John A Meinel (jameinel) wrote :

This changes the dget code so that if a file already exists locally, it computes the md5sum and only downloads the file again if the hash does not match (bug #524123).

I have confirmed that in a single run of 'import_package gnome_panel' I'm getting cache hits (and thus not downloading the same file multiple times).

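In outline, the new check hashes any file that already exists in the target directory and compares it against the md5sum recorded in the .dsc; only on a mismatch (or a missing file) is the download performed. A minimal sketch of that check, using a standalone helper name chosen here for illustration rather than the exact code in the diff below:

import hashlib
import os


def local_copy_is_current(target_path, expected_md5sum, bufsize=128 << 10):
    """Return True if target_path exists and its md5 matches expected_md5sum."""
    # Illustrative helper mirroring the patch's _check_md5; a True result
    # means the download can be skipped entirely.
    if not os.path.exists(target_path):
        return False
    file_md5 = hashlib.md5()
    with open(target_path, 'rb') as f:
        while True:
            block = f.read(bufsize)
            if not block:
                break
            file_md5.update(block)
    return file_md5.hexdigest() == expected_md5sum

The patch folds this logic into import_package.py as _check_md5 and calls it from dget both before each download (to skip files already present) and after (to catch corrupted downloads).
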
Preview Diff

=== modified file 'import_package.py'
--- import_package.py 2010-02-19 17:53:11 +0000
+++ import_package.py 2010-02-19 18:03:12 +0000
@@ -2,6 +2,10 @@
 
 import datetime
 import errno
+try:
+    from hashlib import md5
+except ImportError:
+    from md5 import md5
 import operator
 import optparse
 import os
@@ -313,13 +317,11 @@
     return "debian"
 
 
-def grab_file(location, target_dir, possible_transports=None):
-    mutter("fetching %s" % location)
-    location_base = urlutils.dirname(location)
-    location_file = urlutils.basename(location)
-    local_path = os.path.join(target_dir, location_file)
+def grab_file(base_url, name, target_dir, possible_transports=None):
+    mutter("fetching %s/%s" % (base_url, name))
+    local_path = os.path.join(target_dir, name)
     def get_file(transport):
-        return transport.get(location_file)
+        return transport.get(name)
     def redirected(t, e, redirection_notice):
         # _redirected_to has a bug that it doesn't support possible_transports,
         # so we just call get_transport directly, we know we are just getting a
@@ -328,7 +330,9 @@
         t2 = transport.get_transport(base,
             possible_transports=possible_transports)
         return t2
-    t = transport.get_transport(location_base,
+    # TODO: We could compute the md5sum while iterating the content, rather
+    # than re-reading the file.
+    t = transport.get_transport(base_url,
         possible_transports=possible_transports)
     location_f = transport.do_catching_redirections(get_file, t, redirected)
     try:
@@ -339,21 +343,53 @@
             local_f.close()
     finally:
         location_f.close()
+    return local_path
+
+
+def _check_md5(target_path, expected_md5sum):
+    """See if the md5sum of the given file matches."""
+    if not os.path.exists(target_path):
+        return False
+    file_md5 = md5()
+    BUFSIZE = 128<<10
+    f = open(target_path, 'rb')
+    try:
+        while True:
+            b = f.read(BUFSIZE)
+            if not b:
+                break
+            file_md5.update(b)
+    finally:
+        f.close()
+    if file_md5.hexdigest() == expected_md5sum:
+        mutter('File at %s matched md5sum, reusing.' % (target_path,))
+        return True
+    mutter('File at %s did not match expected md5sum. Redownloading.'
+           % (target_path,))
+    return False
 
 
 def dget(dsc_location, target_dir, possible_transports=None):
-    grab_file(dsc_location, target_dir, possible_transports=possible_transports)
-    local_dsc_path = os.path.join(target_dir,
-                                  urlutils.basename(dsc_location))
+    base_url, dsc_name = urlutils.split(dsc_location)
+    local_dsc_path = grab_file(base_url, dsc_name, target_dir,
+                               possible_transports=possible_transports)
     dsc_f = open(local_dsc_path)
     try:
         dsc = deb822.Dsc(dsc_f)
         files = dsc['files']
         for file_info in files:
             name = file_info['name']
-            # TODO: md5 check
-            grab_file(urlutils.join(urlutils.dirname(dsc_location), name),
-                      target_dir, possible_transports=possible_transports)
+            target_path = os.path.join(target_dir, name)
+            if _check_md5(target_path, file_info['md5sum']):
+                # The file already exists, and the md5sum matches
+                continue
+            # We need to download the file
+            grab_file(base_url, name, target_dir,
                      possible_transports=possible_transports)
+            if not _check_md5(target_path, file_info['md5sum']):
+                raise ValueError('The downloaded content for %s did'
+                                 ' not match the md5sum in %s'
+                                 % (target_path, local_dsc_path))
     finally:
         dsc_f.close()
     return local_dsc_path

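The TODO added to grab_file notes that the md5sum could be computed while the content is being copied, rather than re-reading the file afterwards. A minimal sketch of that idea, assuming a hypothetical helper that grab_file could use when writing local_path (the name and return-value convention are not part of this branch):

import hashlib


def write_and_md5(source_f, local_path, bufsize=128 << 10):
    """Copy source_f to local_path, hashing the bytes as they are written."""
    # Hypothetical helper: one pass over the data produces both the local
    # file and its digest, so no second read is needed for verification.
    file_md5 = hashlib.md5()
    local_f = open(local_path, 'wb')
    try:
        while True:
            block = source_f.read(bufsize)
            if not block:
                break
            file_md5.update(block)
            local_f.write(block)
    finally:
        local_f.close()
    return file_md5.hexdigest()

grab_file could then return the digest alongside local_path, and dget could compare it directly instead of re-hashing the file in the post-download _check_md5 call.
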