Code review comment for lp:~wgrant/launchpad/distroseries-source-format-selection

Revision history for this message
William Grant (wgrant) wrote :

On Tue, 2009-11-24 at 23:20 +0000, Julian Edwards wrote:
> Review: Needs Fixing code
> Thanks for making this change William. I've commented inline.

Thanks for the review, Julian. I've fixed the main issues -- an
intermediate diff is attached.

> > === modified file 'lib/lp/archiveuploader/dscfile.py'
> > --- lib/lp/archiveuploader/dscfile.py 2009-11-16 22:06:14 +0000
> > +++ lib/lp/archiveuploader/dscfile.py 2009-11-18 03:10:29 +0000
> > @@ -32,7 +32,8 @@
> > from lp.archiveuploader.utils import (
> > prefix_multi_line_string, safe_fix_maintainer, ParseMaintError,
> > re_valid_pkg_name, re_valid_version, re_issource,
> > - determine_source_file_type)
> > + re_is_component_orig_tar_ext, determine_source_file_type,
> > + get_source_file_extension)
>
> Can you put the imports in alphabetical order please.

Done.

> > from canonical.encoding import guess as guess_encoding
> > from lp.registry.interfaces.person import IPersonSet, PersonCreationRationale
> > from lp.registry.interfaces.sourcepackage import SourcePackageFileType
> > @@ -347,8 +348,9 @@
> > distribution=self.policy.distro,
> > purpose=ArchivePurpose.PARTNER)]
> > elif (self.policy.archive.purpose == ArchivePurpose.PPA and
> > - determine_source_file_type(filename) ==
> > - SourcePackageFileType.ORIG_TARBALL):
> > + determine_source_file_type(filename) in (
> > + SourcePackageFileType.ORIG_TARBALL,
> > + SourcePackageFileType.COMPONENT_ORIG_TARBALL)):
> > archives = [self.policy.archive, self.policy.distro.main_archive]
> > else:
> > archives = [self.policy.archive]
> > @@ -374,9 +376,13 @@
> > """
> >
> > diff_count = 0
> > + debian_tar_count = 0
> > orig_tar_count = 0
> > + component_orig_tar_counts = {}
> > native_tar_count = 0
> >
> > + bzip2_count = 0
> > +
> > files_missing = False
> > for sub_dsc_file in self.files:
> > filetype = determine_source_file_type(sub_dsc_file.filename)
> > @@ -385,12 +391,23 @@
> > diff_count += 1
> > elif filetype == SourcePackageFileType.ORIG_TARBALL:
> > orig_tar_count += 1
> > + elif filetype == SourcePackageFileType.COMPONENT_ORIG_TARBALL:
> > + component = re_is_component_orig_tar_ext.match(
> > + get_source_file_extension(sub_dsc_file.filename)).group(1)
> > + if component not in component_orig_tar_counts:
> > + component_orig_tar_counts[component] = 0
> > + component_orig_tar_counts[component] += 1
> > + elif filetype == SourcePackageFileType.DEBIAN_TARBALL:
> > + debian_tar_count += 1
> > elif filetype == SourcePackageFileType.NATIVE_TARBALL:
> > native_tar_count += 1
>
> This big list of "elif" statements is looking a bit unwieldy, especially with all the new conditions. Unless you can suggest anything better, how about we do something like:
> [snip]

While your suggested solution would not work (at least not without the
'nonlocal' keyword), I believe I have found something better. I keep one
dict mapping SPFTs to counts, and another mapping component names to
counts. This means there's only one special case (for
COMPONENT_ORIG_TARBALL), and the code for each format becomes much
cleaner too.

> > else:
> > yield UploadError('Unknown file: ' + sub_dsc_file.filename)
> > continue
> >
> > + if sub_dsc_file.filename.endswith('.bz2'):
> > + bzip2_count += 1
> > +
> > try:
> > library_file, file_archive = self._getFileByName(
> > sub_dsc_file.filename)
> > @@ -440,6 +457,10 @@
> > yield UploadError(
> > "%s: has more than one orig.tar.*."
> > % self.filename)
> > + if debian_tar_count > 1:
> > + yield UploadError(
> > + "%s: has more than one debian.tar.*."
> > + % self.filename)
> > if native_tar_count > 1:
> > yield UploadError(
> > "%s: has more than one tar.*."
> > @@ -455,15 +476,73 @@
> > "%s: must have exactly one tar.* or orig.tar.*."
> > % self.filename)
> >
> > + if native_tar_count > 0 and debian_tar_count > 0:
> > + yield UploadError(
> > + "%s: must have no more than one tar.* or debian.tar.*."
> > + % self.filename)
> > +
> > # Format 1.0 must be native (exactly one tar.gz), or
> > # have an orig.tar.gz and a diff.gz. It cannot have
> > # compression types other than 'gz'.
> > if self.format == '1.0':
> > + if bzip2_count > 0:
> > + yield UploadError(
> > + "%s: is format 1.0 but uses bzip2 compression."
> > + % self.filename)
> > +
> > if ((diff_count == 0 and native_tar_count == 0) or
> > (diff_count > 0 and native_tar_count > 0)):
> > yield UploadError(
> > "%s: must have exactly one diff.gz or tar.gz."
> > % self.filename)
> > +
> > + if debian_tar_count > 0:
> > + yield UploadError(
> > + "%s: is format 1.0 but has debian.tar.*."
> > + % self.filename)
> > +
> > + if len(component_orig_tar_counts) > 0:
> > + yield UploadError(
> > + "%s: is format 1.0 but has orig-COMPONENT.tar.*."
> > + % self.filename)
> > + # Format 3.0 (native) must have exactly one tar.*.
> > + # gz and bz2 are valid compression types.
> > + elif self.format == '3.0 (native)':
> > + if native_tar_count == 0:
> > + yield UploadError(
> > + "%s: must have exactly one tar.*."
> > + % self.filename)
> > +
> > + if diff_count > 0:
> > + yield UploadError(
> > + "%s: is format 3.0 but has diff.gz."
> > + % self.filename)
> > +
> > + if len(component_orig_tar_counts) > 0:
> > + yield UploadError(
> > + "%s: is native but has orig-COMPONENT.tar.*."
> > + % self.filename)
> > + elif self.format == '3.0 (quilt)':
> > + if orig_tar_count == 0:
> > + yield UploadError(
> > + "%s: must have exactly one orig.tar.*."
> > + % self.filename)
> > +
> > + if debian_tar_count == 0:
> > + yield UploadError(
> > + "%s: must have exactly one debian.tar.*."
> > + % self.filename)
> > +
> > + if diff_count > 0:
> > + yield UploadError(
> > + "%s: is format 3.0 but has diff.gz."
> > + % self.filename)
> > +
> > + for component in component_orig_tar_counts:
> > + if component_orig_tar_counts[component] > 1:
> > + yield UploadError(
> > + "%s: has more than one orig-%s.tar.*."
> > + % (self.filename, component))
>
> Can you split this massive method up a bit please. Ideally into three separate
> methods that check the different formats.

I've split it, and completely changed the verification style. The
failure messages are much less verbose, but one would have to manually
edit the .dsc to get any of them anyway. I feel the reduction in code
makes things much better.

> That will also make it easier for you to unit test this change without creating
> new test packages so that we can be sure all the new file type rejection cases
> are handled.

This does make testing much easier.

However, there are now just a few code paths, but a lot of potential
failure cases. How completely should I test?

> Also, can you please change it to compare against the format enum instead
> of strings. Then the assertion error below will make more sense.
>
> > else:
> > raise AssertionError("Unknown source format.")

It will now die with a KeyError when looking up the file checker, which
seems as reasonable as the old explicit AssertionError.

> >
> >
> > === modified file 'lib/lp/archiveuploader/nascentuploadfile.py'
> > --- lib/lp/archiveuploader/nascentuploadfile.py 2009-11-10 13:09:26 +0000
> > +++ lib/lp/archiveuploader/nascentuploadfile.py 2009-11-18 03:10:30 +0000
> > @@ -352,8 +352,9 @@
> > "Architecture field." % (self.filename))
> >
> > version_chopped = re_no_epoch.sub('', self.version)
> > - if determine_source_file_type(self.filename) == (
> > - SourcePackageFileType.ORIG_TARBALL):
> > + if determine_source_file_type(self.filename) in (
> > + SourcePackageFileType.ORIG_TARBALL,
> > + SourcePackageFileType.COMPONENT_ORIG_TARBALL):
>
> I've seen this check a couple of times now, I think it would be a good idea to
> refactor it into a property. (is_orig or something like that)

The two uses are on different types: this is a SourceUploadFile, while
the other is a SourcePackageReleaseFile. Should I instead create a
utility function alongside SPFT, perhaps?

> [snip]

1=== modified file 'lib/lp/archiveuploader/dscfile.py'
2--- lib/lp/archiveuploader/dscfile.py 2009-11-18 02:58:23 +0000
3+++ lib/lp/archiveuploader/dscfile.py 2009-11-25 09:40:30 +0000
4@@ -30,10 +30,9 @@
5 from lp.archiveuploader.tagfiles import (
6 parse_tagfile, TagFileParseError)
7 from lp.archiveuploader.utils import (
8- prefix_multi_line_string, safe_fix_maintainer, ParseMaintError,
9- re_valid_pkg_name, re_valid_version, re_issource,
10- re_is_component_orig_tar_ext, determine_source_file_type,
11- get_source_file_extension)
12+ determine_source_file_type, get_source_file_extension,
13+ ParseMaintError, prefix_multi_line_string, re_is_component_orig_tar_ext,
14+ re_issource, re_valid_pkg_name, re_valid_version, safe_fix_maintainer)
15 from canonical.encoding import guess as guess_encoding
16 from lp.registry.interfaces.person import IPersonSet, PersonCreationRationale
17 from lp.registry.interfaces.sourcepackage import SourcePackageFileType
18@@ -161,6 +160,9 @@
19
20 Can raise UploadError.
21 """
22+ # Avoid circular imports.
23+ from lp.archiveuploader.nascentupload import EarlyReturnUploadError
24+
25 SourceUploadFile.__init__(
26 self, filepath, digest, size, component_and_section, priority,
27 package, version, changes, policy, logger)
28@@ -187,6 +189,10 @@
29 if 'format' not in self._dict:
30 self._dict['format'] = "1.0"
31
32+ if self.format is None:
33+ raise EarlyReturnUploadError(
34+ "Unsupported source format: %s" % self._dict['format'])
35+
36 if self.policy.unsigned_dsc_ok:
37 self.logger.debug("DSC file can be unsigned.")
38 else:
39@@ -209,7 +215,11 @@
40 @property
41 def format(self):
42 """Return the DSC format."""
43- return self._dict['format']
44+ try:
45+ return SourcePackageFormat.getTermByToken(
46+ self._dict['format']).value
47+ except LookupError:
48+ return None
49
50 @property
51 def architecture(self):
52@@ -232,8 +242,6 @@
53 This method is an error generator, i.e, it returns an iterator over all
54 exceptions that are generated while processing DSC file checks.
55 """
56- # Avoid circular imports.
57- from lp.archiveuploader.nascentupload import EarlyReturnUploadError
58
59 for error in SourceUploadFile.verify(self):
60 yield error
61@@ -272,14 +280,8 @@
62 yield UploadError(
63 "%s: invalid version %s" % (self.filename, self.dsc_version))
64
65- try:
66- format_term = SourcePackageFormat.getTermByToken(self.format)
67- except LookupError:
68- raise EarlyReturnUploadError(
69- "Unsupported source format: %s" % self.format)
70-
71 if not self.policy.distroseries.isSourcePackageFormatPermitted(
72- format_term.value):
73+ self.format):
74 yield UploadError(
75 "%s: format '%s' is not permitted in %s." %
76 (self.filename, self.format, self.policy.distroseries.name))
77@@ -375,35 +377,32 @@
78 and checksum.
79 """
80
81- diff_count = 0
82- debian_tar_count = 0
83- orig_tar_count = 0
84+ file_type_counts = {
85+ SourcePackageFileType.DIFF: 0,
86+ SourcePackageFileType.ORIG_TARBALL: 0,
87+ SourcePackageFileType.DEBIAN_TARBALL: 0,
88+ SourcePackageFileType.NATIVE_TARBALL: 0,
89+ }
90 component_orig_tar_counts = {}
91- native_tar_count = 0
92-
93 bzip2_count = 0
94-
95 files_missing = False
96+
97 for sub_dsc_file in self.files:
98- filetype = determine_source_file_type(sub_dsc_file.filename)
99-
100- if filetype == SourcePackageFileType.DIFF:
101- diff_count += 1
102- elif filetype == SourcePackageFileType.ORIG_TARBALL:
103- orig_tar_count += 1
104- elif filetype == SourcePackageFileType.COMPONENT_ORIG_TARBALL:
105+ file_type = determine_source_file_type(sub_dsc_file.filename)
106+
107+ if file_type is None:
108+ yield UploadError('Unknown file: ' + sub_dsc_file.filename)
109+ continue
110+
111+ if file_type == SourcePackageFileType.COMPONENT_ORIG_TARBALL:
112+ # Split the count by component name.
113 component = re_is_component_orig_tar_ext.match(
114 get_source_file_extension(sub_dsc_file.filename)).group(1)
115 if component not in component_orig_tar_counts:
116 component_orig_tar_counts[component] = 0
117 component_orig_tar_counts[component] += 1
118- elif filetype == SourcePackageFileType.DEBIAN_TARBALL:
119- debian_tar_count += 1
120- elif filetype == SourcePackageFileType.NATIVE_TARBALL:
121- native_tar_count += 1
122 else:
123- yield UploadError('Unknown file: ' + sub_dsc_file.filename)
124- continue
125+ file_type_counts[file_type] += 1
126
127 if sub_dsc_file.filename.endswith('.bz2'):
128 bzip2_count += 1
129@@ -452,99 +451,10 @@
130 yield error
131 files_missing = True
132
133- # Reject if we have more than one file of any type.
134- if orig_tar_count > 1:
135- yield UploadError(
136- "%s: has more than one orig.tar.*."
137- % self.filename)
138- if debian_tar_count > 1:
139- yield UploadError(
140- "%s: has more than one debian.tar.*."
141- % self.filename)
142- if native_tar_count > 1:
143- yield UploadError(
144- "%s: has more than one tar.*."
145- % self.filename)
146- if diff_count > 1:
147- yield UploadError(
148- "%s: has more than one diff.gz."
149- % self.filename)
150-
151- if ((orig_tar_count == 0 and native_tar_count == 0) or
152- (orig_tar_count > 0 and native_tar_count > 0)):
153- yield UploadError(
154- "%s: must have exactly one tar.* or orig.tar.*."
155- % self.filename)
156-
157- if native_tar_count > 0 and debian_tar_count > 0:
158- yield UploadError(
159- "%s: must have no more than one tar.* or debian.tar.*."
160- % self.filename)
161-
162- # Format 1.0 must be native (exactly one tar.gz), or
163- # have an orig.tar.gz and a diff.gz. It cannot have
164- # compression types other than 'gz'.
165- if self.format == '1.0':
166- if bzip2_count > 0:
167- yield UploadError(
168- "%s: is format 1.0 but uses bzip2 compression."
169- % self.filename)
170-
171- if ((diff_count == 0 and native_tar_count == 0) or
172- (diff_count > 0 and native_tar_count > 0)):
173- yield UploadError(
174- "%s: must have exactly one diff.gz or tar.gz."
175- % self.filename)
176-
177- if debian_tar_count > 0:
178- yield UploadError(
179- "%s: is format 1.0 but has debian.tar.*."
180- % self.filename)
181-
182- if len(component_orig_tar_counts) > 0:
183- yield UploadError(
184- "%s: is format 1.0 but has orig-COMPONENT.tar.*."
185- % self.filename)
186- # Format 3.0 (native) must have exactly one tar.*.
187- # gz and bz2 are valid compression types.
188- elif self.format == '3.0 (native)':
189- if native_tar_count == 0:
190- yield UploadError(
191- "%s: must have exactly one tar.*."
192- % self.filename)
193-
194- if diff_count > 0:
195- yield UploadError(
196- "%s: is format 3.0 but has diff.gz."
197- % self.filename)
198-
199- if len(component_orig_tar_counts) > 0:
200- yield UploadError(
201- "%s: is native but has orig-COMPONENT.tar.*."
202- % self.filename)
203- elif self.format == '3.0 (quilt)':
204- if orig_tar_count == 0:
205- yield UploadError(
206- "%s: must have exactly one orig.tar.*."
207- % self.filename)
208-
209- if debian_tar_count == 0:
210- yield UploadError(
211- "%s: must have exactly one debian.tar.*."
212- % self.filename)
213-
214- if diff_count > 0:
215- yield UploadError(
216- "%s: is format 3.0 but has diff.gz."
217- % self.filename)
218-
219- for component in component_orig_tar_counts:
220- if component_orig_tar_counts[component] > 1:
221- yield UploadError(
222- "%s: has more than one orig-%s.tar.*."
223- % (self.filename, component))
224- else:
225- raise AssertionError("Unknown source format.")
226+ for error in format_to_file_checker_map[self.format](
227+ self.filename, file_type_counts, component_orig_tar_counts,
228+ bzip2_count):
229+ yield error
230
231 if files_missing:
232 yield UploadError(
233@@ -727,3 +637,78 @@
234 yield error
235
236
237+def check_format_1_0_files(filename, file_type_counts, component_counts,
238+ bzip2_count):
239+ """Check that the given counts of each file type suit format 1.0.
240+
241+ A 1.0 source must be native (with only one tar.gz), or have an orig.tar.gz
242+ and a diff.gz. It cannot use bzip2 compression.
243+ """
244+ if bzip2_count > 0:
245+ yield UploadError(
246+ "%s: is format 1.0 but uses bzip2 compression."
247+ % filename)
248+
249+ if file_type_counts not in ({
250+ SourcePackageFileType.NATIVE_TARBALL: 1,
251+ SourcePackageFileType.ORIG_TARBALL: 0,
252+ SourcePackageFileType.DEBIAN_TARBALL: 0,
253+ SourcePackageFileType.DIFF: 0,
254+ }, {
255+ SourcePackageFileType.ORIG_TARBALL: 1,
256+ SourcePackageFileType.DIFF: 1,
257+ SourcePackageFileType.NATIVE_TARBALL: 0,
258+ SourcePackageFileType.DEBIAN_TARBALL: 0,
259+ }) or len(component_counts) > 0:
260+ yield UploadError(
261+ "%s: must have exactly one tar.gz, or an orig.tar.gz and diff.gz"
262+ % filename)
263+
264+
265+def check_format_3_0_native_files(filename, file_type_counts,
266+ component_counts, bzip2_count):
267+ """Check that the given counts of each file type suit format 3.0 (native).
268+
269+ A 3.0 (native) source must have only one tar.*. Both gzip and bzip2
270+ compression are permissible.
271+ """
272+
273+ if file_type_counts != {
274+ SourcePackageFileType.NATIVE_TARBALL: 1,
275+ SourcePackageFileType.ORIG_TARBALL: 0,
276+ SourcePackageFileType.DEBIAN_TARBALL: 0,
277+ SourcePackageFileType.DIFF: 0,
278+ } or len(component_counts) > 0:
279+ yield UploadError("%s: must have only a tar.*." % filename)
280+
281+
282+def check_format_3_0_quilt_files(filename, file_type_counts,
283+ component_counts, bzip2_count):
284+ """Check that the given counts of each file type suit format 3.0 (native).
285+
286+ A 3.0 (quilt) source must have exactly one orig.tar.*, one debian.tar.*,
287+ and at most one orig-COMPONENT.tar.* for each COMPONENT. Both gzip and
288+ bzip2 compression are permissible.
289+ """
290+ if file_type_counts != {
291+ SourcePackageFileType.ORIG_TARBALL: 1,
292+ SourcePackageFileType.DEBIAN_TARBALL: 1,
293+ SourcePackageFileType.NATIVE_TARBALL: 0,
294+ SourcePackageFileType.DIFF: 0,
295+ }:
296+ yield UploadError(
297+ "%s: must have only an orig.tar.*, a debian.tar.*, and "
298+ "optionally orig-*.tar.*" % filename)
299+
300+ for component in component_counts:
301+ if component_counts[component] > 1:
302+ yield UploadError(
303+ "%s: has more than one orig-%s.tar.*."
304+ % (filename, component))
305+
306+
307+format_to_file_checker_map = {
308+ SourcePackageFormat.FORMAT_1_0: check_format_1_0_files,
309+ SourcePackageFormat.FORMAT_3_0_NATIVE: check_format_3_0_native_files,
310+ SourcePackageFormat.FORMAT_3_0_QUILT: check_format_3_0_quilt_files,
311+ }

« Back to merge proposal