Code review comment for lp:~wgrant/launchpad/distroseries-source-format-selection
- distroseries-source-format-selection
- Merge into db-devel
Revision history for this message
William Grant (wgrant) wrote : | # |
1 | === modified file 'lib/lp/archiveuploader/dscfile.py' |
2 | --- lib/lp/archiveuploader/dscfile.py 2009-11-18 02:58:23 +0000 |
3 | +++ lib/lp/archiveuploader/dscfile.py 2009-11-25 09:40:30 +0000 |
4 | @@ -30,10 +30,9 @@ |
5 | from lp.archiveuploader.tagfiles import ( |
6 | parse_tagfile, TagFileParseError) |
7 | from lp.archiveuploader.utils import ( |
8 | - prefix_multi_line_string, safe_fix_maintainer, ParseMaintError, |
9 | - re_valid_pkg_name, re_valid_version, re_issource, |
10 | - re_is_component_orig_tar_ext, determine_source_file_type, |
11 | - get_source_file_extension) |
12 | + determine_source_file_type, get_source_file_extension, |
13 | + ParseMaintError, prefix_multi_line_string, re_is_component_orig_tar_ext, |
14 | + re_issource, re_valid_pkg_name, re_valid_version, safe_fix_maintainer) |
15 | from canonical.encoding import guess as guess_encoding |
16 | from lp.registry.interfaces.person import IPersonSet, PersonCreationRationale |
17 | from lp.registry.interfaces.sourcepackage import SourcePackageFileType |
18 | @@ -161,6 +160,9 @@ |
19 | |
20 | Can raise UploadError. |
21 | """ |
22 | + # Avoid circular imports. |
23 | + from lp.archiveuploader.nascentupload import EarlyReturnUploadError |
24 | + |
25 | SourceUploadFile.__init__( |
26 | self, filepath, digest, size, component_and_section, priority, |
27 | package, version, changes, policy, logger) |
28 | @@ -187,6 +189,10 @@ |
29 | if 'format' not in self._dict: |
30 | self._dict['format'] = "1.0" |
31 | |
32 | + if self.format is None: |
33 | + raise EarlyReturnUploadError( |
34 | + "Unsupported source format: %s" % self._dict['format']) |
35 | + |
36 | if self.policy.unsigned_dsc_ok: |
37 | self.logger.debug("DSC file can be unsigned.") |
38 | else: |
39 | @@ -209,7 +215,11 @@ |
40 | @property |
41 | def format(self): |
42 | """Return the DSC format.""" |
43 | - return self._dict['format'] |
44 | + try: |
45 | + return SourcePackageFormat.getTermByToken( |
46 | + self._dict['format']).value |
47 | + except LookupError: |
48 | + return None |
49 | |
50 | @property |
51 | def architecture(self): |
52 | @@ -232,8 +242,6 @@ |
53 | This method is an error generator, i.e, it returns an iterator over all |
54 | exceptions that are generated while processing DSC file checks. |
55 | """ |
56 | - # Avoid circular imports. |
57 | - from lp.archiveuploader.nascentupload import EarlyReturnUploadError |
58 | |
59 | for error in SourceUploadFile.verify(self): |
60 | yield error |
61 | @@ -272,14 +280,8 @@ |
62 | yield UploadError( |
63 | "%s: invalid version %s" % (self.filename, self.dsc_version)) |
64 | |
65 | - try: |
66 | - format_term = SourcePackageFormat.getTermByToken(self.format) |
67 | - except LookupError: |
68 | - raise EarlyReturnUploadError( |
69 | - "Unsupported source format: %s" % self.format) |
70 | - |
71 | if not self.policy.distroseries.isSourcePackageFormatPermitted( |
72 | - format_term.value): |
73 | + self.format): |
74 | yield UploadError( |
75 | "%s: format '%s' is not permitted in %s." % |
76 | (self.filename, self.format, self.policy.distroseries.name)) |
77 | @@ -375,35 +377,32 @@ |
78 | and checksum. |
79 | """ |
80 | |
81 | - diff_count = 0 |
82 | - debian_tar_count = 0 |
83 | - orig_tar_count = 0 |
84 | + file_type_counts = { |
85 | + SourcePackageFileType.DIFF: 0, |
86 | + SourcePackageFileType.ORIG_TARBALL: 0, |
87 | + SourcePackageFileType.DEBIAN_TARBALL: 0, |
88 | + SourcePackageFileType.NATIVE_TARBALL: 0, |
89 | + } |
90 | component_orig_tar_counts = {} |
91 | - native_tar_count = 0 |
92 | - |
93 | bzip2_count = 0 |
94 | - |
95 | files_missing = False |
96 | + |
97 | for sub_dsc_file in self.files: |
98 | - filetype = determine_source_file_type(sub_dsc_file.filename) |
99 | - |
100 | - if filetype == SourcePackageFileType.DIFF: |
101 | - diff_count += 1 |
102 | - elif filetype == SourcePackageFileType.ORIG_TARBALL: |
103 | - orig_tar_count += 1 |
104 | - elif filetype == SourcePackageFileType.COMPONENT_ORIG_TARBALL: |
105 | + file_type = determine_source_file_type(sub_dsc_file.filename) |
106 | + |
107 | + if file_type is None: |
108 | + yield UploadError('Unknown file: ' + sub_dsc_file.filename) |
109 | + continue |
110 | + |
111 | + if file_type == SourcePackageFileType.COMPONENT_ORIG_TARBALL: |
112 | + # Split the count by component name. |
113 | component = re_is_component_orig_tar_ext.match( |
114 | get_source_file_extension(sub_dsc_file.filename)).group(1) |
115 | if component not in component_orig_tar_counts: |
116 | component_orig_tar_counts[component] = 0 |
117 | component_orig_tar_counts[component] += 1 |
118 | - elif filetype == SourcePackageFileType.DEBIAN_TARBALL: |
119 | - debian_tar_count += 1 |
120 | - elif filetype == SourcePackageFileType.NATIVE_TARBALL: |
121 | - native_tar_count += 1 |
122 | else: |
123 | - yield UploadError('Unknown file: ' + sub_dsc_file.filename) |
124 | - continue |
125 | + file_type_counts[file_type] += 1 |
126 | |
127 | if sub_dsc_file.filename.endswith('.bz2'): |
128 | bzip2_count += 1 |
129 | @@ -452,99 +451,10 @@ |
130 | yield error |
131 | files_missing = True |
132 | |
133 | - # Reject if we have more than one file of any type. |
134 | - if orig_tar_count > 1: |
135 | - yield UploadError( |
136 | - "%s: has more than one orig.tar.*." |
137 | - % self.filename) |
138 | - if debian_tar_count > 1: |
139 | - yield UploadError( |
140 | - "%s: has more than one debian.tar.*." |
141 | - % self.filename) |
142 | - if native_tar_count > 1: |
143 | - yield UploadError( |
144 | - "%s: has more than one tar.*." |
145 | - % self.filename) |
146 | - if diff_count > 1: |
147 | - yield UploadError( |
148 | - "%s: has more than one diff.gz." |
149 | - % self.filename) |
150 | - |
151 | - if ((orig_tar_count == 0 and native_tar_count == 0) or |
152 | - (orig_tar_count > 0 and native_tar_count > 0)): |
153 | - yield UploadError( |
154 | - "%s: must have exactly one tar.* or orig.tar.*." |
155 | - % self.filename) |
156 | - |
157 | - if native_tar_count > 0 and debian_tar_count > 0: |
158 | - yield UploadError( |
159 | - "%s: must have no more than one tar.* or debian.tar.*." |
160 | - % self.filename) |
161 | - |
162 | - # Format 1.0 must be native (exactly one tar.gz), or |
163 | - # have an orig.tar.gz and a diff.gz. It cannot have |
164 | - # compression types other than 'gz'. |
165 | - if self.format == '1.0': |
166 | - if bzip2_count > 0: |
167 | - yield UploadError( |
168 | - "%s: is format 1.0 but uses bzip2 compression." |
169 | - % self.filename) |
170 | - |
171 | - if ((diff_count == 0 and native_tar_count == 0) or |
172 | - (diff_count > 0 and native_tar_count > 0)): |
173 | - yield UploadError( |
174 | - "%s: must have exactly one diff.gz or tar.gz." |
175 | - % self.filename) |
176 | - |
177 | - if debian_tar_count > 0: |
178 | - yield UploadError( |
179 | - "%s: is format 1.0 but has debian.tar.*." |
180 | - % self.filename) |
181 | - |
182 | - if len(component_orig_tar_counts) > 0: |
183 | - yield UploadError( |
184 | - "%s: is format 1.0 but has orig-COMPONENT.tar.*." |
185 | - % self.filename) |
186 | - # Format 3.0 (native) must have exactly one tar.*. |
187 | - # gz and bz2 are valid compression types. |
188 | - elif self.format == '3.0 (native)': |
189 | - if native_tar_count == 0: |
190 | - yield UploadError( |
191 | - "%s: must have exactly one tar.*." |
192 | - % self.filename) |
193 | - |
194 | - if diff_count > 0: |
195 | - yield UploadError( |
196 | - "%s: is format 3.0 but has diff.gz." |
197 | - % self.filename) |
198 | - |
199 | - if len(component_orig_tar_counts) > 0: |
200 | - yield UploadError( |
201 | - "%s: is native but has orig-COMPONENT.tar.*." |
202 | - % self.filename) |
203 | - elif self.format == '3.0 (quilt)': |
204 | - if orig_tar_count == 0: |
205 | - yield UploadError( |
206 | - "%s: must have exactly one orig.tar.*." |
207 | - % self.filename) |
208 | - |
209 | - if debian_tar_count == 0: |
210 | - yield UploadError( |
211 | - "%s: must have exactly one debian.tar.*." |
212 | - % self.filename) |
213 | - |
214 | - if diff_count > 0: |
215 | - yield UploadError( |
216 | - "%s: is format 3.0 but has diff.gz." |
217 | - % self.filename) |
218 | - |
219 | - for component in component_orig_tar_counts: |
220 | - if component_orig_tar_counts[component] > 1: |
221 | - yield UploadError( |
222 | - "%s: has more than one orig-%s.tar.*." |
223 | - % (self.filename, component)) |
224 | - else: |
225 | - raise AssertionError("Unknown source format.") |
226 | + for error in format_to_file_checker_map[self.format]( |
227 | + self.filename, file_type_counts, component_orig_tar_counts, |
228 | + bzip2_count): |
229 | + yield error |
230 | |
231 | if files_missing: |
232 | yield UploadError( |
233 | @@ -727,3 +637,78 @@ |
234 | yield error |
235 | |
236 | |
237 | +def check_format_1_0_files(filename, file_type_counts, component_counts, |
238 | + bzip2_count): |
239 | + """Check that the given counts of each file type suit format 1.0. |
240 | + |
241 | + A 1.0 source must be native (with only one tar.gz), or have an orig.tar.gz |
242 | + and a diff.gz. It cannot use bzip2 compression. |
243 | + """ |
244 | + if bzip2_count > 0: |
245 | + yield UploadError( |
246 | + "%s: is format 1.0 but uses bzip2 compression." |
247 | + % filename) |
248 | + |
249 | + if file_type_counts not in ({ |
250 | + SourcePackageFileType.NATIVE_TARBALL: 1, |
251 | + SourcePackageFileType.ORIG_TARBALL: 0, |
252 | + SourcePackageFileType.DEBIAN_TARBALL: 0, |
253 | + SourcePackageFileType.DIFF: 0, |
254 | + }, { |
255 | + SourcePackageFileType.ORIG_TARBALL: 1, |
256 | + SourcePackageFileType.DIFF: 1, |
257 | + SourcePackageFileType.NATIVE_TARBALL: 0, |
258 | + SourcePackageFileType.DEBIAN_TARBALL: 0, |
259 | + }) or len(component_counts) > 0: |
260 | + yield UploadError( |
261 | + "%s: must have exactly one tar.gz, or an orig.tar.gz and diff.gz" |
262 | + % filename) |
263 | + |
264 | + |
265 | +def check_format_3_0_native_files(filename, file_type_counts, |
266 | + component_counts, bzip2_count): |
267 | + """Check that the given counts of each file type suit format 3.0 (native). |
268 | + |
269 | + A 3.0 (native) source must have only one tar.*. Both gzip and bzip2 |
270 | + compression are permissible. |
271 | + """ |
272 | + |
273 | + if file_type_counts != { |
274 | + SourcePackageFileType.NATIVE_TARBALL: 1, |
275 | + SourcePackageFileType.ORIG_TARBALL: 0, |
276 | + SourcePackageFileType.DEBIAN_TARBALL: 0, |
277 | + SourcePackageFileType.DIFF: 0, |
278 | + } or len(component_counts) > 0: |
279 | + yield UploadError("%s: must have only a tar.*." % filename) |
280 | + |
281 | + |
282 | +def check_format_3_0_quilt_files(filename, file_type_counts, |
283 | + component_counts, bzip2_count): |
284 | + """Check that the given counts of each file type suit format 3.0 (native). |
285 | + |
286 | + A 3.0 (quilt) source must have exactly one orig.tar.*, one debian.tar.*, |
287 | + and at most one orig-COMPONENT.tar.* for each COMPONENT. Both gzip and |
288 | + bzip2 compression are permissible. |
289 | + """ |
290 | + if file_type_counts != { |
291 | + SourcePackageFileType.ORIG_TARBALL: 1, |
292 | + SourcePackageFileType.DEBIAN_TARBALL: 1, |
293 | + SourcePackageFileType.NATIVE_TARBALL: 0, |
294 | + SourcePackageFileType.DIFF: 0, |
295 | + }: |
296 | + yield UploadError( |
297 | + "%s: must have only an orig.tar.*, a debian.tar.*, and " |
298 | + "optionally orig-*.tar.*" % filename) |
299 | + |
300 | + for component in component_counts: |
301 | + if component_counts[component] > 1: |
302 | + yield UploadError( |
303 | + "%s: has more than one orig-%s.tar.*." |
304 | + % (filename, component)) |
305 | + |
306 | + |
307 | +format_to_file_checker_map = { |
308 | + SourcePackageFormat.FORMAT_1_0: check_format_1_0_files, |
309 | + SourcePackageFormat.FORMAT_3_0_NATIVE: check_format_3_0_native_files, |
310 | + SourcePackageFormat.FORMAT_3_0_QUILT: check_format_3_0_quilt_files, |
311 | + } |
On Tue, 2009-11-24 at 23:20 +0000, Julian Edwards wrote:
> Review: Needs Fixing code
> Thanks for making this change William. I've commented inline.
Thanks for the review, Julian. I've fixed the main issues -- an
intermediate diff is attached.
> > === modified file 'lib/lp/ archiveuploader /dscfile. py' archiveuploader /dscfile. py 2009-11-16 22:06:14 +0000 archiveuploader /dscfile. py 2009-11-18 03:10:29 +0000 der.utils import ( multi_line_ string, safe_fix_ maintainer, ParseMaintError, source_ file_type) _orig_tar_ ext, determine_ source_ file_type, file_extension)
> > --- lib/lp/
> > +++ lib/lp/
> > @@ -32,7 +32,8 @@
> > from lp.archiveuploa
> > prefix_
> > re_valid_pkg_name, re_valid_version, re_issource,
> > - determine_
> > + re_is_component
> > + get_source_
>
> Can you put the imports in alphabetical order please.
Done.
> > from canonical.encoding import guess as guess_encoding interfaces. person import IPersonSet, PersonCreationR ationale interfaces. sourcepackage import SourcePackageFi leType self.policy. distro, ArchivePurpose. PARTNER) ] archive. purpose == ArchivePurpose.PPA and source_ file_type( filename) == leType. ORIG_TARBALL) : source_ file_type( filename) in ( leType. ORIG_TARBALL, leType. COMPONENT_ ORIG_TARBALL) ): archive, self.policy. distro. main_archive] archive] orig_tar_ counts = {} source_ file_type( sub_dsc_ file.filename) leType. ORIG_TARBALL: leType. COMPONENT_ ORIG_TARBALL: _orig_tar_ ext.match( file_extension( sub_dsc_ file.filename) ).group( 1) orig_tar_ counts: orig_tar_ counts[ component] = 0 orig_tar_ counts[ component] += 1 leType. DEBIAN_ TARBALL: leType. NATIVE_ TARBALL:
> > from lp.registry.
> > from lp.registry.
> > @@ -347,8 +348,9 @@
> > distribution=
> > purpose=
> > elif (self.policy.
> > - determine_
> > - SourcePackageFi
> > + determine_
> > + SourcePackageFi
> > + SourcePackageFi
> > archives = [self.policy.
> > else:
> > archives = [self.policy.
> > @@ -374,9 +376,13 @@
> > """
> >
> > diff_count = 0
> > + debian_tar_count = 0
> > orig_tar_count = 0
> > + component_
> > native_tar_count = 0
> >
> > + bzip2_count = 0
> > +
> > files_missing = False
> > for sub_dsc_file in self.files:
> > filetype = determine_
> > @@ -385,12 +391,23 @@
> > diff_count += 1
> > elif filetype == SourcePackageFi
> > orig_tar_count += 1
> > + elif filetype == SourcePackageFi
> > + component = re_is_component
> > + get_source_
> > + if component not in component_
> > + component_
> > + component_
> > + elif filetype == SourcePackageFi
> > + debian_tar_count += 1
> > elif filetype == SourcePackageFi
> > native_tar_count += 1
>
> This big list of "elif" statements is looking a bit unwieldy, especially with all the new conditions. Unless you can suggest anything better, how about we do something like:
> [snip]
While your suggested solution would not work (at least not without the ORIG_TARBALL) , and the code for each format becomes much
'nonlocal' keyword), I believe I have found something better. I keep one
dict mapping SPFTs to counts, and another mapping component names to
counts. This means there's only one special case (for
COMPONENT_
cleaner too.
> > else: 'Unknown file: ' + sub_dsc_ file.filename) file.filename. endswith( '.bz2') : Name( file.filename) orig_tar_ counts) > 0: tar.*." orig_tar_ counts) > 0: tar.*." orig_tar_ counts: orig_tar_ counts[ component] > 1:
> > yield UploadError(
> > continue
> >
> > + if sub_dsc_
> > + bzip2_count += 1
> > +
> > try:
> > library_file, file_archive = self._getFileBy
> > sub_dsc_
> > @@ -440,6 +457,10 @@
> > yield UploadError(
> > "%s: has more than one orig.tar.*."
> > % self.filename)
> > + if debian_tar_count > 1:
> > + yield UploadError(
> > + "%s: has more than one debian.tar.*."
> > + % self.filename)
> > if native_tar_count > 1:
> > yield UploadError(
> > "%s: has more than one tar.*."
> > @@ -455,15 +476,73 @@
> > "%s: must have exactly one tar.* or orig.tar.*."
> > % self.filename)
> >
> > + if native_tar_count > 0 and debian_tar_count > 0:
> > + yield UploadError(
> > + "%s: must have no more than one tar.* or debian.tar.*."
> > + % self.filename)
> > +
> > # Format 1.0 must be native (exactly one tar.gz), or
> > # have an orig.tar.gz and a diff.gz. It cannot have
> > # compression types other than 'gz'.
> > if self.format == '1.0':
> > + if bzip2_count > 0:
> > + yield UploadError(
> > + "%s: is format 1.0 but uses bzip2 compression."
> > + % self.filename)
> > +
> > if ((diff_count == 0 and native_tar_count == 0) or
> > (diff_count > 0 and native_tar_count > 0)):
> > yield UploadError(
> > "%s: must have exactly one diff.gz or tar.gz."
> > % self.filename)
> > +
> > + if debian_tar_count > 0:
> > + yield UploadError(
> > + "%s: is format 1.0 but has debian.tar.*."
> > + % self.filename)
> > +
> > + if len(component_
> > + yield UploadError(
> > + "%s: is format 1.0 but has orig-COMPONENT.
> > + % self.filename)
> > + # Format 3.0 (native) must have exactly one tar.*.
> > + # gz and bz2 are valid compression types.
> > + elif self.format == '3.0 (native)':
> > + if native_tar_count == 0:
> > + yield UploadError(
> > + "%s: must have exactly one tar.*."
> > + % self.filename)
> > +
> > + if diff_count > 0:
> > + yield UploadError(
> > + "%s: is format 3.0 but has diff.gz."
> > + % self.filename)
> > +
> > + if len(component_
> > + yield UploadError(
> > + "%s: is native but has orig-COMPONENT.
> > + % self.filename)
> > + elif self.format == '3.0 (quilt)':
> > + if orig_tar_count == 0:
> > + yield UploadError(
> > + "%s: must have exactly one orig.tar.*."
> > + % self.filename)
> > +
> > + if debian_tar_count == 0:
> > + yield UploadError(
> > + "%s: must have exactly one debian.tar.*."
> > + % self.filename)
> > +
> > + if diff_count > 0:
> > + yield UploadError(
> > + "%s: is format 3.0 but has diff.gz."
> > + % self.filename)
> > +
> > + for component in component_
> > + if component_
> > + yield UploadError(
> > + "%s: has more than one orig-%s.tar.*."
> > + % (self.filename, component))
>
> Can you split this massive method up a bit please. Ideally into three separate
> methods that check the different formats.
I've split it, and completely changed the verification style. The
failure messages are much less verbose, but one would have to manually
edit the .dsc to get any of them anyway. I feel the reduction in code
makes things much better.
> That will also make it easier for you to unit test this change without creating
> new test packages so that we can be sure all the new file type rejection cases
> are handled.
This does make testing much easier.
However, there are now just a few code paths, but a lot of potential
failure cases. How completely should I test?
> Also, can you please change it to compare against the format enum instead "Unknown source format.")
> of strings. Then the assertion error below will make more sense.
>
> > else:
> > raise AssertionError(
It will now die with a KeyError when looking up the file checker, which
seems as reasonable as the old explicit AssertionError.
> > archiveuploader /nascentuploadf ile.py' archiveuploader /nascentuploadf ile.py 2009-11-10 13:09:26 +0000 archiveuploader /nascentuploadf ile.py 2009-11-18 03:10:30 +0000 source_ file_type( self.filename) == ( leType. ORIG_TARBALL) : source_ file_type( self.filename) in ( leType. ORIG_TARBALL, leType. COMPONENT_ ORIG_TARBALL) :
> >
> > === modified file 'lib/lp/
> > --- lib/lp/
> > +++ lib/lp/
> > @@ -352,8 +352,9 @@
> > "Architecture field." % (self.filename))
> >
> > version_chopped = re_no_epoch.sub('', self.version)
> > - if determine_
> > - SourcePackageFi
> > + if determine_
> > + SourcePackageFi
> > + SourcePackageFi
>
> I've seen this check a couple of times now, I think it would be a good idea to
> refactor it into a property. (is_orig or something like that)
The two uses are on different types: this is a SourceUploadFile, while leaseFile. Should I instead create a
the other is a SourcePackageRe
utility function alongside SPFT, perhaps?
> [snip]