Code review comment for lp:~wgrant/launchpad/distroseries-source-format-selection
- distroseries-source-format-selection
- Merge into db-devel
Revision history for this message
William Grant (wgrant) wrote : | # |
1 | === modified file 'lib/lp/archiveuploader/dscfile.py' |
2 | --- lib/lp/archiveuploader/dscfile.py 2009-11-25 09:40:38 +0000 |
3 | +++ lib/lp/archiveuploader/dscfile.py 2009-11-26 00:42:45 +0000 |
4 | @@ -451,7 +451,13 @@ |
5 | yield error |
6 | files_missing = True |
7 | |
8 | - for error in format_to_file_checker_map[self.format]( |
9 | + try: |
10 | + file_checker = format_to_file_checker_map[self.format] |
11 | + except KeyError: |
12 | + raise AssertionError( |
13 | + "No file checker for source format %s." % self.format) |
14 | + |
15 | + for error in file_checker( |
16 | self.filename, file_type_counts, component_orig_tar_counts, |
17 | bzip2_count): |
18 | yield error |
19 | @@ -649,17 +655,23 @@ |
20 | "%s: is format 1.0 but uses bzip2 compression." |
21 | % filename) |
22 | |
23 | - if file_type_counts not in ({ |
24 | - SourcePackageFileType.NATIVE_TARBALL: 1, |
25 | - SourcePackageFileType.ORIG_TARBALL: 0, |
26 | - SourcePackageFileType.DEBIAN_TARBALL: 0, |
27 | - SourcePackageFileType.DIFF: 0, |
28 | - }, { |
29 | - SourcePackageFileType.ORIG_TARBALL: 1, |
30 | - SourcePackageFileType.DIFF: 1, |
31 | - SourcePackageFileType.NATIVE_TARBALL: 0, |
32 | - SourcePackageFileType.DEBIAN_TARBALL: 0, |
33 | - }) or len(component_counts) > 0: |
34 | + valid_file_type_counts = [ |
35 | + { |
36 | + SourcePackageFileType.NATIVE_TARBALL: 1, |
37 | + SourcePackageFileType.ORIG_TARBALL: 0, |
38 | + SourcePackageFileType.DEBIAN_TARBALL: 0, |
39 | + SourcePackageFileType.DIFF: 0, |
40 | + }, |
41 | + { |
42 | + SourcePackageFileType.ORIG_TARBALL: 1, |
43 | + SourcePackageFileType.DIFF: 1, |
44 | + SourcePackageFileType.NATIVE_TARBALL: 0, |
45 | + SourcePackageFileType.DEBIAN_TARBALL: 0, |
46 | + }, |
47 | + ] |
48 | + |
49 | + if (file_type_counts not in valid_file_type_counts or |
50 | + len(component_counts) > 0): |
51 | yield UploadError( |
52 | "%s: must have exactly one tar.gz, or an orig.tar.gz and diff.gz" |
53 | % filename) |
54 | @@ -673,12 +685,17 @@ |
55 | compression are permissible. |
56 | """ |
57 | |
58 | - if file_type_counts != { |
59 | - SourcePackageFileType.NATIVE_TARBALL: 1, |
60 | - SourcePackageFileType.ORIG_TARBALL: 0, |
61 | - SourcePackageFileType.DEBIAN_TARBALL: 0, |
62 | - SourcePackageFileType.DIFF: 0, |
63 | - } or len(component_counts) > 0: |
64 | + valid_file_type_counts = [ |
65 | + { |
66 | + SourcePackageFileType.NATIVE_TARBALL: 1, |
67 | + SourcePackageFileType.ORIG_TARBALL: 0, |
68 | + SourcePackageFileType.DEBIAN_TARBALL: 0, |
69 | + SourcePackageFileType.DIFF: 0, |
70 | + }, |
71 | + ] |
72 | + |
73 | + if (file_type_counts not in valid_file_type_counts or |
74 | + len(component_counts) > 0): |
75 | yield UploadError("%s: must have only a tar.*." % filename) |
76 | |
77 | |
78 | @@ -690,12 +707,17 @@ |
79 | and at most one orig-COMPONENT.tar.* for each COMPONENT. Both gzip and |
80 | bzip2 compression are permissible. |
81 | """ |
82 | - if file_type_counts != { |
83 | - SourcePackageFileType.ORIG_TARBALL: 1, |
84 | - SourcePackageFileType.DEBIAN_TARBALL: 1, |
85 | - SourcePackageFileType.NATIVE_TARBALL: 0, |
86 | - SourcePackageFileType.DIFF: 0, |
87 | - }: |
88 | + |
89 | + valid_file_type_counts = [ |
90 | + { |
91 | + SourcePackageFileType.ORIG_TARBALL: 1, |
92 | + SourcePackageFileType.DEBIAN_TARBALL: 1, |
93 | + SourcePackageFileType.NATIVE_TARBALL: 0, |
94 | + SourcePackageFileType.DIFF: 0, |
95 | + }, |
96 | + ] |
97 | + |
98 | + if file_type_counts not in valid_file_type_counts: |
99 | yield UploadError( |
100 | "%s: must have only an orig.tar.*, a debian.tar.*, and " |
101 | "optionally orig-*.tar.*" % filename) |
102 | |
103 | === modified file 'lib/lp/archiveuploader/nascentuploadfile.py' |
104 | --- lib/lp/archiveuploader/nascentuploadfile.py 2009-11-14 02:55:15 +0000 |
105 | +++ lib/lp/archiveuploader/nascentuploadfile.py 2009-11-25 23:38:51 +0000 |
106 | @@ -35,7 +35,6 @@ |
107 | re_no_epoch, re_no_revision, re_valid_version, re_valid_pkg_name, |
108 | re_extract_src_version, determine_source_file_type) |
109 | from canonical.encoding import guess as guess_encoding |
110 | -from lp.registry.interfaces.sourcepackage import SourcePackageFileType |
111 | from lp.soyuz.interfaces.binarypackagename import ( |
112 | IBinaryPackageNameSet) |
113 | from lp.soyuz.interfaces.binarypackagerelease import ( |
114 | @@ -49,6 +48,7 @@ |
115 | from lp.soyuz.interfaces.publishing import ( |
116 | PackagePublishingPriority) |
117 | from lp.soyuz.interfaces.section import ISectionSet |
118 | +from lp.soyuz.model.files import SourceFileMixin |
119 | from canonical.librarian.utils import filechunks |
120 | |
121 | |
122 | @@ -332,7 +332,7 @@ |
123 | return getUtility(ISectionSet)[self.section_name] |
124 | |
125 | |
126 | -class SourceUploadFile(PackageUploadFile): |
127 | +class SourceUploadFile(SourceFileMixin, PackageUploadFile): |
128 | """Files mentioned in changesfile as source (orig, diff, tar). |
129 | |
130 | This class only check consistency on information contained in |
131 | @@ -340,6 +340,11 @@ |
132 | Further checks on file contents and package consistency are done |
133 | in DSCFile. |
134 | """ |
135 | + |
136 | + @property |
137 | + def filetype(self): |
138 | + return determine_source_file_type(self.filename) |
139 | + |
140 | def verify(self): |
141 | """Verify the uploaded source file. |
142 | |
143 | @@ -352,9 +357,7 @@ |
144 | "Architecture field." % (self.filename)) |
145 | |
146 | version_chopped = re_no_epoch.sub('', self.version) |
147 | - if determine_source_file_type(self.filename) in ( |
148 | - SourcePackageFileType.ORIG_TARBALL, |
149 | - SourcePackageFileType.COMPONENT_ORIG_TARBALL): |
150 | + if self.is_orig: |
151 | version_chopped = re_no_revision.sub('', version_chopped) |
152 | |
153 | source_match = re_issource.match(self.filename) |
154 | |
155 | === modified file 'lib/lp/archiveuploader/tests/nascentuploadfile.txt' |
156 | --- lib/lp/archiveuploader/tests/nascentuploadfile.txt 2009-10-28 05:24:11 +0000 |
157 | +++ lib/lp/archiveuploader/tests/nascentuploadfile.txt 2009-11-26 00:36:18 +0000 |
158 | @@ -569,6 +569,115 @@ |
159 | ['File ed_0.2-20.dsc mentioned in the changes has a size mismatch. 578 != 500'] |
160 | |
161 | |
162 | +=== Format file type verification === |
163 | + |
164 | +DSCFile performs additional verification on the types of the referenced |
165 | +files, confirming that they are suitable for the source package's |
166 | +format. There is an error generator to verify each format. |
167 | + |
168 | + >>> from lp.archiveuploader.dscfile import (check_format_1_0_files, |
169 | + ... check_format_3_0_native_files, check_format_3_0_quilt_files) |
170 | + >>> from lp.registry.interfaces.sourcepackage import SourcePackageFileType |
171 | + |
172 | +==== 1.0 ==== |
173 | + |
174 | +A 1.0 source can contain either a tar.gz or an orig.tar.gz and diff.gz. |
175 | + |
176 | + >>> list(check_format_1_0_files('foo_1.dsc', { |
177 | + ... SourcePackageFileType.ORIG_TARBALL: 1, |
178 | + ... SourcePackageFileType.DIFF: 1, |
179 | + ... SourcePackageFileType.DEBIAN_TARBALL: 0, |
180 | + ... SourcePackageFileType.NATIVE_TARBALL: 0, |
181 | + ... }, {}, 0)) |
182 | + [] |
183 | + |
184 | + >>> list(check_format_1_0_files('foo_1.dsc', { |
185 | + ... SourcePackageFileType.NATIVE_TARBALL: 1, |
186 | + ... SourcePackageFileType.ORIG_TARBALL: 0, |
187 | + ... SourcePackageFileType.DIFF: 0, |
188 | + ... SourcePackageFileType.DEBIAN_TARBALL: 0, |
189 | + ... }, {}, 0)) |
190 | + [] |
191 | + |
192 | +But if we have some other combination, or bzip2 compression, errors |
193 | +will be generated. |
194 | + |
195 | + >>> list(check_format_1_0_files('foo_1.dsc', { |
196 | + ... SourcePackageFileType.NATIVE_TARBALL: 1, |
197 | + ... SourcePackageFileType.ORIG_TARBALL: 1, |
198 | + ... SourcePackageFileType.DIFF: 1, |
199 | + ... SourcePackageFileType.DEBIAN_TARBALL: 0, |
200 | + ... }, {}, 1)) |
201 | + [UploadError('foo_1.dsc: is format 1.0 but uses bzip2 compression.',), UploadError('foo_1.dsc: must have exactly one tar.gz, or an orig.tar.gz and diff.gz',)] |
202 | + |
203 | +The files are also bad if there are any components: |
204 | + |
205 | + >>> list(check_format_1_0_files('foo_1.dsc', { |
206 | + ... SourcePackageFileType.ORIG_TARBALL: 1, |
207 | + ... SourcePackageFileType.DIFF: 1, |
208 | + ... SourcePackageFileType.DEBIAN_TARBALL: 0, |
209 | + ... SourcePackageFileType.NATIVE_TARBALL: 0, |
210 | + ... }, {'foo': 1}, 0)) |
211 | + [UploadError('foo_1.dsc: must have exactly one tar.gz, or an orig.tar.gz and diff.gz',)] |
212 | + |
213 | +==== 3.0 (native) ==== |
214 | + |
215 | +A 3.0 (native) source must contain just a tar.(gz|bz2). |
216 | + |
217 | + >>> list(check_format_3_0_native_files('foo_1.dsc', { |
218 | + ... SourcePackageFileType.NATIVE_TARBALL: 1, |
219 | + ... SourcePackageFileType.ORIG_TARBALL: 0, |
220 | + ... SourcePackageFileType.DIFF: 0, |
221 | + ... SourcePackageFileType.DEBIAN_TARBALL: 0, |
222 | + ... }, {}, 1)) |
223 | + [] |
224 | + |
225 | + >>> list(check_format_3_0_native_files('foo_1.dsc', { |
226 | + ... SourcePackageFileType.NATIVE_TARBALL: 1, |
227 | + ... SourcePackageFileType.ORIG_TARBALL: 1, |
228 | + ... SourcePackageFileType.DIFF: 0, |
229 | + ... SourcePackageFileType.DEBIAN_TARBALL: 0, |
230 | + ... }, {}, 1)) |
231 | + [UploadError('foo_1.dsc: must have only a tar.*.',)] |
232 | + |
233 | + >>> list(check_format_3_0_native_files('foo_1.dsc', { |
234 | + ... SourcePackageFileType.NATIVE_TARBALL: 1, |
235 | + ... SourcePackageFileType.ORIG_TARBALL: 0, |
236 | + ... SourcePackageFileType.DIFF: 0, |
237 | + ... SourcePackageFileType.DEBIAN_TARBALL: 0, |
238 | + ... }, {'foo': 1}, 0)) |
239 | + [UploadError('foo_1.dsc: must have only a tar.*.',)] |
240 | + |
241 | +==== 3.0 (quilt) ==== |
242 | + |
243 | +A 3.0 (quilt) source must have an orig.tar.*, a debian.tar.*, and at |
244 | +most one orig-COMPONENT.tar.* for each COMPONENT. |
245 | + |
246 | + >>> list(check_format_3_0_quilt_files('foo_1.dsc', { |
247 | + ... SourcePackageFileType.ORIG_TARBALL: 1, |
248 | + ... SourcePackageFileType.DEBIAN_TARBALL: 1, |
249 | + ... SourcePackageFileType.NATIVE_TARBALL: 0, |
250 | + ... SourcePackageFileType.DIFF: 0, |
251 | + ... }, {'foo': 1}, 1)) |
252 | + [] |
253 | + |
254 | + >>> list(check_format_3_0_quilt_files('foo_1.dsc', { |
255 | + ... SourcePackageFileType.NATIVE_TARBALL: 1, |
256 | + ... SourcePackageFileType.ORIG_TARBALL: 1, |
257 | + ... SourcePackageFileType.DIFF: 0, |
258 | + ... SourcePackageFileType.DEBIAN_TARBALL: 1, |
259 | + ... }, {}, 1)) |
260 | + [UploadError('foo_1.dsc: must have only an orig.tar.*, a debian.tar.*, and optionally orig-*.tar.*',)] |
261 | + |
262 | + >>> list(check_format_3_0_quilt_files('foo_1.dsc', { |
263 | + ... SourcePackageFileType.ORIG_TARBALL: 1, |
264 | + ... SourcePackageFileType.DEBIAN_TARBALL: 1, |
265 | + ... SourcePackageFileType.NATIVE_TARBALL: 0, |
266 | + ... SourcePackageFileType.DIFF: 0, |
267 | + ... }, {'foo': 2}, 0)) |
268 | + [UploadError('foo_1.dsc: has more than one orig-foo.tar.*.',)] |
269 | + |
270 | + |
271 | === Sub-DSC files or DSCUploadedFiles === |
272 | |
273 | Sub-DSCFiles are DSCUploadedFile objects. |
274 | |
275 | === modified file 'lib/lp/soyuz/interfaces/files.py' |
276 | --- lib/lp/soyuz/interfaces/files.py 2009-06-25 04:06:00 +0000 |
277 | +++ lib/lp/soyuz/interfaces/files.py 2009-11-25 23:23:30 +0000 |
278 | @@ -14,7 +14,7 @@ |
279 | 'ISourcePackageReleaseFileSet', |
280 | ] |
281 | |
282 | -from zope.schema import Int |
283 | +from zope.schema import Bool, Int |
284 | from zope.interface import Interface |
285 | from canonical.launchpad import _ |
286 | |
287 | @@ -68,6 +68,11 @@ |
288 | title=_('The type of this file'), required=True, readonly=False, |
289 | ) |
290 | |
291 | + is_orig = Bool( |
292 | + title=_('Whether this file is an original tarball'), |
293 | + required=True, readonly=False, |
294 | + ) |
295 | + |
296 | |
297 | class ISourcePackageReleaseFileSet(Interface): |
298 | """The set of all `SourcePackageRelease`s.""" |
299 | |
300 | === modified file 'lib/lp/soyuz/model/files.py' |
301 | --- lib/lp/soyuz/model/files.py 2009-06-25 04:06:00 +0000 |
302 | +++ lib/lp/soyuz/model/files.py 2009-11-25 23:38:57 +0000 |
303 | @@ -60,7 +60,18 @@ |
304 | "binarypackagerelease.binarypackagename"]) |
305 | |
306 | |
307 | -class SourcePackageReleaseFile(SQLBase): |
308 | +class SourceFileMixin: |
309 | + """Mix-in class for common functionality between source file classes.""" |
310 | + |
311 | + @property |
312 | + def is_orig(self): |
313 | + return self.filetype in ( |
314 | + SourcePackageFileType.ORIG_TARBALL, |
315 | + SourcePackageFileType.COMPONENT_ORIG_TARBALL |
316 | + ) |
317 | + |
318 | + |
319 | +class SourcePackageReleaseFile(SourceFileMixin, SQLBase): |
320 | """See ISourcePackageFile""" |
321 | |
322 | implements(ISourcePackageReleaseFile) |
323 | @@ -90,4 +101,3 @@ |
324 | prejoins=["libraryfile", "libraryfile.content", |
325 | "sourcepackagerelease", |
326 | "sourcepackagerelease.sourcepackagename"]) |
327 | - |
328 | |
329 | === modified file 'lib/lp/soyuz/model/queue.py' |
330 | --- lib/lp/soyuz/model/queue.py 2009-11-14 02:55:15 +0000 |
331 | +++ lib/lp/soyuz/model/queue.py 2009-11-25 23:36:55 +0000 |
332 | @@ -61,7 +61,6 @@ |
333 | NonBuildableSourceUploadError, QueueBuildAcceptError, |
334 | QueueInconsistentStateError, QueueSourceAcceptError, |
335 | QueueStateWriteProtectedError) |
336 | -from lp.registry.interfaces.sourcepackage import SourcePackageFileType |
337 | from canonical.launchpad.mail import ( |
338 | format_address, signed_message_from_string, sendmail) |
339 | from lp.soyuz.scripts.processaccepted import ( |
340 | @@ -1468,9 +1467,7 @@ |
341 | published_sha1 = published_file.content.sha1 |
342 | |
343 | # Multiple orig(s) with the same content are fine. |
344 | - if source_file.filetype in ( |
345 | - SourcePackageFileType.ORIG_TARBALL, |
346 | - SourcePackageFileType.COMPONENT_ORIG_TARBALL): |
347 | + if source_file.is_orig: |
348 | if proposed_sha1 == published_sha1: |
349 | continue |
350 | raise QueueInconsistentStateError( |
On Wed, 2009-11-25 at 23:03 +0000, Julian Edwards wrote: ORIG_TARBALL) , and the code for each format becomes much
> Review: Needs Fixing code
> > While your suggested solution would not work (at least not without the
> > 'nonlocal' keyword), I believe I have found something better. I keep one
> > dict mapping SPFTs to counts, and another mapping component names to
> > counts. This means there's only one special case (for
> > COMPONENT_
> > cleaner too.
>
> Ah, this is 100% better than my suggestion, nice one!
>
> > I've split it, and completely changed the verification style. The
> > failure messages are much less verbose, but one would have to manually
> > edit the .dsc to get any of them anyway. I feel the reduction in code
> > makes things much better.
>
> Actually it is not only better, it's easier to test because there are
> fewer cases!
>
> > This does make testing much easier.
> >
> > However, there are now just a few code paths, but a lot of potential
> > failure cases. How completely should I test?
>
> We always test 100% code coverage at least and any peculiar corner cases
> that are obvious.
>
> I'd be happy here to have 2 tests per format type; one that tests it accepts
> correctly and one that tests it fails correctly, since that's pretty much
> all the code is doing now.
I've added tests in nascentuploadfi le.txt.
> > > Also, can you please change it to compare against the format enum instead "Unknown source format.")
> > > of strings. Then the assertion error below will make more sense.
> > >
> > > > else:
> > > > raise AssertionError(
> >
> > It will now die with a KeyError when looking up the file checker, which
> > seems as reasonable as the old explicit AssertionError.
>
> I don't really like that, it's not as explicit as the old error, particularly
> because the error will appear in the rejection email (unlikely as it is).
>
> Can you catch the KeyError and re-raise it as AssertionError with some good
> text. This will ensure if/when we add more format types, it will be easier to
> track down why the failure occurred.
Done.
> > The two uses are on different types: this is a SourceUploadFile, while leaseFile. Should I instead create a
> > the other is a SourcePackageRe
> > utility function alongside SPFT, perhaps?
>
> I would make a mix-in class that both can inherit from and put the property
> on that.
As you suggested on IRC, I've put it in model.files. SourceFileMixin .
lp.soyuz.
> > @@ -375,35 +377,32 @@ leType. DIFF: 0, leType. ORIG_TARBALL: 0, leType. DEBIAN_ TARBALL: 0, leType. NATIVE_ TARBALL: 0, orig_tar_ counts = {} source_ file_type( sub_dsc_ file.filename) leType. DIFF: leType. ORIG_TARBALL: leType. COMPONENT_ ORIG_TARBALL: source_ file_type( sub_dsc_ file.filename) 'Unknown file: ' + sub_dsc_ file.filename) leType. COMPONENT_ ORIG_TARBALL: _orig_tar_ ext.match( file_extension( sub_dsc_ file.filename) ).group( 1) orig_tar_ counts: orig_tar_ counts[ component] = 0 orig_tar_ counts[ component] += 1 leType. DEBIAN_ TARBALL: leType. NATIVE_ TARBALL: 'Unknown file: ' + sub_dsc_ file.filename) counts[ file_type] += 1 1_0_files( filename, file_type_counts, component_counts, leType. NATIVE_ TARBALL: 1, leType. ORIG_TARBALL: 0, leType. DEBIAN_ TARBALL: 0, leType. DIFF: 0, leType. ORIG_TARBALL: 1, leType. DIFF: 1, leType. NATIVE_ TARBALL: 0, leType. DEBIAN_ TARBALL: 0, counts) > 0: type_counts = [ leType. NATIVE_ TARBALL: 1, type_counts ...
> > and checksum.
> > """
> >
> > - diff_count = 0
> > - debian_tar_count = 0
> > - orig_tar_count = 0
> > + file_type_counts = {
> > + SourcePackageFi
> > + SourcePackageFi
> > + SourcePackageFi
> > + SourcePackageFi
> > + }
> > component_
> > - native_tar_count = 0
> > -
> > bzip2_count = 0
> > -
>
> *Loads* better!
>
> > files_missing = False
> > +
> > for sub_dsc_file in self.files:
> > - filetype = determine_
> > -
> > - if filetype == SourcePackageFi
> > - diff_count += 1
> > - elif filetype == SourcePackageFi
> > - orig_tar_count += 1
> > - elif filetype == SourcePackageFi
> > + file_type = determine_
> > +
> > + if file_type is None:
> > + yield UploadError(
> > + continue
> > +
> > + if file_type == SourcePackageFi
> > + # Split the count by component name.
> > component = re_is_component
> > get_source_
> > if component not in component_
> > component_
> > component_
> > - elif filetype == SourcePackageFi
> > - debian_tar_count += 1
> > - elif filetype == SourcePackageFi
> > - native_tar_count += 1
> > else:
> > - yield UploadError(
> > - continue
> > + file_type_
>
> This is one of the best bits of crappy code cleaned up I've seen in years :)
>
> > +def check_format_
> > + bzip2_count):
> > + """Check that the given counts of each file type suit format 1.0.
> > +
> > + A 1.0 source must be native (with only one tar.gz), or have an orig.tar.gz
> > + and a diff.gz. It cannot use bzip2 compression.
> > + """
> > + if bzip2_count > 0:
> > + yield UploadError(
> > + "%s: is format 1.0 but uses bzip2 compression."
> > + % filename)
> > +
> > + if file_type_counts not in ({
> > + SourcePackageFi
> > + SourcePackageFi
> > + SourcePackageFi
> > + SourcePackageFi
> > + }, {
> > + SourcePackageFi
> > + SourcePackageFi
> > + SourcePackageFi
> > + SourcePackageFi
> > + }) or len(component_
>
> This is a little ugly. Can you do something like:
>
> valid_file_
> {
> SourcePackageFi
> ...
> },
> {
> ...
> },
> ]
>
> if file_type_counts not in valid_file_
>
> And the same for the other check funcs as well.
While it violates the style guide, it is indeed much more readable. I've
done that.
> [snip]
>
> Otherwise a great change!
>
> Let me see these few changes and then we'll get this puppy landed.
Thanks. The intermediate diff is attached.