Merge lp:~jml/libdep-service/apt-file-db into lp:libdep-service

Proposed by Jonathan Lange
Status: Merged
Approved by: James Westby
Approved revision: 98
Merged at revision: 67
Proposed branch: lp:~jml/libdep-service/apt-file-db
Merge into: lp:libdep-service
Diff against target: 284 lines (+245/-7)
4 files modified
djlibdep/aptfile.py (+107/-0)
djlibdep/tests/__init__.py (+1/-6)
djlibdep/tests/test_aptfile.py (+134/-0)
djlibdep/tests/test_pep8.py (+3/-1)
To merge this branch: bzr merge lp:~jml/libdep-service/apt-file-db
Reviewer | Review Type | Date Requested | Status
James Westby (community) | | | Approve
Review via email: mp+131181@code.launchpad.net

Commit message

Add apt-file to libdep-service

Description of the change

Work-in-progress

To post a comment you must log in.
lp:~jml/libdep-service/apt-file-db updated
94. By Jonathan Lange

Delete much, much code, relying purely on the collapsed version.

95. By Jonathan Lange

Extract the header stripping bit.

96. By Jonathan Lange

Fold things into the parameter.

97. By Jonathan Lange

Make it more like a unit test.

98. By Jonathan Lange

Bombard with edge-case unit tests.

Revision history for this message
James Westby (james-w):
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'djlibdep/aptfile.py'
2--- djlibdep/aptfile.py 1970-01-01 00:00:00 +0000
3+++ djlibdep/aptfile.py 2012-10-24 14:09:21 +0000
4@@ -0,0 +1,107 @@
5+# Copyright (C) 2012 Canonical Ltd.
6+#
7+# This program is free software: you can redistribute it and/or modify
8+# it under the terms of the GNU Affero General Public License as published by
9+# the Free Software Foundation, version 3 of the License.
10+#
11+# This program is distributed in the hope that it will be useful,
12+# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+# GNU Affero General Public License for more details.
15+#
16+# You should have received a copy of the GNU Affero General Public License
17+# along with this program. If not, see <http://www.gnu.org/licenses/>.
18+
19+"""apt-file backend for djlibdep."""
20+
21+import os
22+
23+
24+def get_contents_url(archive, suite, architecture):
25+ """Get the URL for a contents file.
26+
27+ :param archive: The root URL of the archive,
28+ e.g. 'http://archive.ubuntu.com/ubuntu'.
29+ :param suite: Which suite to get results for.
30+ :param architecture: Which architecture. Normally 'i386' or 'amd64'.
31+ :return: A URL as a string.
32+ """
33+ return '%s/dists/%s/Contents-%s.gz' % (archive, suite, architecture)
34+
35+
36+def _is_header(line):
37+ """Is ``line`` a Contents header?"""
38+ return line[:4] == 'FILE' and line[-9:] == 'LOCATION\n'
39+
40+
41+# Since we want to find libraries, we try to find files that are installed to
42+# directories that ``ld`` searches when it wants to find a library.
43+# Unfortunately, the list of paths varies from system to system. However,
44+# this list tries to be complete.
45+LD_SEARCH_PATH = set([
46+ # standards
47+ "lib",
48+ "usr/lib",
49+ "usr/local/lib",
50+ # old biarch
51+ "lib32",
52+ "usr/lib32",
53+ # new multiarch
54+ "lib/i686-linux-gnu",
55+ "lib/i386-linux-gnu",
56+ "lib/x86_64-linux-gnu",
57+ "usr/lib/i386-linux-gnu",
58+ "usr/lib/i686-linux-gnu",
59+ "usr/lib/x86_64-linux-gnu",
60+ # ?
61+ "usr/lib/x86_64-linux-gnu/fakechroot",
62+ "usr/lib/x86_64-linux-gnu/mesa",
63+ "usr/lib/x86_64-linux-gnu/mesa-egl",
64+ "usr/lib/i386-linux-gnu/mesa",
65+ ])
66+
67+
68+def _strip_header(stream):
69+ for line in stream:
70+ if _is_header(line):
71+ return
72+
73+
74+def iter_libraries_in_contents(stream):
75+ """Iterate through a Contents file, yielding libraries and their packages.
76+
77+ Contents files are big. As a rough guide, the Contents-amd64.gz file for
78+ quantal is ~24MB compressed, about 384MB uncompressed, and contains
79+ roughly 4,000,000 records.
80+
81+ We want to be able to use the Contents file to tell us which packages
82+ provide which files, but we don't want to go through the whole file every
83+ time, and we don't want to store a full, parsed version in memory.
84+
85+ As such, this iterates over the file, yielding records which are probably
86+ records for libraries. It does this by looking for files that are
87+ immediately beneath one of a list of directories.
88+
89+ We cannot tell with certainty which records are for libraries and which
90+ are not. As such, this might yield records for things that are not
91+ libraries.
92+
93+ See <http://wiki.debian.org/RepositoryFormat#A.22Contents.22_indices>
94+ for details about the format of a Contents file.
95+
96+ :param stream: A file-like object from which we can read a Contents file.
97+ :return: An iterator of ``(package, library_name)`` tuples, where
98+ ``package`` is something that provides the library, and ``library_name``
99+ is the name of the library.
100+ """
101+ _strip_header(stream)
102+ for line in stream:
103+ try:
104+ path, locations = line.rsplit(None, 1)
105+ except ValueError:
106+ continue
107+ directory, filename = os.path.split(path)
108+ if directory in LD_SEARCH_PATH:
109+ for location in locations.split(','):
110+ package = os.path.basename(location)
111+ yield package, filename
112
113=== modified file 'djlibdep/tests/__init__.py'
114--- djlibdep/tests/__init__.py 2012-10-02 16:06:26 +0000
115+++ djlibdep/tests/__init__.py 2012-10-24 14:09:21 +0000
116@@ -19,13 +19,8 @@
117
118
119 TEST_MODULES = [
120- 'api',
121- 'interface',
122+ 'aptfile',
123 'pep8',
124- 'preflight',
125- 'test_double',
126- 'test_double_impl',
127- 'views',
128 ]
129
130 SUITE_FACTORY = OptimisingTestSuite
131
132=== added file 'djlibdep/tests/test_aptfile.py'
133--- djlibdep/tests/test_aptfile.py 1970-01-01 00:00:00 +0000
134+++ djlibdep/tests/test_aptfile.py 2012-10-24 14:09:21 +0000
135@@ -0,0 +1,134 @@
136+# Copyright (C) 2012 Canonical Ltd.
137+#
138+# This program is free software: you can redistribute it and/or modify
139+# it under the terms of the GNU Affero General Public License as published by
140+# the Free Software Foundation, version 3 of the License.
141+#
142+# This program is distributed in the hope that it will be useful,
143+# but WITHOUT ANY WARRANTY; without even the implied warranty of
144+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
145+# GNU Affero General Public License for more details.
146+#
147+# You should have received a copy of the GNU Affero General Public License
148+# along with this program. If not, see <http://www.gnu.org/licenses/>.
149+
150+from StringIO import StringIO
151+
152+from testtools import TestCase
153+
154+from .. import aptfile
155+
156+
157+class TestContentsURL(TestCase):
158+
159+ def test_url(self):
160+ archive = self.getUniqueString('archive')
161+ suite = self.getUniqueString('suite')
162+ arch = self.getUniqueString('arch')
163+ contents_url = aptfile.get_contents_url(archive, suite, arch)
164+ self.assertEqual(
165+ '%s/dists/%s/Contents-%s.gz' %
166+ (archive, suite, arch), contents_url)
167+
168+
169+class TestEndToEnd(TestCase):
170+
171+ def parse_contents(self, contents):
172+ return list(aptfile.iter_libraries_in_contents(StringIO(contents)))
173+
174+ def test_empty(self):
175+ self.assertEqual([], self.parse_contents(''))
176+
177+ def test_preface_only(self):
178+ contents = """
179+Random prose.
180+
181+This could go on forever.
182+
183+"""
184+ self.assertEqual([], self.parse_contents(contents))
185+
186+ def test_header_only(self):
187+ contents = "FILE LOCATION\n"
188+ self.assertEqual([], self.parse_contents(contents))
189+
190+ def test_immediate_header(self):
191+ contents = """
192+FILE LOCATION
193+bin/afio multiverse/utils/afio
194+lib/libfoo.so.1 main/foo-lib
195+"""
196+ self.assertEqual(
197+ [('foo-lib', 'libfoo.so.1')], self.parse_contents(contents))
198+
199+ def test_multiple_packages(self):
200+ contents = """
201+FILE LOCATION
202+bin/afio multiverse/utils/afio
203+lib/libfoo.so.1 main/foo-lib,universe/bar-lib
204+"""
205+ expected = [
206+ ('foo-lib', 'libfoo.so.1'),
207+ ('bar-lib', 'libfoo.so.1'),
208+ ]
209+ self.assertEqual(expected, self.parse_contents(contents))
210+
211+ def test_skips_blank_lines(self):
212+ contents = """
213+FILE LOCATION
214+bin/afio multiverse/utils/afio
215+
216+
217+lib/libfoo.so.1 main/foo-lib,universe/bar-lib
218+"""
219+ expected = [
220+ ('foo-lib', 'libfoo.so.1'),
221+ ('bar-lib', 'libfoo.so.1'),
222+ ]
223+ self.assertEqual(expected, self.parse_contents(contents))
224+
225+ def test_skips_corrupt_lines(self):
226+ contents = """
227+FILE LOCATION
228+bin/afio multiverse/utils/afio
229+oatuaeosahuaseou
230+# who really knows what crap goes in here
231+lib/libfoo.so.1 main/foo-lib,universe/bar-lib
232+"""
233+ expected = [
234+ ('foo-lib', 'libfoo.so.1'),
235+ ('bar-lib', 'libfoo.so.1'),
236+ ]
237+ self.assertEqual(expected, self.parse_contents(contents))
238+
239+ def test_no_trailing_line(self):
240+ contents = """
241+FILE LOCATION
242+bin/afio multiverse/utils/afio
243+oatuaeosahuaseou
244+# who really knows what crap goes in here
245+lib/libfoo.so.1 main/foo-lib"""
246+ self.assertEqual(
247+ [('foo-lib', 'libfoo.so.1')], self.parse_contents(contents))
248+
249+ def test_realistic_data(self):
250+ contents = """
251+Random prose.
252+
253+This could go on forever.
254+
255+FILE LOCATION
256+bin/afio multiverse/utils/afio
257+bin/busybox shells/busybox-static,universe/utils/busybox
258+lib/foo/bar/libfoo.so.1 main/foo-lib
259+lib/libfoo.so.1 main/foo-lib
260+lib/uncompress.so universe/libs/zlibc
261+lib/x86_64-linux-gnu/device-mapper/libdevmapper-event-lvm2.so.2.02 universe/admin/dmeventd
262+lib/x86_64-linux-gnu/ld-2.15.so libs/libc6
263+"""
264+ expected = [
265+ ('foo-lib', 'libfoo.so.1'),
266+ ('zlibc', 'uncompress.so'),
267+ ('libc6', 'ld-2.15.so'),
268+ ]
269+ self.assertEqual(expected, self.parse_contents(contents))
270
271=== modified file 'djlibdep/tests/test_pep8.py'
272--- djlibdep/tests/test_pep8.py 2012-07-02 13:37:16 +0000
273+++ djlibdep/tests/test_pep8.py 2012-10-24 14:09:21 +0000
274@@ -24,7 +24,9 @@
275
276 def test_pep8(self):
277 # Ignore bracket alignment errors. Emacs is right. PEP 8 is wrong.
278- self.assertThat(djlibdep, PEP8Compliant(ignore=['E123']))
279+ # Ignore too-long lines: sometimes we need them in literal strings.
280+ ignore = ['E123', 'E501']
281+ self.assertThat(djlibdep, PEP8Compliant(ignore=ignore))
282
283 def test_pyflakes(self):
284 base_dir = os.path.dirname(os.path.dirname(djlibdep.__file__))

Subscribers

People subscribed via source and target branches