Merge lp:~benji/launchpad/bug-622765 into lp:launchpad
- bug-622765
- Merge into devel
Status: | Merged | ||||
---|---|---|---|---|---|
Approved by: | Benji York | ||||
Approved revision: | no longer in the source branch. | ||||
Merged at revision: | 11477 | ||||
Proposed branch: | lp:~benji/launchpad/bug-622765 | ||||
Merge into: | lp:launchpad | ||||
Diff against target: |
250 lines (+73/-24) 4 files modified
lib/canonical/launchpad/scripts/tests/test_librarian_apache_log_parser.py (+2/-2) lib/lp/services/apachelogparser/base.py (+7/-3) lib/lp/services/apachelogparser/script.py (+9/-1) lib/lp/services/apachelogparser/tests/test_apachelogparser.py (+55/-18) |
||||
To merge this branch: | bzr merge lp:~benji/launchpad/bug-622765 | ||||
Related bugs: |
|
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Henning Eggers (community) | code | Approve | |
Review via email: mp+34077@code.launchpad.net |
Commit message
Description of the change
The config option "logparser_max_parsed_lines" was being applied to each log file individually rather than to the parsing run as a whole.
This branch fixes the problem by keeping track of the number of lines parsed across calls to parse_file.
The related tests can be run with
bin/test -c test_apachelogparser
No lint was reported by make lint.
Benji York (benji) wrote : | # |
On Mon, Aug 30, 2010 at 11:54 AM, Henning Eggers
<email address hidden> wrote:
> Review: Approve code
> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
>
> Hi Benji,
> thank you for this nice fix. It is basically good to land after you have
> considered a few comments of mine.
>
> review approve code
>
> Cheers,
> Henning
>
> Am 30.08.2010 17:03, schrieb Benji York:
>>
>> === modified file 'lib/canonical/launchpad/scripts/tests/test_librarian_apache_log_parser.py'
>> --- lib/canonical/launchpad/scripts/tests/test_librarian_apache_log_parser.py 2010-08-20 20:31:18 +0000
>> +++ lib/canonical/launchpad/scripts/tests/test_librarian_apache_log_parser.py 2010-08-30 15:03:44 +0000
>> @@ -102,7 +102,7 @@
>> '69.233.136.42 - - [13/Jun/2008:14:55:22 +0100] "GET '
>> '/15018215/ul_logo_64x64.png HTTP/1.1" 200 2261 '
>> '"https://launchpad.net/~ubuntulite/+archive" "Mozilla"')
>> - downloads, parsed_bytes = parse_file(
>> + downloads, parsed_bytes, ignored = parse_file(
>
> I am not sure "ignored" is the best choice here. I wondered at first if it
> means "ignored_bytes" or "ignored_lines". Would it hurt to call it what it is
> ("parsed_lines") and then simply ignore it?
Done.
>> === modified file 'lib/lp/services/apachelogparser/script.py'
>> --- lib/lp/services/apachelogparser/script.py 2010-08-27 18:41:01 +0000
>> +++ lib/lp/services/apachelogparser/script.py 2010-08-30 15:03:44 +0000
>> @@ -8,6 +8,7 @@
>>
>> from zope.component import getUtility
>>
>> +from canonical.config import config
>> from lp.app.errors import NotFoundError
>> from lp.services.apachelogparser.base import (
>> create_or_update_parsedlog_entry,
>> @@ -65,8 +66,15 @@
>>
>> self.setUpUtilities()
>> country_set = getUtility(ICountrySet)
>> + parsed_lines = 0
>> + max_parsed_lines = getattr(
>> + config.launchpad, 'logparser_max_parsed_lines', None)
>> for fd, position in files_to_parse:
>> - downloads, parsed_bytes = parse_file(
>> + # If we've used up our budget of lines to process, stop.
>> + if (max_parsed_lines is not None
>> + and parsed_lines >= max_parsed_lines):
>
> This line must be indented and the "and" should probably go on the previous
> line but multi-line "if" conditions are always bad to read because the
> dangling lines align with the body of the statement. Storing the boolean in a
> variable first and checking that in the statement often works better.
Done.
>> + # If we have already parsed some lines, then the number of lines
>> + # parsed will be passed in (parsed_lines argument) and parse_file will
>> + # take that number into account when determining if the maximum number
>> + # of lines to parse has been reached.
>
> Hm, this is a functional test and not documentation. This description should
> probably be found in the docstring of parse_file, should it not? If found
> there, it need not be explained in such detail here.
My intent was to explain the functioning of the test. I'm sure I would
appreciate a nice explanation of what's going on if I was reading the
test....
Preview Diff
1 | === modified file 'lib/canonical/launchpad/scripts/tests/test_librarian_apache_log_parser.py' | |||
2 | --- lib/canonical/launchpad/scripts/tests/test_librarian_apache_log_parser.py 2010-08-20 20:31:18 +0000 | |||
3 | +++ lib/canonical/launchpad/scripts/tests/test_librarian_apache_log_parser.py 2010-08-30 19:47:43 +0000 | |||
4 | @@ -102,7 +102,7 @@ | |||
5 | 102 | '69.233.136.42 - - [13/Jun/2008:14:55:22 +0100] "GET ' | 102 | '69.233.136.42 - - [13/Jun/2008:14:55:22 +0100] "GET ' |
6 | 103 | '/15018215/ul_logo_64x64.png HTTP/1.1" 200 2261 ' | 103 | '/15018215/ul_logo_64x64.png HTTP/1.1" 200 2261 ' |
7 | 104 | '"https://launchpad.net/~ubuntulite/+archive" "Mozilla"') | 104 | '"https://launchpad.net/~ubuntulite/+archive" "Mozilla"') |
9 | 105 | downloads, parsed_bytes = parse_file( | 105 | downloads, parsed_bytes, ignored = parse_file( |
10 | 106 | fd, start_position=0, logger=self.logger, | 106 | fd, start_position=0, logger=self.logger, |
11 | 107 | get_download_key=get_library_file_id) | 107 | get_download_key=get_library_file_id) |
12 | 108 | self.assertEqual( | 108 | self.assertEqual( |
13 | @@ -121,7 +121,7 @@ | |||
14 | 121 | fd = StringIO( | 121 | fd = StringIO( |
15 | 122 | '69.233.136.42 - - [13/Jun/2008:14:55:22 +0100] "GET / HTTP/1.1" ' | 122 | '69.233.136.42 - - [13/Jun/2008:14:55:22 +0100] "GET / HTTP/1.1" ' |
16 | 123 | '200 2261 "https://launchpad.net/~ubuntulite/+archive" "Mozilla"') | 123 | '200 2261 "https://launchpad.net/~ubuntulite/+archive" "Mozilla"') |
18 | 124 | downloads, parsed_bytes = parse_file( | 124 | downloads, parsed_bytes, ignored = parse_file( |
19 | 125 | fd, start_position=0, logger=self.logger, | 125 | fd, start_position=0, logger=self.logger, |
20 | 126 | get_download_key=get_library_file_id) | 126 | get_download_key=get_library_file_id) |
21 | 127 | self.assertEqual( | 127 | self.assertEqual( |
22 | 128 | 128 | ||
23 | === modified file 'lib/lp/services/apachelogparser/base.py' | |||
24 | --- lib/lp/services/apachelogparser/base.py 2010-08-27 18:41:01 +0000 | |||
25 | +++ lib/lp/services/apachelogparser/base.py 2010-08-30 19:47:43 +0000 | |||
26 | @@ -77,9 +77,14 @@ | |||
27 | 77 | return fd, file_size | 77 | return fd, file_size |
28 | 78 | 78 | ||
29 | 79 | 79 | ||
31 | 80 | def parse_file(fd, start_position, logger, get_download_key): | 80 | def parse_file(fd, start_position, logger, get_download_key, parsed_lines=0): |
32 | 81 | """Parse the given file starting on the given position. | 81 | """Parse the given file starting on the given position. |
33 | 82 | 82 | ||
34 | 83 | parsed_lines accepts the number of lines that have been parsed during | ||
35 | 84 | previous calls to this function so they can be taken into account against | ||
36 | 85 | max_parsed_lines. The total number of parsed lines is then returned so it | ||
37 | 86 | can be passed back to future calls to this function. | ||
38 | 87 | |||
39 | 83 | Return a dictionary mapping file_ids (from the librarian) to days to | 88 | Return a dictionary mapping file_ids (from the librarian) to days to |
40 | 84 | countries to number of downloads. | 89 | countries to number of downloads. |
41 | 85 | """ | 90 | """ |
42 | @@ -91,7 +96,6 @@ | |||
43 | 91 | 96 | ||
44 | 92 | geoip = getUtility(IGeoIP) | 97 | geoip = getUtility(IGeoIP) |
45 | 93 | downloads = {} | 98 | downloads = {} |
46 | 94 | parsed_lines = 0 | ||
47 | 95 | 99 | ||
48 | 96 | # Check for an optional max_parsed_lines config option. | 100 | # Check for an optional max_parsed_lines config option. |
49 | 97 | max_parsed_lines = getattr( | 101 | max_parsed_lines = getattr( |
50 | @@ -167,7 +171,7 @@ | |||
51 | 167 | logger.info('Parsed %d lines resulting in %d download stats.' % ( | 171 | logger.info('Parsed %d lines resulting in %d download stats.' % ( |
52 | 168 | parsed_lines, len(downloads))) | 172 | parsed_lines, len(downloads))) |
53 | 169 | 173 | ||
55 | 170 | return downloads, parsed_bytes | 174 | return downloads, parsed_bytes, parsed_lines |
56 | 171 | 175 | ||
57 | 172 | 176 | ||
58 | 173 | def create_or_update_parsedlog_entry(first_line, parsed_bytes): | 177 | def create_or_update_parsedlog_entry(first_line, parsed_bytes): |
59 | 174 | 178 | ||
60 | === modified file 'lib/lp/services/apachelogparser/script.py' | |||
61 | --- lib/lp/services/apachelogparser/script.py 2010-08-27 18:41:01 +0000 | |||
62 | +++ lib/lp/services/apachelogparser/script.py 2010-08-30 19:47:43 +0000 | |||
63 | @@ -8,6 +8,7 @@ | |||
64 | 8 | 8 | ||
65 | 9 | from zope.component import getUtility | 9 | from zope.component import getUtility |
66 | 10 | 10 | ||
67 | 11 | from canonical.config import config | ||
68 | 11 | from lp.app.errors import NotFoundError | 12 | from lp.app.errors import NotFoundError |
69 | 12 | from lp.services.apachelogparser.base import ( | 13 | from lp.services.apachelogparser.base import ( |
70 | 13 | create_or_update_parsedlog_entry, | 14 | create_or_update_parsedlog_entry, |
71 | @@ -65,8 +66,15 @@ | |||
72 | 65 | 66 | ||
73 | 66 | self.setUpUtilities() | 67 | self.setUpUtilities() |
74 | 67 | country_set = getUtility(ICountrySet) | 68 | country_set = getUtility(ICountrySet) |
75 | 69 | parsed_lines = 0 | ||
76 | 70 | max_parsed_lines = getattr( | ||
77 | 71 | config.launchpad, 'logparser_max_parsed_lines', None) | ||
78 | 72 | max_is_set = max_parsed_lines is not None | ||
79 | 68 | for fd, position in files_to_parse: | 73 | for fd, position in files_to_parse: |
81 | 69 | downloads, parsed_bytes = parse_file( | 74 | # If we've used up our budget of lines to process, stop. |
82 | 75 | if (max_is_set and parsed_lines >= max_parsed_lines): | ||
83 | 76 | break | ||
84 | 77 | downloads, parsed_bytes, parsed_lines = parse_file( | ||
85 | 70 | fd, position, self.logger, self.getDownloadKey) | 78 | fd, position, self.logger, self.getDownloadKey) |
86 | 71 | # Use a while loop here because we want to pop items from the dict | 79 | # Use a while loop here because we want to pop items from the dict |
87 | 72 | # in order to free some memory as we go along. This is a good | 80 | # in order to free some memory as we go along. This is a good |
88 | 73 | 81 | ||
89 | === modified file 'lib/lp/services/apachelogparser/tests/test_apachelogparser.py' | |||
90 | --- lib/lp/services/apachelogparser/tests/test_apachelogparser.py 2010-08-27 18:41:01 +0000 | |||
91 | +++ lib/lp/services/apachelogparser/tests/test_apachelogparser.py 2010-08-30 19:47:43 +0000 | |||
92 | @@ -6,7 +6,6 @@ | |||
93 | 6 | import os | 6 | import os |
94 | 7 | from StringIO import StringIO | 7 | from StringIO import StringIO |
95 | 8 | import tempfile | 8 | import tempfile |
96 | 9 | import textwrap | ||
97 | 10 | from operator import itemgetter | 9 | from operator import itemgetter |
98 | 11 | import unittest | 10 | import unittest |
99 | 12 | 11 | ||
100 | @@ -125,17 +124,18 @@ | |||
101 | 125 | 124 | ||
102 | 126 | def test_parsing(self): | 125 | def test_parsing(self): |
103 | 127 | # The parse_file() function returns a tuple containing a dict (mapping | 126 | # The parse_file() function returns a tuple containing a dict (mapping |
106 | 128 | # days and library file IDs to number of downloads) and the total | 127 | # days and library file IDs to number of downloads), the total number |
107 | 129 | # number of bytes that have been parsed from this file. In our sample | 128 | # of bytes that have been parsed from this file, and the running total |
108 | 129 | # of lines parsed (for testing against the maximum). In our sample | ||
109 | 130 | # log, the file with ID 8196569 has been downloaded twice (once from | 130 | # log, the file with ID 8196569 has been downloaded twice (once from |
113 | 131 | # Argentina and once from Japan) and the files with ID 12060796 | 131 | # Argentina and once from Japan) and the files with ID 12060796 and |
114 | 132 | # and 9096290 have been downloaded once. The file with ID 15018215 | 132 | # 9096290 have been downloaded once. The file with ID 15018215 has |
115 | 133 | # has also been downloaded once (last line of the sample log), but | 133 | # also been downloaded once (last line of the sample log), but |
116 | 134 | # parse_file() always skips the last line as it may be truncated, so | 134 | # parse_file() always skips the last line as it may be truncated, so |
117 | 135 | # it doesn't show up in the dict returned. | 135 | # it doesn't show up in the dict returned. |
118 | 136 | fd = open(os.path.join( | 136 | fd = open(os.path.join( |
119 | 137 | here, 'apache-log-files', 'launchpadlibrarian.net.access-log')) | 137 | here, 'apache-log-files', 'launchpadlibrarian.net.access-log')) |
121 | 138 | downloads, parsed_bytes = parse_file( | 138 | downloads, parsed_bytes, parsed_lines = parse_file( |
122 | 139 | fd, start_position=0, logger=self.logger, | 139 | fd, start_position=0, logger=self.logger, |
123 | 140 | get_download_key=get_path_download_key) | 140 | get_download_key=get_path_download_key) |
124 | 141 | self.assertEqual( | 141 | self.assertEqual( |
125 | @@ -159,7 +159,7 @@ | |||
126 | 159 | # line without worrying about whether or not it's been truncated. | 159 | # line without worrying about whether or not it's been truncated. |
127 | 160 | fd = open(os.path.join( | 160 | fd = open(os.path.join( |
128 | 161 | here, 'apache-log-files', 'launchpadlibrarian.net.access-log')) | 161 | here, 'apache-log-files', 'launchpadlibrarian.net.access-log')) |
130 | 162 | downloads, parsed_bytes = parse_file( | 162 | downloads, parsed_bytes, parsed_lines = parse_file( |
131 | 163 | fd, start_position=self._getLastLineStart(fd), logger=self.logger, | 163 | fd, start_position=self._getLastLineStart(fd), logger=self.logger, |
132 | 164 | get_download_key=get_path_download_key) | 164 | get_download_key=get_path_download_key) |
133 | 165 | self.assertEqual( | 165 | self.assertEqual( |
134 | @@ -177,7 +177,7 @@ | |||
135 | 177 | # parsed up to the line before the one where the failure occurred. | 177 | # parsed up to the line before the one where the failure occurred. |
136 | 178 | # Here we force an unexpected error on the first line. | 178 | # Here we force an unexpected error on the first line. |
137 | 179 | fd = StringIO('Not a log') | 179 | fd = StringIO('Not a log') |
139 | 180 | downloads, parsed_bytes = parse_file( | 180 | downloads, parsed_bytes, parsed_lines = parse_file( |
140 | 181 | fd, start_position=0, logger=self.logger, | 181 | fd, start_position=0, logger=self.logger, |
141 | 182 | get_download_key=get_path_download_key) | 182 | get_download_key=get_path_download_key) |
142 | 183 | self.assertIn('Error', self.logger.buffer.getvalue()) | 183 | self.assertIn('Error', self.logger.buffer.getvalue()) |
143 | @@ -188,7 +188,7 @@ | |||
144 | 188 | """Assert that responses with the given status are ignored.""" | 188 | """Assert that responses with the given status are ignored.""" |
145 | 189 | fd = StringIO( | 189 | fd = StringIO( |
146 | 190 | self.sample_line % dict(status=status, method='GET')) | 190 | self.sample_line % dict(status=status, method='GET')) |
148 | 191 | downloads, parsed_bytes = parse_file( | 191 | downloads, parsed_bytes, parsed_lines = parse_file( |
149 | 192 | fd, start_position=0, logger=self.logger, | 192 | fd, start_position=0, logger=self.logger, |
150 | 193 | get_download_key=get_path_download_key) | 193 | get_download_key=get_path_download_key) |
151 | 194 | self.assertEqual( | 194 | self.assertEqual( |
152 | @@ -213,7 +213,7 @@ | |||
153 | 213 | """Assert that requests with the given method are ignored.""" | 213 | """Assert that requests with the given method are ignored.""" |
154 | 214 | fd = StringIO( | 214 | fd = StringIO( |
155 | 215 | self.sample_line % dict(status='200', method=method)) | 215 | self.sample_line % dict(status='200', method=method)) |
157 | 216 | downloads, parsed_bytes = parse_file( | 216 | downloads, parsed_bytes, parsed_lines = parse_file( |
158 | 217 | fd, start_position=0, logger=self.logger, | 217 | fd, start_position=0, logger=self.logger, |
159 | 218 | get_download_key=get_path_download_key) | 218 | get_download_key=get_path_download_key) |
160 | 219 | self.assertEqual( | 219 | self.assertEqual( |
161 | @@ -231,7 +231,7 @@ | |||
162 | 231 | def test_normal_request_is_not_ignored(self): | 231 | def test_normal_request_is_not_ignored(self): |
163 | 232 | fd = StringIO( | 232 | fd = StringIO( |
164 | 233 | self.sample_line % dict(status=200, method='GET')) | 233 | self.sample_line % dict(status=200, method='GET')) |
166 | 234 | downloads, parsed_bytes = parse_file( | 234 | downloads, parsed_bytes, parsed_lines = parse_file( |
167 | 235 | fd, start_position=0, logger=self.logger, | 235 | fd, start_position=0, logger=self.logger, |
168 | 236 | get_download_key=get_path_download_key) | 236 | get_download_key=get_path_download_key) |
169 | 237 | self.assertEqual( | 237 | self.assertEqual( |
170 | @@ -249,22 +249,20 @@ | |||
171 | 249 | # lines. | 249 | # lines. |
172 | 250 | config.push( | 250 | config.push( |
173 | 251 | 'log_parser config', | 251 | 'log_parser config', |
178 | 252 | textwrap.dedent('''\ | 252 | '[launchpad]\nlogparser_max_parsed_lines: 2') |
175 | 253 | [launchpad] | ||
176 | 254 | logparser_max_parsed_lines: 2 | ||
177 | 255 | ''')) | ||
179 | 256 | self.addCleanup(config.pop, 'log_parser config') | 253 | self.addCleanup(config.pop, 'log_parser config') |
180 | 257 | fd = open(os.path.join( | 254 | fd = open(os.path.join( |
181 | 258 | here, 'apache-log-files', 'launchpadlibrarian.net.access-log')) | 255 | here, 'apache-log-files', 'launchpadlibrarian.net.access-log')) |
182 | 259 | self.addCleanup(fd.close) | 256 | self.addCleanup(fd.close) |
183 | 260 | 257 | ||
185 | 261 | downloads, parsed_bytes = parse_file( | 258 | downloads, parsed_bytes, parsed_lines = parse_file( |
186 | 262 | fd, start_position=0, logger=self.logger, | 259 | fd, start_position=0, logger=self.logger, |
187 | 263 | get_download_key=get_path_download_key) | 260 | get_download_key=get_path_download_key) |
188 | 264 | 261 | ||
189 | 265 | # We have initially parsed only the first two lines of data, | 262 | # We have initially parsed only the first two lines of data, |
190 | 266 | # corresponding to one download (the first line is a 404 and | 263 | # corresponding to one download (the first line is a 404 and |
191 | 267 | # so ignored). | 264 | # so ignored). |
192 | 265 | self.assertEqual(parsed_lines, 2) | ||
193 | 268 | date = datetime(2008, 6, 13) | 266 | date = datetime(2008, 6, 13) |
194 | 269 | self.assertContentEqual( | 267 | self.assertContentEqual( |
195 | 270 | downloads.items(), | 268 | downloads.items(), |
196 | @@ -276,7 +274,7 @@ | |||
197 | 276 | 274 | ||
198 | 277 | # And the subsequent parse will be for the 3rd and 4th lines, | 275 | # And the subsequent parse will be for the 3rd and 4th lines, |
199 | 278 | # corresponding to two downloads of the same file. | 276 | # corresponding to two downloads of the same file. |
201 | 279 | downloads, parsed_bytes = parse_file( | 277 | downloads, parsed_bytes, parsed_lines = parse_file( |
202 | 280 | fd, start_position=parsed_bytes, logger=self.logger, | 278 | fd, start_position=parsed_bytes, logger=self.logger, |
203 | 281 | get_download_key=get_path_download_key) | 279 | get_download_key=get_path_download_key) |
204 | 282 | self.assertContentEqual( | 280 | self.assertContentEqual( |
205 | @@ -285,6 +283,45 @@ | |||
206 | 285 | ('/8196569/mediumubuntulogo.png', {date: {'AR': 1}})]) | 283 | ('/8196569/mediumubuntulogo.png', {date: {'AR': 1}})]) |
207 | 286 | self.assertEqual(parsed_bytes, sum(line_lengths[:4])) | 284 | self.assertEqual(parsed_bytes, sum(line_lengths[:4])) |
208 | 287 | 285 | ||
209 | 286 | def test_max_parsed_lines_exceeded(self): | ||
210 | 287 | # Show that if a non-zero parsed_lines is passed in, the number of | ||
211 | 288 | # lines parsed will be less than it would otherwise have been. | ||
212 | 289 | |||
213 | 290 | # The max_parsed_lines config option limits the number of parsed | ||
214 | 291 | # lines. | ||
215 | 292 | config.push( | ||
216 | 293 | 'log_parser config', | ||
217 | 294 | '[launchpad]\nlogparser_max_parsed_lines: 2') | ||
218 | 295 | self.addCleanup(config.pop, 'log_parser config') | ||
219 | 296 | fd = open(os.path.join( | ||
220 | 297 | here, 'apache-log-files', 'launchpadlibrarian.net.access-log')) | ||
221 | 298 | self.addCleanup(fd.close) | ||
222 | 299 | |||
223 | 300 | # We want to start parsing on line 2 so we will have a value in | ||
224 | 301 | # "downloads" to make a positive assertion about. (The first line is | ||
225 | 302 | # a 404 so wouldn't generate any output.) | ||
226 | 303 | start_position = len(fd.readline()) | ||
227 | 304 | |||
228 | 305 | # If we have already parsed some lines, then the number of lines | ||
229 | 306 | # parsed will be passed in (parsed_lines argument) and parse_file will | ||
230 | 307 | # take that number into account when determining if the maximum number | ||
231 | 308 | # of lines to parse has been reached. | ||
232 | 309 | parsed_lines = 1 | ||
233 | 310 | downloads, parsed_bytes, parsed_lines = parse_file( | ||
234 | 311 | fd, start_position=start_position, logger=self.logger, | ||
235 | 312 | get_download_key=get_path_download_key, parsed_lines=parsed_lines) | ||
236 | 313 | |||
237 | 314 | # The total number of lines parsed during the run (1 line) plus the | ||
238 | 315 | # number of lines parsed previously (1 line, as passed in via | ||
239 | 316 | # parsed_lines) is returned. | ||
240 | 317 | self.assertEqual(parsed_lines, 2) | ||
241 | 318 | # Since we told parse_file that we had already parsed 1 line and the | ||
242 | 319 | # limit is 2 lines, it only parsed a single line. | ||
243 | 320 | date = datetime(2008, 6, 13) | ||
244 | 321 | self.assertContentEqual( | ||
245 | 322 | downloads.items(), | ||
246 | 323 | [('/9096290/me-tv-icon-14x14.png', {date: {'AU': 1}})]) | ||
247 | 324 | |||
248 | 288 | 325 | ||
249 | 289 | class TestParsedFilesDetection(TestCase): | 326 | class TestParsedFilesDetection(TestCase): |
250 | 290 | """Test the detection of already parsed logs.""" | 327 | """Test the detection of already parsed logs.""" |
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1
Hi Benji,
thank you for this nice fix. It is basically good to land after you have
considered a few comments of mine.
review approve code
Cheers,
Henning
Am 30.08.2010 17:03, schrieb Benji York: launchpad/ scripts/ tests/test_ librarian_ apache_ log_parser. py' launchpad/ scripts/ tests/test_ librarian_ apache_ log_parser. py 2010-08-20 20:31:18 +0000 launchpad/ scripts/ tests/test_ librarian_ apache_ log_parser. py 2010-08-30 15:03:44 +0000 2008:14: 55:22 +0100] "GET ' ul_logo_ 64x64.png HTTP/1.1" 200 2261 ' /launchpad. net/~ubuntulite /+archive" "Mozilla"')
>
> === modified file 'lib/canonical/
> --- lib/canonical/
> +++ lib/canonical/
> @@ -102,7 +102,7 @@
> '69.233.136.42 - - [13/Jun/
> '/15018215/
> '"https:/
> - downloads, parsed_bytes = parse_file(
> + downloads, parsed_bytes, ignored = parse_file(
I am not sure "ignored" is the best choice here. I wondered at first if it
means "ignored_bytes" or "ignored_lines". Would it hurt to call it what it is
("parsed_lines") and then simply ignore it?
There are multiple uses of "ignored" in here, as you know. ;-)
> fd, start_position=0, logger=self.logger, key=get_ library_ file_id) 2008:14: 55:22 +0100] "GET / HTTP/1.1" ' /launchpad. net/~ubuntulite /+archive" "Mozilla"') key=get_ library_ file_id) services/ apachelogparser /base.py' services/ apachelogparser /base.py 2010-08-27 18:41:01 +0000 services/ apachelogparser /base.py 2010-08-30 15:03:44 +0000
> get_download_
> self.assertEqual(
> @@ -121,7 +121,7 @@
> fd = StringIO(
> '69.233.136.42 - - [13/Jun/
> '200 2261 "https:/
> - downloads, parsed_bytes = parse_file(
> + downloads, parsed_bytes, ignored = parse_file(
> fd, start_position=0, logger=self.logger,
> get_download_
> self.assertEqual(
>
> === modified file 'lib/lp/
> --- lib/lp/
> +++ lib/lp/
Very nice. Thank you.
[...]
> === modified file 'lib/lp/ services/ apachelogparser /script. py' services/ apachelogparser /script. py 2010-08-27 18:41:01 +0000 services/ apachelogparser /script. py 2010-08-30 15:03:44 +0000 apachelogparser .base import ( or_update_ parsedlog_ entry, ties() ICountrySet) max_parsed_ lines', None)
> --- lib/lp/
> +++ lib/lp/
> @@ -8,6 +8,7 @@
>
> from zope.component import getUtility
>
> +from canonical.config import config
> from lp.app.errors import NotFoundError
> from lp.services.
> create_
> @@ -65,8 +66,15 @@
>
> self.setUpUtili
> country_set = getUtility(
> + parsed_lines = 0
> + max_parsed_lines = getattr(
> + config.launchpad, 'logparser_
> for fd, position in files_to_parse:
> - downloads, parsed_bytes = parse_file(
> + # If we've used up our budget of lines to process, stop.
> + if (max_parsed_lines is not None
> + and parsed_lines >= max_parsed_lines):
This line must be indented and the "and" should probably go on the previous
line but multi-line "if" conditions are always bad to read because the
dangling lines align with the body of the statement. Storing the boolean in a
variable first and checking that in the statement often works better.