Merge lp:~jtv/launchpad/xpi-dtd-parser into lp:launchpad

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: no longer in the source branch.
Merged at revision: 11173
Proposed branch: lp:~jtv/launchpad/xpi-dtd-parser
Merge into: lp:launchpad
Diff against target: 370 lines (+152/-134)
5 files modified
lib/lp/translations/scripts/validate_translations_file.py (+2/-1)
lib/lp/translations/utilities/gettext_po_importer.py (+4/-4)
lib/lp/translations/utilities/mozilla_dtd_parser.py (+144/-0)
lib/lp/translations/utilities/mozilla_xpi_importer.py (+1/-128)
lib/lp/translations/utilities/tests/test_xpi_dtd_format.py (+1/-1)
To merge this branch: bzr merge lp:~jtv/launchpad/xpi-dtd-parser
Reviewer | Review Type | Date Requested | Status
Leonard Richardson (community) | | | Approve
Launchpad code reviewers | code | | Pending
Review via email: mp+30383@code.launchpad.net

Commit message

Extract XPI DTD parser.

Description of the change

= DTD Parser =

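This extracts the code for parsing an XPI DTD file out of mozilla_xpi_importer.py and into a module of its own, mozilla_dtd_parser.py. For now this is purely a cleanup with no intended change in behaviour, but it also looks like we'll want to build a full-blown importer around this parser later.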

To test:
{{{
./bin/test -vvc -m lp.translations -t dtd
}}}

Jeroen

To post a comment you must log in.
Revision history for this message
Leonard Richardson (leonardr) wrote :

+1

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/translations/scripts/validate_translations_file.py'
2--- lib/lp/translations/scripts/validate_translations_file.py 2010-01-06 06:04:54 +0000
3+++ lib/lp/translations/scripts/validate_translations_file.py 2010-07-20 11:02:51 +0000
4@@ -15,8 +15,9 @@
5
6 from canonical.launchpad import scripts
7 from lp.translations.utilities.gettext_po_parser import POParser
8+from lp.translations.utilities.mozilla_dtd_parser import DtdFile
9 from lp.translations.utilities.mozilla_xpi_importer import (
10- DtdFile, MozillaZipImportParser)
11+ MozillaZipImportParser)
12 from lp.translations.utilities.xpi_manifest import XpiManifest
13
14
15
16=== modified file 'lib/lp/translations/utilities/gettext_po_importer.py'
17--- lib/lp/translations/utilities/gettext_po_importer.py 2009-07-17 00:26:05 +0000
18+++ lib/lp/translations/utilities/gettext_po_importer.py 2010-07-20 11:02:51 +0000
19@@ -1,19 +1,19 @@
20-# Copyright 2009 Canonical Ltd. This software is licensed under the
21+# Copyright 2009-2010 Canonical Ltd. This software is licensed under the
22 # GNU Affero General Public License version 3 (see the file LICENSE).
23
24 __metaclass__ = type
25
26 __all__ = [
27- 'GettextPOImporter'
28+ 'GettextPOImporter',
29 ]
30
31 from zope.component import getUtility
32 from zope.interface import implements
33
34+from lp.translations.interfaces.translationfileformat import (
35+ TranslationFileFormat)
36 from lp.translations.interfaces.translationimporter import (
37 ITranslationFormatImporter)
38-from lp.translations.interfaces.translationfileformat import (
39- TranslationFileFormat)
40 from lp.translations.utilities.gettext_po_parser import (
41 POParser, POHeader)
42 from canonical.librarian.interfaces import ILibrarianClient
43
44=== added file 'lib/lp/translations/utilities/mozilla_dtd_parser.py'
45--- lib/lp/translations/utilities/mozilla_dtd_parser.py 1970-01-01 00:00:00 +0000
46+++ lib/lp/translations/utilities/mozilla_dtd_parser.py 2010-07-20 11:02:51 +0000
47@@ -0,0 +1,144 @@
48+# Copyright 2010 Canonical Ltd. This software is licensed under the
49+# GNU Affero General Public License version 3 (see the file LICENSE).
50+
51+"""Importer for DTD files as found in XPI archives."""
52+
53+__metaclass__ = type
54+__all__ = [
55+ 'DtdFile'
56+ ]
57+
58+from old_xmlplus.parsers.xmlproc import dtdparser, xmldtd, utils
59+
60+from lp.translations.interfaces.translationimporter import (
61+ TranslationFormatInvalidInputError,
62+ TranslationFormatSyntaxError)
63+from lp.translations.utilities.translation_common_format import (
64+ TranslationMessageData)
65+from lp.translations.interfaces.translations import TranslationConstants
66+
67+
68+class MozillaDtdConsumer(xmldtd.WFCDTD):
69+ """Mozilla DTD translatable message parser.
70+
71+ msgids are stored as entities. This class extracts it along
72+ with translations, comments and source references.
73+ """
74+ def __init__(self, parser, filename, chrome_path, messages):
75+ self.started = False
76+ self.last_comment = None
77+ self.chrome_path = chrome_path
78+ self.messages = messages
79+ self.filename = filename
80+ xmldtd.WFCDTD.__init__(self, parser)
81+
82+ def dtd_start(self):
83+ """See `xmldtd.WFCDTD`."""
84+ self.started = True
85+
86+ def dtd_end(self):
87+ """See `xmldtd.WFCDTD`."""
88+ self.started = False
89+
90+ def handle_comment(self, contents):
91+ """See `xmldtd.WFCDTD`."""
92+ if not self.started:
93+ return
94+
95+ if self.last_comment is not None:
96+ self.last_comment += contents
97+ elif len(contents) > 0:
98+ self.last_comment = contents
99+
100+ if self.last_comment and not self.last_comment.endswith('\n'):
101+ # Comments must end always with a new line.
102+ self.last_comment += '\n'
103+
104+ def new_general_entity(self, name, value):
105+ """See `xmldtd.WFCDTD`."""
106+ if not self.started:
107+ return
108+
109+ message = TranslationMessageData()
110+ message.msgid_singular = name
111+ # CarlosPerelloMarin 20070326: xmldtd parser does an inline
112+ # parsing which means that the content is all in a single line so we
113+ # don't have a way to show the line number with the source reference.
114+ message.file_references_list = ["%s(%s)" % (self.filename, name)]
115+ message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
116+ message.singular_text = value
117+ message.context = self.chrome_path
118+ message.source_comment = self.last_comment
119+ self.messages.append(message)
120+ self.started += 1
121+ self.last_comment = None
122+
123+
124+class DtdErrorHandler(utils.ErrorCounter):
125+ """Error handler for the DTD parser."""
126+ filename = None
127+
128+ def error(self, msg):
129+ raise TranslationFormatSyntaxError(
130+ filename=self.filename, message=msg)
131+
132+ def fatal(self, msg):
133+ raise TranslationFormatInvalidInputError(
134+ filename=self.filename, message=msg)
135+
136+
137+class DummyDtdFile:
138+ """"File" returned when DTD SYSTEM entity tries to include a file."""
139+ done = False
140+
141+ def read(self, *args, **kwargs):
142+ """Minimally satisfy attempt to read an included DTD file."""
143+ if self.done:
144+ return ''
145+ else:
146+ self.done = True
147+ return '<!-- SYSTEM entities not supported. -->'
148+
149+ def close(self):
150+ """Satisfy attempt to close file."""
151+ pass
152+
153+
154+class DtdInputSourceFactoryStub:
155+ """Replace the class the DTD parser uses to include other DTD files."""
156+
157+ def create_input_source(self, sysid):
158+ """Minimally satisfy attempt to open an included DTD file.
159+
160+ This is called when the DTD parser hits a SYSTEM entity.
161+ """
162+ return DummyDtdFile()
163+
164+
165+class DtdFile:
166+ """Class for reading translatable messages from a .dtd file.
167+
168+ It uses DTDParser which fills self.messages with parsed messages.
169+ """
170+ def __init__(self, filename, chrome_path, content):
171+ self.messages = []
172+ self.filename = filename
173+ self.chrome_path = chrome_path
174+
175+ # .dtd files are supposed to be using UTF-8 encoding, if the file is
176+ # using another encoding, it's against the standard so we reject it
177+ try:
178+ content = content.decode('utf-8')
179+ except UnicodeDecodeError:
180+ raise TranslationFormatInvalidInputError, (
181+ 'Content is not valid UTF-8 text')
182+
183+ error_handler = DtdErrorHandler()
184+ error_handler.filename = filename
185+
186+ parser = dtdparser.DTDParser()
187+ parser.set_error_handler(error_handler)
188+ parser.set_inputsource_factory(DtdInputSourceFactoryStub())
189+ dtd = MozillaDtdConsumer(parser, filename, chrome_path, self.messages)
190+ parser.set_dtd_consumer(dtd)
191+ parser.parse_string(content)
192
193=== modified file 'lib/lp/translations/utilities/mozilla_xpi_importer.py'
194--- lib/lp/translations/utilities/mozilla_xpi_importer.py 2010-01-05 13:44:13 +0000
195+++ lib/lp/translations/utilities/mozilla_xpi_importer.py 2010-07-20 11:02:51 +0000
196@@ -4,7 +4,6 @@
197 __metaclass__ = type
198
199 __all__ = [
200- 'DtdFile',
201 'MozillaXpiImporter',
202 'MozillaZipImportParser',
203 ]
204@@ -12,8 +11,6 @@
205 from cStringIO import StringIO
206 import textwrap
207
208-from old_xmlplus.parsers.xmlproc import dtdparser, xmldtd, utils
209-
210 from zope.component import getUtility
211 from zope.interface import implements
212
213@@ -27,13 +24,13 @@
214 from lp.translations.utilities.translation_common_format import (
215 TranslationFileData,
216 TranslationMessageData)
217+from lp.translations.utilities.mozilla_dtd_parser import DtdFile
218 from lp.translations.utilities.mozilla_zip import (
219 MozillaZipTraversal)
220 from lp.translations.utilities.xpi_header import XpiHeader
221 from canonical.librarian.interfaces import ILibrarianClient
222
223
224-
225 def add_source_comment(message, comment):
226 """Add the given comment inside message.source_comment."""
227 if message.source_comment:
228@@ -160,130 +157,6 @@
229 self.messages.append(message)
230
231
232-class MozillaDtdConsumer(xmldtd.WFCDTD):
233- """Mozilla DTD translatable message parser.
234-
235- msgids are stored as entities. This class extracts it along
236- with translations, comments and source references.
237- """
238- def __init__(self, parser, filename, chrome_path, messages):
239- self.started = False
240- self.last_comment = None
241- self.chrome_path = chrome_path
242- self.messages = messages
243- self.filename = filename
244- xmldtd.WFCDTD.__init__(self, parser)
245-
246- def dtd_start(self):
247- """See `xmldtd.WFCDTD`."""
248- self.started = True
249-
250- def dtd_end(self):
251- """See `xmldtd.WFCDTD`."""
252- self.started = False
253-
254- def handle_comment(self, contents):
255- """See `xmldtd.WFCDTD`."""
256- if not self.started:
257- return
258-
259- if self.last_comment is not None:
260- self.last_comment += contents
261- elif len(contents) > 0:
262- self.last_comment = contents
263-
264- if self.last_comment and not self.last_comment.endswith('\n'):
265- # Comments must end always with a new line.
266- self.last_comment += '\n'
267-
268- def new_general_entity(self, name, value):
269- """See `xmldtd.WFCDTD`."""
270- if not self.started:
271- return
272-
273- message = TranslationMessageData()
274- message.msgid_singular = name
275- # CarlosPerelloMarin 20070326: xmldtd parser does an inline
276- # parsing which means that the content is all in a single line so we
277- # don't have a way to show the line number with the source reference.
278- message.file_references_list = ["%s(%s)" % (self.filename, name)]
279- message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
280- message.singular_text = value
281- message.context = self.chrome_path
282- message.source_comment = self.last_comment
283- self.messages.append(message)
284- self.started += 1
285- self.last_comment = None
286-
287-
288-class DtdErrorHandler(utils.ErrorCounter):
289- """Error handler for the DTD parser."""
290- filename = None
291-
292- def error(self, msg):
293- raise TranslationFormatSyntaxError(
294- filename=self.filename, message=msg)
295-
296- def fatal(self, msg):
297- raise TranslationFormatInvalidInputError(
298- filename=self.filename, message=msg)
299-
300-
301-class DummyDtdFile:
302- """"File" returned when DTD SYSTEM entity tries to include a file."""
303- done = False
304-
305- def read(self, *args, **kwargs):
306- """Minimally satisfy attempt to read an included DTD file."""
307- if self.done:
308- return ''
309- else:
310- self.done = True
311- return '<!-- SYSTEM entities not supported. -->'
312-
313- def close(self):
314- """Satisfy attempt to close file."""
315- pass
316-
317-
318-class DtdInputSourceFactoryStub:
319- """Replace the class the DTD parser uses to include other DTD files."""
320-
321- def create_input_source(self, sysid):
322- """Minimally satisfy attempt to open an included DTD file.
323-
324- This is called when the DTD parser hits a SYSTEM entity.
325- """
326- return DummyDtdFile()
327-
328-
329-class DtdFile:
330- """Class for reading translatable messages from a .dtd file.
331-
332- It uses DTDParser which fills self.messages with parsed messages.
333- """
334- def __init__(self, filename, chrome_path, content):
335- self.messages = []
336- self.filename = filename
337- self.chrome_path = chrome_path
338-
339- # .dtd files are supposed to be using UTF-8 encoding, if the file is
340- # using another encoding, it's against the standard so we reject it
341- try:
342- content = content.decode('utf-8')
343- except UnicodeDecodeError:
344- raise TranslationFormatInvalidInputError, (
345- 'Content is not valid UTF-8 text')
346-
347- error_handler = DtdErrorHandler()
348- error_handler.filename = filename
349-
350- parser = dtdparser.DTDParser()
351- parser.set_error_handler(error_handler)
352- parser.set_inputsource_factory(DtdInputSourceFactoryStub())
353- dtd = MozillaDtdConsumer(parser, filename, chrome_path, self.messages)
354- parser.set_dtd_consumer(dtd)
355- parser.parse_string(content)
356
357
358 def valid_property_msgid(msgid):
359
360=== modified file 'lib/lp/translations/utilities/tests/test_xpi_dtd_format.py'
361--- lib/lp/translations/utilities/tests/test_xpi_dtd_format.py 2009-07-17 00:26:05 +0000
362+++ lib/lp/translations/utilities/tests/test_xpi_dtd_format.py 2010-07-20 11:02:51 +0000
363@@ -5,7 +5,7 @@
364
365 import unittest
366
367-from lp.translations.utilities.mozilla_xpi_importer import DtdFile
368+from lp.translations.utilities.mozilla_dtd_parser import DtdFile
369 from lp.translations.interfaces.translationimporter import (
370 TranslationFormatInvalidInputError)
371