Merge lp:~jtv/launchpad/xpi-dtd-parser into lp:launchpad

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: no longer in the source branch.
Merged at revision: 11173
Proposed branch: lp:~jtv/launchpad/xpi-dtd-parser
Merge into: lp:launchpad
Diff against target: 370 lines (+152/-134)
5 files modified
lib/lp/translations/scripts/validate_translations_file.py (+2/-1)
lib/lp/translations/utilities/gettext_po_importer.py (+4/-4)
lib/lp/translations/utilities/mozilla_dtd_parser.py (+144/-0)
lib/lp/translations/utilities/mozilla_xpi_importer.py (+1/-128)
lib/lp/translations/utilities/tests/test_xpi_dtd_format.py (+1/-1)
To merge this branch: bzr merge lp:~jtv/launchpad/xpi-dtd-parser
Reviewer | Review Type | Date Requested | Status
Leonard Richardson (community) | | | Approve
Launchpad code reviewers | code | | Pending
Review via email: mp+30383@code.launchpad.net

Commit message

Extract XPI DTD parser.

Description of the change

= DTD Parser =

This branch extracts the code for parsing an XPI DTD file out of mozilla_xpi_importer.py and into a module of its own, mozilla_dtd_parser.py. For now this is purely a cleanup, but it also looks like we'll want to build a full-blown importer around this parser later.

To test:
{{{
./bin/test -vvc -m lp.translations -t dtd
}}}

Jeroen

To post a comment you must log in.
Revision history for this message
Leonard Richardson (leonardr) wrote :

+1

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/translations/scripts/validate_translations_file.py'
--- lib/lp/translations/scripts/validate_translations_file.py 2010-01-06 06:04:54 +0000
+++ lib/lp/translations/scripts/validate_translations_file.py 2010-07-20 11:02:51 +0000
@@ -15,8 +15,9 @@
1515
16from canonical.launchpad import scripts16from canonical.launchpad import scripts
17from lp.translations.utilities.gettext_po_parser import POParser17from lp.translations.utilities.gettext_po_parser import POParser
18from lp.translations.utilities.mozilla_dtd_parser import DtdFile
18from lp.translations.utilities.mozilla_xpi_importer import (19from lp.translations.utilities.mozilla_xpi_importer import (
19 DtdFile, MozillaZipImportParser)20 MozillaZipImportParser)
20from lp.translations.utilities.xpi_manifest import XpiManifest21from lp.translations.utilities.xpi_manifest import XpiManifest
2122
2223
2324
=== modified file 'lib/lp/translations/utilities/gettext_po_importer.py'
--- lib/lp/translations/utilities/gettext_po_importer.py 2009-07-17 00:26:05 +0000
+++ lib/lp/translations/utilities/gettext_po_importer.py 2010-07-20 11:02:51 +0000
@@ -1,19 +1,19 @@
1# Copyright 2009 Canonical Ltd. This software is licensed under the1# Copyright 2009-2010 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4__metaclass__ = type4__metaclass__ = type
55
6__all__ = [6__all__ = [
7 'GettextPOImporter'7 'GettextPOImporter',
8 ]8 ]
99
10from zope.component import getUtility10from zope.component import getUtility
11from zope.interface import implements11from zope.interface import implements
1212
13from lp.translations.interfaces.translationfileformat import (
14 TranslationFileFormat)
13from lp.translations.interfaces.translationimporter import (15from lp.translations.interfaces.translationimporter import (
14 ITranslationFormatImporter)16 ITranslationFormatImporter)
15from lp.translations.interfaces.translationfileformat import (
16 TranslationFileFormat)
17from lp.translations.utilities.gettext_po_parser import (17from lp.translations.utilities.gettext_po_parser import (
18 POParser, POHeader)18 POParser, POHeader)
19from canonical.librarian.interfaces import ILibrarianClient19from canonical.librarian.interfaces import ILibrarianClient
2020
=== added file 'lib/lp/translations/utilities/mozilla_dtd_parser.py'
--- lib/lp/translations/utilities/mozilla_dtd_parser.py 1970-01-01 00:00:00 +0000
+++ lib/lp/translations/utilities/mozilla_dtd_parser.py 2010-07-20 11:02:51 +0000
@@ -0,0 +1,144 @@
1# Copyright 2010 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).
3
4"""Importer for DTD files as found in XPI archives."""
5
6__metaclass__ = type
7__all__ = [
8 'DtdFile'
9 ]
10
11from old_xmlplus.parsers.xmlproc import dtdparser, xmldtd, utils
12
13from lp.translations.interfaces.translationimporter import (
14 TranslationFormatInvalidInputError,
15 TranslationFormatSyntaxError)
16from lp.translations.utilities.translation_common_format import (
17 TranslationMessageData)
18from lp.translations.interfaces.translations import TranslationConstants
19
20
21class MozillaDtdConsumer(xmldtd.WFCDTD):
22 """Mozilla DTD translatable message parser.
23
24 msgids are stored as entities. This class extracts it along
25 with translations, comments and source references.
26 """
27 def __init__(self, parser, filename, chrome_path, messages):
28 self.started = False
29 self.last_comment = None
30 self.chrome_path = chrome_path
31 self.messages = messages
32 self.filename = filename
33 xmldtd.WFCDTD.__init__(self, parser)
34
35 def dtd_start(self):
36 """See `xmldtd.WFCDTD`."""
37 self.started = True
38
39 def dtd_end(self):
40 """See `xmldtd.WFCDTD`."""
41 self.started = False
42
43 def handle_comment(self, contents):
44 """See `xmldtd.WFCDTD`."""
45 if not self.started:
46 return
47
48 if self.last_comment is not None:
49 self.last_comment += contents
50 elif len(contents) > 0:
51 self.last_comment = contents
52
53 if self.last_comment and not self.last_comment.endswith('\n'):
54 # Comments must end always with a new line.
55 self.last_comment += '\n'
56
57 def new_general_entity(self, name, value):
58 """See `xmldtd.WFCDTD`."""
59 if not self.started:
60 return
61
62 message = TranslationMessageData()
63 message.msgid_singular = name
64 # CarlosPerelloMarin 20070326: xmldtd parser does an inline
65 # parsing which means that the content is all in a single line so we
66 # don't have a way to show the line number with the source reference.
67 message.file_references_list = ["%s(%s)" % (self.filename, name)]
68 message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
69 message.singular_text = value
70 message.context = self.chrome_path
71 message.source_comment = self.last_comment
72 self.messages.append(message)
73 self.started += 1
74 self.last_comment = None
75
76
77class DtdErrorHandler(utils.ErrorCounter):
78 """Error handler for the DTD parser."""
79 filename = None
80
81 def error(self, msg):
82 raise TranslationFormatSyntaxError(
83 filename=self.filename, message=msg)
84
85 def fatal(self, msg):
86 raise TranslationFormatInvalidInputError(
87 filename=self.filename, message=msg)
88
89
90class DummyDtdFile:
91 """"File" returned when DTD SYSTEM entity tries to include a file."""
92 done = False
93
94 def read(self, *args, **kwargs):
95 """Minimally satisfy attempt to read an included DTD file."""
96 if self.done:
97 return ''
98 else:
99 self.done = True
100 return '<!-- SYSTEM entities not supported. -->'
101
102 def close(self):
103 """Satisfy attempt to close file."""
104 pass
105
106
107class DtdInputSourceFactoryStub:
108 """Replace the class the DTD parser uses to include other DTD files."""
109
110 def create_input_source(self, sysid):
111 """Minimally satisfy attempt to open an included DTD file.
112
113 This is called when the DTD parser hits a SYSTEM entity.
114 """
115 return DummyDtdFile()
116
117
118class DtdFile:
119 """Class for reading translatable messages from a .dtd file.
120
121 It uses DTDParser which fills self.messages with parsed messages.
122 """
123 def __init__(self, filename, chrome_path, content):
124 self.messages = []
125 self.filename = filename
126 self.chrome_path = chrome_path
127
128 # .dtd files are supposed to be using UTF-8 encoding, if the file is
129 # using another encoding, it's against the standard so we reject it
130 try:
131 content = content.decode('utf-8')
132 except UnicodeDecodeError:
133 raise TranslationFormatInvalidInputError, (
134 'Content is not valid UTF-8 text')
135
136 error_handler = DtdErrorHandler()
137 error_handler.filename = filename
138
139 parser = dtdparser.DTDParser()
140 parser.set_error_handler(error_handler)
141 parser.set_inputsource_factory(DtdInputSourceFactoryStub())
142 dtd = MozillaDtdConsumer(parser, filename, chrome_path, self.messages)
143 parser.set_dtd_consumer(dtd)
144 parser.parse_string(content)
0145
=== modified file 'lib/lp/translations/utilities/mozilla_xpi_importer.py'
--- lib/lp/translations/utilities/mozilla_xpi_importer.py 2010-01-05 13:44:13 +0000
+++ lib/lp/translations/utilities/mozilla_xpi_importer.py 2010-07-20 11:02:51 +0000
@@ -4,7 +4,6 @@
4__metaclass__ = type4__metaclass__ = type
55
6__all__ = [6__all__ = [
7 'DtdFile',
8 'MozillaXpiImporter',7 'MozillaXpiImporter',
9 'MozillaZipImportParser',8 'MozillaZipImportParser',
10 ]9 ]
@@ -12,8 +11,6 @@
12from cStringIO import StringIO11from cStringIO import StringIO
13import textwrap12import textwrap
1413
15from old_xmlplus.parsers.xmlproc import dtdparser, xmldtd, utils
16
17from zope.component import getUtility14from zope.component import getUtility
18from zope.interface import implements15from zope.interface import implements
1916
@@ -27,13 +24,13 @@
27from lp.translations.utilities.translation_common_format import (24from lp.translations.utilities.translation_common_format import (
28 TranslationFileData,25 TranslationFileData,
29 TranslationMessageData)26 TranslationMessageData)
27from lp.translations.utilities.mozilla_dtd_parser import DtdFile
30from lp.translations.utilities.mozilla_zip import (28from lp.translations.utilities.mozilla_zip import (
31 MozillaZipTraversal)29 MozillaZipTraversal)
32from lp.translations.utilities.xpi_header import XpiHeader30from lp.translations.utilities.xpi_header import XpiHeader
33from canonical.librarian.interfaces import ILibrarianClient31from canonical.librarian.interfaces import ILibrarianClient
3432
3533
36
37def add_source_comment(message, comment):34def add_source_comment(message, comment):
38 """Add the given comment inside message.source_comment."""35 """Add the given comment inside message.source_comment."""
39 if message.source_comment:36 if message.source_comment:
@@ -160,130 +157,6 @@
160 self.messages.append(message)157 self.messages.append(message)
161158
162159
163class MozillaDtdConsumer(xmldtd.WFCDTD):
164 """Mozilla DTD translatable message parser.
165
166 msgids are stored as entities. This class extracts it along
167 with translations, comments and source references.
168 """
169 def __init__(self, parser, filename, chrome_path, messages):
170 self.started = False
171 self.last_comment = None
172 self.chrome_path = chrome_path
173 self.messages = messages
174 self.filename = filename
175 xmldtd.WFCDTD.__init__(self, parser)
176
177 def dtd_start(self):
178 """See `xmldtd.WFCDTD`."""
179 self.started = True
180
181 def dtd_end(self):
182 """See `xmldtd.WFCDTD`."""
183 self.started = False
184
185 def handle_comment(self, contents):
186 """See `xmldtd.WFCDTD`."""
187 if not self.started:
188 return
189
190 if self.last_comment is not None:
191 self.last_comment += contents
192 elif len(contents) > 0:
193 self.last_comment = contents
194
195 if self.last_comment and not self.last_comment.endswith('\n'):
196 # Comments must end always with a new line.
197 self.last_comment += '\n'
198
199 def new_general_entity(self, name, value):
200 """See `xmldtd.WFCDTD`."""
201 if not self.started:
202 return
203
204 message = TranslationMessageData()
205 message.msgid_singular = name
206 # CarlosPerelloMarin 20070326: xmldtd parser does an inline
207 # parsing which means that the content is all in a single line so we
208 # don't have a way to show the line number with the source reference.
209 message.file_references_list = ["%s(%s)" % (self.filename, name)]
210 message.addTranslation(TranslationConstants.SINGULAR_FORM, value)
211 message.singular_text = value
212 message.context = self.chrome_path
213 message.source_comment = self.last_comment
214 self.messages.append(message)
215 self.started += 1
216 self.last_comment = None
217
218
219class DtdErrorHandler(utils.ErrorCounter):
220 """Error handler for the DTD parser."""
221 filename = None
222
223 def error(self, msg):
224 raise TranslationFormatSyntaxError(
225 filename=self.filename, message=msg)
226
227 def fatal(self, msg):
228 raise TranslationFormatInvalidInputError(
229 filename=self.filename, message=msg)
230
231
232class DummyDtdFile:
233 """"File" returned when DTD SYSTEM entity tries to include a file."""
234 done = False
235
236 def read(self, *args, **kwargs):
237 """Minimally satisfy attempt to read an included DTD file."""
238 if self.done:
239 return ''
240 else:
241 self.done = True
242 return '<!-- SYSTEM entities not supported. -->'
243
244 def close(self):
245 """Satisfy attempt to close file."""
246 pass
247
248
249class DtdInputSourceFactoryStub:
250 """Replace the class the DTD parser uses to include other DTD files."""
251
252 def create_input_source(self, sysid):
253 """Minimally satisfy attempt to open an included DTD file.
254
255 This is called when the DTD parser hits a SYSTEM entity.
256 """
257 return DummyDtdFile()
258
259
260class DtdFile:
261 """Class for reading translatable messages from a .dtd file.
262
263 It uses DTDParser which fills self.messages with parsed messages.
264 """
265 def __init__(self, filename, chrome_path, content):
266 self.messages = []
267 self.filename = filename
268 self.chrome_path = chrome_path
269
270 # .dtd files are supposed to be using UTF-8 encoding, if the file is
271 # using another encoding, it's against the standard so we reject it
272 try:
273 content = content.decode('utf-8')
274 except UnicodeDecodeError:
275 raise TranslationFormatInvalidInputError, (
276 'Content is not valid UTF-8 text')
277
278 error_handler = DtdErrorHandler()
279 error_handler.filename = filename
280
281 parser = dtdparser.DTDParser()
282 parser.set_error_handler(error_handler)
283 parser.set_inputsource_factory(DtdInputSourceFactoryStub())
284 dtd = MozillaDtdConsumer(parser, filename, chrome_path, self.messages)
285 parser.set_dtd_consumer(dtd)
286 parser.parse_string(content)
287160
288161
289def valid_property_msgid(msgid):162def valid_property_msgid(msgid):
290163
=== modified file 'lib/lp/translations/utilities/tests/test_xpi_dtd_format.py'
--- lib/lp/translations/utilities/tests/test_xpi_dtd_format.py 2009-07-17 00:26:05 +0000
+++ lib/lp/translations/utilities/tests/test_xpi_dtd_format.py 2010-07-20 11:02:51 +0000
@@ -5,7 +5,7 @@
55
6import unittest6import unittest
77
8from lp.translations.utilities.mozilla_xpi_importer import DtdFile8from lp.translations.utilities.mozilla_dtd_parser import DtdFile
9from lp.translations.interfaces.translationimporter import (9from lp.translations.interfaces.translationimporter import (
10 TranslationFormatInvalidInputError)10 TranslationFormatInvalidInputError)
1111