1
=== modified file 'lib/lp/translations/utilities/gettext_po_exporter.py'
2
--- lib/lp/translations/utilities/gettext_po_exporter.py	2009-11-18 13:26:03 +0000
3
+++ lib/lp/translations/utilities/gettext_po_exporter.py	2010-01-15 06:56:16 +0000
4
@@ -277,13 +277,23 @@
5
277
        """See `ITranslationFormatExporter`."""
277
        """See `ITranslationFormatExporter`."""
6
278
        return export_translation_message(translation_message)
278
        return export_translation_message(translation_message)
7
279
279
9
280
    def _makeExportedHeader(self, translation_file, charset=None):
280
    def _makeExportedHeader(self, translation_file):
10
281
        """Transform the header information into a format suitable for export.
281
        """Transform the header information into a format suitable for export.
11
282
282
12
283
        :return: Unicode string containing the header.
283
        :return: Unicode string containing the header.
13
284
        """
284
        """
14
285
        raise NotImplementedError
285
        raise NotImplementedError
15
286
286
16
287
    def _encode_file_content(self, translation_file, exported_content):
17
288
        """Try to encode the file using the charset given in the header."""
18
289
        file_content = (
19
290
            self._makeExportedHeader(translation_file) +
20
291
            u'\n\n' +
21
292
            exported_content)
22
293
        encoded_file_content = file_content.encode(
23
294
            translation_file.header.charset)
24
295
        return encoded_file_content
25
296
26
287
    def exportTranslationFiles(self, translation_files, ignore_obsolete=False,
297
    def exportTranslationFiles(self, translation_files, ignore_obsolete=False,
27
288
                               force_utf8=False):
298
                               force_utf8=False):
28
289
        """See `ITranslationFormatExporter`."""
299
        """See `ITranslationFormatExporter`."""
29
@@ -310,10 +320,7 @@
30
310
                        translation_file.language_code,
320
                        translation_file.language_code,
31
311
                        file_extension))
321
                        file_extension))
32
312
322
37
313
            if force_utf8:
323
            chunks = []
34
314
                translation_file.header.charset = 'UTF-8'
35
315
            chunks = [self._makeExportedHeader(translation_file)]
36
316
38
317
            seen_keys = {}
324
            seen_keys = {}
39
318
325
40
319
            for message in translation_file.messages:
326
            for message in translation_file.messages:
41
@@ -335,49 +342,42 @@
42
335
                if (message.is_obsolete and
342
                if (message.is_obsolete and
43
336
                    (ignore_obsolete or len(message.translations) == 0)):
343
                    (ignore_obsolete or len(message.translations) == 0)):
44
337
                    continue
344
                    continue
83
338
                exported_message = self.exportTranslationMessageData(message)
345
                chunks.append(self.exportTranslationMessageData(message))
84
339
                try:
346
47
340
                    encoded_text = exported_message.encode(
48
341
                        translation_file.header.charset)
49
342
                except UnicodeEncodeError, error:
50
343
                    if translation_file.header.charset.upper() == 'UTF-8':
51
344
                        # It's already UTF-8, we cannot do anything.
52
345
                        raise UnicodeEncodeError(
53
346
                            '%s:\n%s' % (file_path, str(error)))
54
347
55
348
                    # This message cannot be represented in the current
56
349
                    # encoding.
57
350
                    if translation_file.path:
58
351
                        file_description = translation_file.path
59
352
                    elif translation_file.language_code:
60
353
                        file_description = (
61
354
                            "%s translation" % translation_file.language_code)
62
355
                    else:
63
356
                        file_description = "template"
64
357
                    logging.info(
65
358
                        "Can't represent %s as %s; using UTF-8 instead." % (
66
359
                            file_description,
67
360
                            translation_file.header.charset.upper()))
68
361
69
362
                    old_charset = translation_file.header.charset
70
363
                    translation_file.header.charset = 'UTF-8'
71
364
                    # We need to update the header too.
72
365
                    chunks[0] = self._makeExportedHeader(
73
366
                        translation_file, old_charset)
74
367
                    # Update already exported entries.
75
368
                    for index, chunk in enumerate(chunks):
76
369
                        chunks[index] = chunk.decode(
77
370
                            old_charset).encode('UTF-8')
78
371
                    encoded_text = exported_message.encode('UTF-8')
79
372
80
373
                chunks.append(encoded_text)
81
374
82
375
            exported_file_content = '\n\n'.join(chunks)
85
376
347
86
377
            # Gettext .po files are supposed to end with a new line.
348
            # Gettext .po files are supposed to end with a new line.
90
378
            exported_file_content += '\n'
349
            exported_file_content = u'\n\n'.join(chunks) + u'\n'
91
379
350
92
380
            storage.addFile(file_path, file_extension, exported_file_content)
351
            # Try to encode the file
93
352
            if force_utf8:
94
353
                translation_file.header.charset = 'UTF-8'
95
354
            try:
96
355
                encoded_file_content = self._encode_file_content(
97
356
                    translation_file, exported_file_content)
98
357
            except UnicodeEncodeError:
99
358
                if translation_file.header.charset.upper() == 'UTF-8':
100
359
                    # It's already UTF-8, we cannot do anything.
101
360
                    raise
102
361
                # This file content cannot be represented in the current
103
362
                # encoding.
104
363
                if translation_file.path:
105
364
                    file_description = translation_file.path
106
365
                elif translation_file.language_code:
107
366
                    file_description = (
108
367
                        "%s translation" % translation_file.language_code)
109
368
                else:
110
369
                    file_description = "template"
111
370
                logging.info(
112
371
                    "Can't represent %s as %s; using UTF-8 instead." % (
113
372
                        file_description,
114
373
                        translation_file.header.charset.upper()))
115
374
                # Use UTF-8 instead.
116
375
                translation_file.header.charset = 'UTF-8'
117
376
                # This either succeeds or raises UnicodeError.
118
377
                encoded_file_content = self._encode_file_content(
119
378
                    translation_file, exported_file_content)
120
379
121
380
            storage.addFile(file_path, file_extension, encoded_file_content)
122
381
381
123
382
        return storage.export()
382
        return storage.export()
124
383
383
125
@@ -410,13 +410,11 @@
126
410
            TranslationFileFormat.PO,
410
            TranslationFileFormat.PO,
127
411
            TranslationFileFormat.KDEPO]
411
            TranslationFileFormat.KDEPO]
128
412
412
130
413
    def _makeExportedHeader(self, translation_file, charset=None):
413
    def _makeExportedHeader(self, translation_file):
131
414
        """Create a standard gettext PO header, encoded as a message.
414
        """Create a standard gettext PO header, encoded as a message.
132
415
415
133
416
        :return: The header message as a unicode string.
416
        :return: The header message as a unicode string.
134
417
        """
417
        """
135
418
        if charset is None:
136
419
            charset = translation_file.header.charset
137
420
        header_translation_message = TranslationMessageData()
418
        header_translation_message = TranslationMessageData()
138
421
        header_translation_message.addTranslation(
419
        header_translation_message.addTranslation(
139
422
            TranslationConstants.SINGULAR_FORM,
420
            TranslationConstants.SINGULAR_FORM,
140
@@ -427,7 +425,7 @@
141
427
            header_translation_message.flags.update(['fuzzy'])
425
            header_translation_message.flags.update(['fuzzy'])
142
428
        exported_header = self.exportTranslationMessageData(
426
        exported_header = self.exportTranslationMessageData(
143
429
            header_translation_message)
427
            header_translation_message)
145
430
        return exported_header.encode(charset)
428
        return exported_header
146
431
429
147
432
430
148
433
class GettextPOChangedExporter(GettextPOExporterBase):
431
class GettextPOChangedExporter(GettextPOExporterBase):
149
@@ -447,15 +445,13 @@
150
447
        self.format = TranslationFileFormat.POCHANGED
445
        self.format = TranslationFileFormat.POCHANGED
151
448
        self.supported_source_formats = []
446
        self.supported_source_formats = []
152
449
447
154
450
    def _makeExportedHeader(self, translation_file, charset=None):
448
    def _makeExportedHeader(self, translation_file):
155
451
        """Create a header for changed PO files.
449
        """Create a header for changed PO files.
156
452
        This is a reduced header containing a warning that this is an
450
        This is a reduced header containing a warning that this is an
157
453
        icomplete gettext PO file.
451
        icomplete gettext PO file.
158
454
        :return: The header as a unicode string.
452
        :return: The header as a unicode string.
159
455
        """
453
        """
163
456
        if charset is None:
454
        return self.exported_header
161
457
            charset = translation_file.header.charset
162
458
        return self.exported_header.encode(charset)
164
459
455
165
460
    def acceptSingularClash(self, previous_message, current_message):
456
    def acceptSingularClash(self, previous_message, current_message):
166
461
        """See `GettextPOExporterBase`."""
457
        """See `GettextPOExporterBase`."""
167
462
458
168
=== modified file 'lib/lp/translations/utilities/tests/test_gettext_po_exporter.py'
169
--- lib/lp/translations/utilities/tests/test_gettext_po_exporter.py	2009-11-01 22:50:17 +0000
170
+++ lib/lp/translations/utilities/tests/test_gettext_po_exporter.py	2010-01-15 06:56:16 +0000
171
@@ -263,33 +263,9 @@
172
263
        for pofile in pofiles:
263
        for pofile in pofiles:
173
264
            compare(self, pofile)
264
            compare(self, pofile)
174
265
265
200
266
266
    def _testBrokenEncoding(self, pofile_content):
176
267
    def testBrokenEncodingExport(self):
177
268
        """Test what happens when the content and the encoding don't agree.
178
269
179
270
        If a pofile fails to encode using the character set specified in the
180
271
        header, the header should be changed to specify to UTF-8 and the
181
272
        pofile exported accordingly.
182
273
        """
183
274
184
275
        pofile = dedent('''
185
276
            msgid ""
186
277
            msgstr ""
187
278
            "Project-Id-Version: foo\\n"
188
279
            "Report-Msgid-Bugs-To: \\n"
189
280
            "POT-Creation-Date: 2007-07-09 03:39+0100\\n"
190
281
            "PO-Revision-Date: 2001-09-09 01:46+0000\\n"
191
282
            "Last-Translator: Kubla Kahn <kk@pleasure-dome.com>\\n"
192
283
            "Language-Team: LANGUAGE <LL@li.org>\\n"
193
284
            "MIME-Version: 1.0\\n"
194
285
            "Content-Type: text/plain; charset=%s\\n"
195
286
            "Content-Transfer-Encoding: 8bit\\n"
196
287
197
288
            msgid "a"
198
289
            msgstr "%s"
199
290
            ''')
201
291
        translation_file = self.parser.parse(
267
        translation_file = self.parser.parse(
203
292
            pofile % ('ISO-8859-15', '\xe1'))
268
            pofile_content % {'charset': 'ISO-8859-15', 'special': '\xe1'})
204
293
        translation_file.is_template = False
269
        translation_file.is_template = False
205
294
        translation_file.language_code = 'es'
270
        translation_file.language_code = 'es'
206
295
        translation_file.path = 'po/es.po'
271
        translation_file.path = 'po/es.po'
207
@@ -302,9 +278,65 @@
208
302
            [translation_file])
278
            [translation_file])
209
303
279
210
304
        self._compareImportAndExport(
280
        self._compareImportAndExport(
212
305
            pofile.strip() % ('UTF-8', '\xc3\xa1'),
281
            pofile_content.strip() % {
213
282
                'charset': 'UTF-8', 'special': '\xc3\xa1'},
214
306
            exported_file.read().strip())
283
            exported_file.read().strip())
215
307
284
216
285
    def testBrokenEncodingExport(self):
217
286
        """Test what happens when the content and the encoding don't agree.
218
287
219
288
        If a pofile fails to encode using the character set specified in the
220
289
        header, the header should be changed to specify to UTF-8 and the
221
290
        pofile exported accordingly.
222
291
        """
223
292
224
293
        pofile_content = dedent('''
225
294
            msgid ""
226
295
            msgstr ""
227
296
            "Project-Id-Version: foo\\n"
228
297
            "Report-Msgid-Bugs-To: \\n"
229
298
            "POT-Creation-Date: 2007-07-09 03:39+0100\\n"
230
299
            "PO-Revision-Date: 2001-09-09 01:46+0000\\n"
231
300
            "Last-Translator: Kubla Kahn <kk@pleasure-dome.com>\\n"
232
301
            "Language-Team: LANGUAGE <LL@li.org>\\n"
233
302
            "MIME-Version: 1.0\\n"
234
303
            "Content-Type: text/plain; charset=%(charset)s\\n"
235
304
            "Content-Transfer-Encoding: 8bit\\n"
236
305
237
306
            msgid "a"
238
307
            msgstr "%(special)s"
239
308
            ''')
240
309
        self._testBrokenEncoding(pofile_content)
241
310
242
311
    def testBrokenEncodingHeader(self):
243
312
        """A header field might require a different encoding, too.
244
313
245
314
        This usually happens if the Last-Translator name contains non-ascii
246
315
        characters.
247
316
248
317
        If a pofile fails to encode using the character set specified in the
249
318
        header, the header should be changed to specify to UTF-8 and the
250
319
        pofile exported accordingly.
251
320
        """
252
321
253
322
        pofile_content = dedent('''
254
323
            msgid ""
255
324
            msgstr ""
256
325
            "Project-Id-Version: foo\\n"
257
326
            "Report-Msgid-Bugs-To: \\n"
258
327
            "POT-Creation-Date: 2007-07-09 03:39+0100\\n"
259
328
            "PO-Revision-Date: 2001-09-09 01:46+0000\\n"
260
329
            "Last-Translator: Kubla K%(special)shn <kk@pleasure-dome.com>\\n"
261
330
            "Language-Team: LANGUAGE <LL@li.org>\\n"
262
331
            "MIME-Version: 1.0\\n"
263
332
            "Content-Type: text/plain; charset=%(charset)s\\n"
264
333
            "Content-Transfer-Encoding: 8bit\\n"
265
334
266
335
            msgid "a"
267
336
            msgstr "b"
268
337
            ''')
269
338
        self._testBrokenEncoding(pofile_content)
270
339
271
308
    def testIncompletePluralMessage(self):
340
    def testIncompletePluralMessage(self):
272
309
        """Test export correctness for partial plural messages."""
341
        """Test export correctness for partial plural messages."""
273
310
342
Reviewer	Review Type	Date Requested	Status
Данило Шеган (community)	code	2010-01-13	Approve on 2010-01-13
Review via email: mp+17304@code.launchpad.net