Merge lp:~wgrant/launchpad/bug-1707890 into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 18446
Proposed branch: lp:~wgrant/launchpad/bug-1707890
Merge into: lp:launchpad
Diff against target: 105 lines (+37/-29)
2 files modified
lib/lp/translations/browser/browser_helpers.py (+24/-29)
lib/lp/translations/doc/browser-helpers.txt (+13/-0)
To merge this branch: bzr merge lp:~wgrant/launchpad/bug-1707890
Reviewer Review Type Date Requested Status
Adam Collard (community) Approve
Launchpad code reviewers Pending
Review via email: mp+328652@code.launchpad.net

Commit message

Fix Translations' text_to_html to not parse HTML as a C format string.

Description of the change

Fix Translations' text_to_html to not parse HTML as a C format string.

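The c-format flag was previously applied after the input had already been
HTML-escaped and had its whitespace markup inserted, so the generated HTML
was itself parsed as a C format string. Format strings are now parsed from
the raw text of each line first, and each resulting segment is escaped
afterwards.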

To post a comment you must log in.
Revision history for this message
Adam Collard (adam-collard) wrote :

LGTM!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/translations/browser/browser_helpers.py'
--- lib/lp/translations/browser/browser_helpers.py 2012-11-29 06:35:35 +0000
+++ lib/lp/translations/browser/browser_helpers.py 2017-08-07 09:09:17 +0000
@@ -54,7 +54,7 @@
54 if text is None:54 if text is None:
55 return None55 return None
5656
57 lines = []57 markup_lines = []
58 # Replace leading and trailing spaces on each line with special markup.58 # Replace leading and trailing spaces on each line with special markup.
59 if u'\r\n' in text:59 if u'\r\n' in text:
60 newline_chars = u'\r\n'60 newline_chars = u'\r\n'
@@ -62,7 +62,7 @@
62 newline_chars = u'\r'62 newline_chars = u'\r'
63 else:63 else:
64 newline_chars = u'\n'64 newline_chars = u'\n'
65 for line in html_escape(text).split(newline_chars):65 for line in text.split(newline_chars):
66 # Pattern:66 # Pattern:
67 # - group 1: zero or more spaces: leading whitespace67 # - group 1: zero or more spaces: leading whitespace
68 # - group 2: zero or more groups of (zero or68 # - group 2: zero or more groups of (zero or
@@ -72,37 +72,32 @@
72 match = re.match(u'^( *)((?: *[^ ]+)*)( *)$', line)72 match = re.match(u'^( *)((?: *[^ ]+)*)( *)$', line)
7373
74 if match:74 if match:
75 lines.append(75 format_segments = None
76 space * len(match.group(1)) +76 if 'c-format' in flags:
77 match.group(2) +77 try:
78 space * len(match.group(3)))78 format_segments = parse_cformat_string(match.group(2))
79 except UnrecognisedCFormatString:
80 pass
81 if format_segments is not None:
82 markup = ''
83 for segment in format_segments:
84 type, content = segment
85
86 if type == 'interpolation':
87 markup += (u'<code>%s</code>' % html_escape(content))
88 elif type == 'string':
89 markup += html_escape(content)
90 else:
91 markup = html_escape(match.group(2))
92 markup_lines.append(
93 space * len(match.group(1))
94 + markup
95 + space * len(match.group(3)))
79 else:96 else:
80 raise AssertionError(97 raise AssertionError(
81 "A regular expression that should always match didn't.")98 "A regular expression that should always match didn't.")
8299
83 if 'c-format' in flags:100 return expand_rosetta_escapes(newline.join(markup_lines))
84 # Replace c-format sequences with marked-up versions. If there is a
85 # problem parsing the c-format sequences on a particular line, that
86 # line is left unformatted.
87 for i in range(len(lines)):
88 formatted_line = ''
89
90 try:
91 segments = parse_cformat_string(lines[i])
92 except UnrecognisedCFormatString:
93 continue
94
95 for segment in segments:
96 type, content = segment
97
98 if type == 'interpolation':
99 formatted_line += (u'<code>%s</code>' % content)
100 elif type == 'string':
101 formatted_line += content
102
103 lines[i] = formatted_line
104
105 return expand_rosetta_escapes(newline.join(lines))
106101
107102
108def convert_newlines_to_web_form(unicode_text):103def convert_newlines_to_web_form(unicode_text):
109104
=== modified file 'lib/lp/translations/doc/browser-helpers.txt'
--- lib/lp/translations/doc/browser-helpers.txt 2010-10-08 16:58:10 +0000
+++ lib/lp/translations/doc/browser-helpers.txt 2017-08-07 09:09:17 +0000
@@ -206,6 +206,19 @@
206 >>> text_to_html(u'foo\rbar', [])206 >>> text_to_html(u'foo\rbar', [])
207 u'foo<img alt="" src="/@@/translation-newline" /><br/>\nbar'207 u'foo<img alt="" src="/@@/translation-newline" /><br/>\nbar'
208208
209HTML in the input string is escaped.
210
211 >>> text_to_html(u'<b>Test %d</b>', [])
212 u'&lt;b&gt;Test %d&lt;/b&gt;'
213 >>> text_to_html(u'<b>Test %d</b>', ['c-format'])
214 u'&lt;b&gt;Test <code>%d</code>&lt;/b&gt;'
215
216Format strings are parsed before markup is generated (the %q is invalid
217as it has no conversion specifier until the <samp> is injected):
218
219 >>> text_to_html(u'Test %q: ', ['c-format'])
220 u'Test %q:<samp> </samp>'
221
209222
210convert_newlines_to_web_form223convert_newlines_to_web_form
211----------------------------224----------------------------