Merge lp:~wgrant/launchpad/bug-1707890 into lp:launchpad

Proposed by William Grant
Status: Merged
Merged at revision: 18446
Proposed branch: lp:~wgrant/launchpad/bug-1707890
Merge into: lp:launchpad
Diff against target: 105 lines (+37/-29)
2 files modified
lib/lp/translations/browser/browser_helpers.py (+24/-29)
lib/lp/translations/doc/browser-helpers.txt (+13/-0)
To merge this branch: bzr merge lp:~wgrant/launchpad/bug-1707890
Reviewer Review Type Date Requested Status
Adam Collard (community) Approve
Launchpad code reviewers Pending
Review via email: mp+328652@code.launchpad.net

Commit message

Fix Translations' text_to_html to not parse HTML as a C format string.

Description of the change

Fix Translations' text_to_html to not parse HTML as a C format string.

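The c-format flag was previously applied after the input had been
converted to HTML, so the C format string parser was run over escaped
text and injected markup instead of the original string. The format
string is now parsed first, and each resulting segment is HTML-escaped
afterwards.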

To post a comment you must log in.
Revision history for this message
Adam Collard (adam-collard) wrote :

LGTM!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/translations/browser/browser_helpers.py'
2--- lib/lp/translations/browser/browser_helpers.py 2012-11-29 06:35:35 +0000
3+++ lib/lp/translations/browser/browser_helpers.py 2017-08-07 09:09:17 +0000
4@@ -54,7 +54,7 @@
5 if text is None:
6 return None
7
8- lines = []
9+ markup_lines = []
10 # Replace leading and trailing spaces on each line with special markup.
11 if u'\r\n' in text:
12 newline_chars = u'\r\n'
13@@ -62,7 +62,7 @@
14 newline_chars = u'\r'
15 else:
16 newline_chars = u'\n'
17- for line in html_escape(text).split(newline_chars):
18+ for line in text.split(newline_chars):
19 # Pattern:
20 # - group 1: zero or more spaces: leading whitespace
21 # - group 2: zero or more groups of (zero or
22@@ -72,37 +72,32 @@
23 match = re.match(u'^( *)((?: *[^ ]+)*)( *)$', line)
24
25 if match:
26- lines.append(
27- space * len(match.group(1)) +
28- match.group(2) +
29- space * len(match.group(3)))
30+ format_segments = None
31+ if 'c-format' in flags:
32+ try:
33+ format_segments = parse_cformat_string(match.group(2))
34+ except UnrecognisedCFormatString:
35+ pass
36+ if format_segments is not None:
37+ markup = ''
38+ for segment in format_segments:
39+ type, content = segment
40+
41+ if type == 'interpolation':
42+ markup += (u'<code>%s</code>' % html_escape(content))
43+ elif type == 'string':
44+ markup += html_escape(content)
45+ else:
46+ markup = html_escape(match.group(2))
47+ markup_lines.append(
48+ space * len(match.group(1))
49+ + markup
50+ + space * len(match.group(3)))
51 else:
52 raise AssertionError(
53 "A regular expression that should always match didn't.")
54
55- if 'c-format' in flags:
56- # Replace c-format sequences with marked-up versions. If there is a
57- # problem parsing the c-format sequences on a particular line, that
58- # line is left unformatted.
59- for i in range(len(lines)):
60- formatted_line = ''
61-
62- try:
63- segments = parse_cformat_string(lines[i])
64- except UnrecognisedCFormatString:
65- continue
66-
67- for segment in segments:
68- type, content = segment
69-
70- if type == 'interpolation':
71- formatted_line += (u'<code>%s</code>' % content)
72- elif type == 'string':
73- formatted_line += content
74-
75- lines[i] = formatted_line
76-
77- return expand_rosetta_escapes(newline.join(lines))
78+ return expand_rosetta_escapes(newline.join(markup_lines))
79
80
81 def convert_newlines_to_web_form(unicode_text):
82
83=== modified file 'lib/lp/translations/doc/browser-helpers.txt'
84--- lib/lp/translations/doc/browser-helpers.txt 2010-10-08 16:58:10 +0000
85+++ lib/lp/translations/doc/browser-helpers.txt 2017-08-07 09:09:17 +0000
86@@ -206,6 +206,19 @@
87 >>> text_to_html(u'foo\rbar', [])
88 u'foo<img alt="" src="/@@/translation-newline" /><br/>\nbar'
89
90+HTML in the input string is escaped.
91+
92+ >>> text_to_html(u'<b>Test %d</b>', [])
93+ u'&lt;b&gt;Test %d&lt;/b&gt;'
94+ >>> text_to_html(u'<b>Test %d</b>', ['c-format'])
95+ u'&lt;b&gt;Test <code>%d</code>&lt;/b&gt;'
96+
97+Format strings are parsed before markup is generated (the %q is invalid
98+as it has no conversion specifier until the <samp> is injected):
99+
100+ >>> text_to_html(u'Test %q: ', ['c-format'])
101+ u'Test %q:<samp> </samp>'
102+
103
104 convert_newlines_to_web_form
105 ----------------------------