Merge lp:~cjwatson/launchpad/defusedxml into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18997
Proposed branch: lp:~cjwatson/launchpad/defusedxml
Merge into: lp:launchpad
Diff against target: 283 lines (+36/-25)
11 files modified
constraints.txt (+1/-0)
lib/lp/bugs/externalbugtracker/bugzilla.py (+4/-2)
lib/lp/bugs/externalbugtracker/xmlrpc.py (+5/-1)
lib/lp/bugs/scripts/bugimport.py (+3/-3)
lib/lp/bugs/scripts/cveimport.py (+3/-3)
lib/lp/bugs/scripts/tests/test_bugimport.py (+2/-2)
lib/lp/hardwaredb/scripts/hwdbsubmissions.py (+3/-4)
lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py (+4/-3)
lib/lp/services/xmlrpc.py (+6/-1)
lib/lp/translations/utilities/xpi_header.py (+4/-6)
setup.py (+1/-0)
To merge this branch: bzr merge lp:~cjwatson/launchpad/defusedxml
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+368991@code.launchpad.net

Commit message

Use defusedxml to parse untrusted XML.

Description of the change

Python's standard library documentation recommends this (https://docs.python.org/2/library/xml.html#xml-vulnerabilities), and it seems like a good idea to harden Launchpad against something like a malicious external bug tracker.

The monkey-patching requirement for XML-RPC is a bit unfortunate, but this is mostly just for Bugzilla and Trac so it's tolerable.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'constraints.txt'
2--- constraints.txt 2019-06-14 14:31:08 +0000
3+++ constraints.txt 2019-06-18 17:05:22 +0000
4@@ -246,6 +246,7 @@
5 cssselect==0.9.1
6 cssutils==0.9.10
7 d2to1==0.2.12
8+defusedxml==0.6.0
9 Django==1.4
10 dkimpy==0.5.4
11 # Required by dkimpy
12
13=== modified file 'lib/lp/bugs/externalbugtracker/bugzilla.py'
14--- lib/lp/bugs/externalbugtracker/bugzilla.py 2018-11-12 10:59:20 +0000
15+++ lib/lp/bugs/externalbugtracker/bugzilla.py 2019-06-18 17:05:22 +0000
16@@ -1,4 +1,4 @@
17-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
18+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
19 # GNU Affero General Public License version 3 (see the file LICENSE).
20
21 """Bugzilla ExternalBugTracker utility."""
22@@ -15,10 +15,10 @@
23 from httplib import BadStatusLine
24 import re
25 import string
26-from xml.dom import minidom
27 import xml.parsers.expat
28 import xmlrpclib
29
30+from defusedxml import minidom
31 import pytz
32 import requests
33 import six
34@@ -192,6 +192,8 @@
35 bad_chars = bad_chars.replace(char, '')
36 trans_map = string.maketrans(bad_chars, ' ' * len(bad_chars))
37 contents = contents.translate(trans_map)
38+ # Don't use forbid_dtd=True here; Bugzilla XML responses seem to
39+ # include DOCTYPE declarations.
40 return minidom.parseString(contents)
41
42 def _probe_version(self):
43
44=== modified file 'lib/lp/bugs/externalbugtracker/xmlrpc.py'
45--- lib/lp/bugs/externalbugtracker/xmlrpc.py 2018-12-10 13:54:34 +0000
46+++ lib/lp/bugs/externalbugtracker/xmlrpc.py 2019-06-18 17:05:22 +0000
47@@ -1,4 +1,4 @@
48-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
49+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
50 # GNU Affero General Public License version 3 (see the file LICENSE).
51
52 """An XMLRPC transport which uses requests."""
53@@ -19,6 +19,7 @@
54 Transport,
55 )
56
57+from defusedxml.xmlrpc import monkey_patch
58 import requests
59 from requests.cookies import RequestsCookieJar
60 import six
61@@ -31,6 +32,9 @@
62 )
63 from lp.services.utils import traceback_info
64
65+# Protect against various XML parsing vulnerabilities.
66+monkey_patch()
67+
68
69 class RequestsTransport(Transport):
70 """An XML-RPC transport which uses requests.
71
72=== modified file 'lib/lp/bugs/scripts/bugimport.py'
73--- lib/lp/bugs/scripts/bugimport.py 2016-09-19 12:33:59 +0000
74+++ lib/lp/bugs/scripts/bugimport.py 2019-06-18 17:05:22 +0000
75@@ -1,4 +1,4 @@
76-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
77+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
78 # GNU Affero General Public License version 3 (see the file LICENSE).
79
80 """An XML bug importer
81@@ -20,8 +20,8 @@
82 import logging
83 import os
84 import time
85-from xml.etree import cElementTree
86
87+from defusedxml import cElementTree
88 import pytz
89 from storm.store import Store
90 from zope.component import getUtility
91@@ -237,7 +237,7 @@
92
93 def importBugs(self, ztm):
94 """Import bugs from a file."""
95- tree = cElementTree.parse(self.bugs_filename)
96+ tree = cElementTree.parse(self.bugs_filename, forbid_dtd=True)
97 root = tree.getroot()
98 assert root.tag == '{%s}launchpad-bugs' % BUGS_XMLNS, (
99 "Root element is wrong: %s" % root.tag)
100
101=== modified file 'lib/lp/bugs/scripts/cveimport.py'
102--- lib/lp/bugs/scripts/cveimport.py 2018-06-05 16:48:05 +0000
103+++ lib/lp/bugs/scripts/cveimport.py 2019-06-18 17:05:22 +0000
104@@ -1,4 +1,4 @@
105-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
106+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
107 # GNU Affero General Public License version 3 (see the file LICENSE).
108
109 """A set of functions related to the ability to parse the XML CVE database,
110@@ -10,8 +10,8 @@
111 import gzip
112 import io
113 import time
114-import xml.etree.cElementTree as cElementTree
115
116+import defusedxml.cElementTree as cElementTree
117 import requests
118 from zope.component import getUtility
119 from zope.event import notify
120@@ -248,7 +248,7 @@
121
122 :param cve_xml: The CVE XML as a string.
123 """
124- dom = cElementTree.fromstring(cve_xml)
125+ dom = cElementTree.fromstring(cve_xml, forbid_dtd=True)
126 items = dom.findall(CVEDB_NS + 'item')
127 if len(items) == 0:
128 raise LaunchpadScriptFailure("No CVEs found in XML file.")
129
130=== modified file 'lib/lp/bugs/scripts/tests/test_bugimport.py'
131--- lib/lp/bugs/scripts/tests/test_bugimport.py 2016-09-19 12:33:59 +0000
132+++ lib/lp/bugs/scripts/tests/test_bugimport.py 2019-06-18 17:05:22 +0000
133@@ -1,12 +1,12 @@
134-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
135+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
136 # GNU Affero General Public License version 3 (see the file LICENSE).
137
138 __metaclass__ = type
139
140 import os
141 import re
142-import xml.etree.cElementTree as ET
143
144+import defusedxml.cElementTree as ET
145 import pytz
146 from testtools.content import text_content
147 import transaction
148
149=== modified file 'lib/lp/hardwaredb/scripts/hwdbsubmissions.py'
150--- lib/lp/hardwaredb/scripts/hwdbsubmissions.py 2019-04-16 17:16:08 +0000
151+++ lib/lp/hardwaredb/scripts/hwdbsubmissions.py 2019-06-18 17:05:22 +0000
152@@ -1,4 +1,4 @@
153-# Copyright 2009-2018 Canonical Ltd. This software is licensed under the
154+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
155 # GNU Affero General Public License version 3 (see the file LICENSE).
156
157 """Parse Hardware Database submissions.
158@@ -25,8 +25,8 @@
159 import os
160 import re
161 import sys
162-import xml.etree.cElementTree as etree
163
164+import defusedxml.cElementTree as etree
165 import pytz
166 from zope.component import getUtility
167 from zope.interface import implementer
168@@ -133,7 +133,6 @@
169 if logger is None:
170 logger = getLogger()
171 self.logger = logger
172- self.doc_parser = etree.XMLParser()
173 self._logged_warnings = set()
174
175 self.validator = {}
176@@ -216,7 +215,7 @@
177 """
178 submission = self.fixFrequentErrors(submission)
179 try:
180- tree = etree.parse(io.BytesIO(submission), parser=self.doc_parser)
181+ tree = etree.parse(io.BytesIO(submission), forbid_dtd=True)
182 except SyntaxError as error_value:
183 self._logError(error_value, submission_key)
184 return None
185
186=== modified file 'lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py'
187--- lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py 2019-04-16 17:16:08 +0000
188+++ lib/lp/hardwaredb/scripts/tests/test_hwdb_submission_parser.py 2019-06-18 17:05:22 +0000
189@@ -1,4 +1,4 @@
190-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
191+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
192 # GNU Affero General Public License version 3 (see the file LICENSE).
193
194 """Tests of the HWDB submissions parser."""
195@@ -10,8 +10,9 @@
196 import logging
197 import os
198 from textwrap import dedent
199-import xml.etree.cElementTree as etree
200+from xml.etree.cElementTree import Element
201
202+import defusedxml.cElementTree as etree
203 import pytz
204 from zope.testing.loghandler import Handler
205
206@@ -155,7 +156,7 @@
207
208 def getTimestampETreeNode(self, time_string):
209 """Return an Elementtree node for an XML tag with a timestamp."""
210- return etree.Element('date_created', value=time_string)
211+ return Element('date_created', value=time_string)
212
213 def testTimeConversion(self):
214 """Test of the conversion of a "time string" into datetime object."""
215
216=== modified file 'lib/lp/services/xmlrpc.py'
217--- lib/lp/services/xmlrpc.py 2014-08-29 01:41:14 +0000
218+++ lib/lp/services/xmlrpc.py 2019-06-18 17:05:22 +0000
219@@ -1,4 +1,4 @@
220-# Copyright 2010 Canonical Ltd. This software is licensed under the
221+# Copyright 2010-2019 Canonical Ltd. This software is licensed under the
222 # GNU Affero General Public License version 3 (see the file LICENSE).
223
224 """Generic code for XML-RPC in Launchpad."""
225@@ -12,6 +12,11 @@
226 import socket
227 import xmlrpclib
228
229+from defusedxml.xmlrpc import monkey_patch
230+
231+# Protect against various XML parsing vulnerabilities.
232+monkey_patch()
233+
234
235 class LaunchpadFault(xmlrpclib.Fault):
236 """Base class for a Launchpad XMLRPC fault.
237
238=== modified file 'lib/lp/translations/utilities/xpi_header.py'
239--- lib/lp/translations/utilities/xpi_header.py 2015-10-14 16:23:18 +0000
240+++ lib/lp/translations/utilities/xpi_header.py 2019-06-18 17:05:22 +0000
241@@ -1,4 +1,4 @@
242-# Copyright 2009 Canonical Ltd. This software is licensed under the
243+# Copyright 2009-2019 Canonical Ltd. This software is licensed under the
244 # GNU Affero General Public License version 3 (see the file LICENSE).
245
246 __metaclass__ = type
247@@ -7,13 +7,10 @@
248 'XpiHeader',
249 ]
250
251-try:
252- import xml.etree.cElementTree as cElementTree
253-except ImportError:
254- import cElementTree
255 from email.utils import parseaddr
256 from StringIO import StringIO
257
258+import defusedxml.cElementTree as cElementTree
259 from zope.interface import implementer
260
261 from lp.translations.interfaces.translationcommonformat import (
262@@ -68,7 +65,8 @@
263 # Both cElementTree and elementtree fail when trying to parse
264 # proper unicode strings. Use our raw input instead.
265 try:
266- parse = cElementTree.iterparse(StringIO(self._raw_content))
267+ parse = cElementTree.iterparse(
268+ StringIO(self._raw_content), forbid_dtd=True)
269 for event, elem in parse:
270 if elem.tag == contributor_tag:
271 # An XPI header can list multiple contributors, but
272
273=== modified file 'setup.py'
274--- setup.py 2019-04-24 16:13:34 +0000
275+++ setup.py 2019-06-18 17:05:22 +0000
276@@ -153,6 +153,7 @@
277 'celery',
278 'cssselect',
279 'cssutils',
280+ 'defusedxml',
281 'dkimpy',
282 # Required for dkimpy
283 'dnspython',