Merge lp:~jameinel/bzr-builddeb/unicode-author-508251 into lp:bzr-builddeb

Proposed by John A Meinel
Status: Merged
Merged at revision: not available
Proposed branch: lp:~jameinel/bzr-builddeb/unicode-author-508251
Merge into: lp:bzr-builddeb
Diff against target: 195 lines (+90/-8)
3 files modified
import_dsc.py (+2/-1)
tests/test_util.py (+63/-0)
util.py (+25/-7)
To merge this branch: bzr merge lp:~jameinel/bzr-builddeb/unicode-author-508251
Reviewer                Review Type    Date Requested    Status
Bzr-builddeb-hackers                                     Pending
Review via email: mp+19662@code.launchpad.net
Revision history for this message
John A Meinel (jameinel) wrote :

This is a basic fix for bug #508251. Specifically it:

1) Tries to decode using utf-8; if that fails, it falls back to iso-8859-1 (sketched below). For now it also mutters the string it failed to decode. (This might get a bit noisy, but it would let you know if there are issues with a given import.)

2) Applies this to both author decoding *and* the commit message. I think the author handling hid the fact that the commit message was also broken. Basically, find_extra_authors decodes everything before bzr gets a chance at it, and bzr was always decoding 'message' as bzrlib.user_encoding, which I assume was always utf-8 on the import machine. Arguably it was succeeding 'by accident' rather than by design.

3) Changes 'find_thanks()' to allow names to start with a Unicode character, rather than requiring strictly A-Z (a small example follows below). If you want, I can bring back "author[0].isupper()" or something like that. Looking at the regex, if I said "Thanks to my cat" it seems reasonable to have 'deb-thanks': ['my cat'] even though it wasn't "Mr Cat". The "Thanks to" and "thank you" parts seem to be a decent filter without having to worry about the exact name. If you want this changed to something else, just let me know.
I can restore the original behavior and change the tests, but it seemed reasonable to allow non-ascii as the first letter of someone's name. Given this changelog entry:
    - Translators: Vital Khilko (be), Vladimir Petkov (bg), Hendrik
      Brandt (de), Kostas Papadimas (el), Adam Weinberger (en_CA), Francisco
      Javier F. Serrador (es), Ilkka Tuohela (fi), Ignacio Casal Quinteiro
      (gl), Ankit Patel (gu), Luca Ferretti (it), Takeshi AIHANA (ja),
      Žygimantas Beručka (lt), Øivind Hoel (nb), Reinout van Schouwen (nl),
      Øivind Hoel (no), Evandro Fernandes Giovanini (pt_BR), Слободан Д.
      Средојевић (sr), Theppitak Karoonboonyanan (th), Clytie Siddall (vi),
      Funda Wang (zh_CN)

At least 3 of those people have non-ascii first letters (Žygimantas, Øivind, etc.).

4) I also made sure to run this locally against 'gnome-panel', which was one of the failing imports. It has certainly gotten a lot farther, and I've checked that it has run into a few of these mixed-encoding sections. Note that this assumes that each changelog block uses a consistent encoding (for the purposes of the commit message), but that actually seems reasonable, as dapper/debian/changelog switches back and forth between iso-8859-1 in some blocks and utf-8 in others.
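
In short, the fallback in (1) boils down to the helper below. This is a trimmed sketch of the safe_decode() added in the diff at the end of this proposal; the real version also mutters whenever it is handed an already-unicode string or has to fall back:

    def safe_decode(s):
        """Decode a byte string, trying utf-8 first, then iso-8859-1."""
        if isinstance(s, unicode):
            # Nothing to do, the caller already gave us unicode.
            return s
        try:
            return s.decode('utf-8')
        except UnicodeDecodeError:
            # Any byte sequence decodes under iso-8859-1 (possibly to the
            # wrong characters), so this cannot raise.
            return s.decode('iso-8859-1')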
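
And as a small, self-contained illustration of (3), here is the relaxed find_thanks() pattern from the diff picking up a name whose first letter is non-ascii (Python 2, utf-8 source file):

    # -*- coding: utf-8 -*-
    import re

    # The pattern as changed in util.py: \w with re.UNICODE instead of [A-Z].
    thanks_re = re.compile(r"[tT]hank(?:(?:s)|(?:you))(?:\s*to)?"
                           "((?:\s+(?:(?:\w\.)|(?:\w+(?:-\w+)*)))+"
                           "(?:\s+<[^@>]+@[^@>]+>)?)",
                           re.UNICODE)

    changes_str = u" * Thanks to Žygimantas Beručka"
    match = thanks_re.search(changes_str)
    assert match.group(1).strip() == u"Žygimantas Beručka"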

Revision history for this message
James Westby (james-w) wrote :

On Thu, 18 Feb 2010 22:17:12 -0000, John A Meinel <email address hidden> wrote:
> This is a basic fix for bug #508251. Specifically it:

Thanks.

> 1) Tries to decode using utf-8, if that fails it falls back to iso-8859-1. For now it also mutters the string it failed to decode. (might get a bit noisy, but it would let you know if there are issues with a given import.)

It will still cause failures if it can't be decoded as
iso-8859-1 either; is that what we want at this stage?

> 4) I also made sure to run this locally against 'gnome-panel', which was one of the failing imports. It has certainly gotten a lot farther, and I've checked that it has run into a few of these mixed-encoding sections. Note that this assumes that each changelog block uses a consistent encoding (for the purposes of the commit message), but that actually seems reasonable, as dapper/debian/changelog switches back and forth between iso-8859-1 in some blocks and utf-8 in others.

Thanks. I'll apply this once you tell me that this test didn't discover
any problems with the change (it obviously isn't blocked on any other
issues that might be found).

Thanks,

James

Revision history for this message
John A Meinel (jameinel) wrote :

James Westby wrote:
> On Thu, 18 Feb 2010 22:17:12 -0000, John A Meinel <email address hidden> wrote:
>> This is a basic fix for bug #508251. Specifically it:
>
> Thanks.
>
>> 1) Tries to decode using utf-8, if that fails it falls back to iso-8859-1. For now it also mutters the string it failed to decode. (might get a bit noisy, but it would let you know if there are issues with a given import.)
>
> It will still cause failures if it can't be decoded in
> iso-8859-1 either, is that what we want at this stage?

iso-8859-1 can decode all possible 8-bit sequences. Possibly
incorrectly, but every byte value maps to a Unicode code point in iso-8859-1.
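
A quick illustration (Python 2, not part of the change itself):

    # Every byte value 0x00-0xFF maps straight to code point U+0000-U+00FF,
    # so decoding as iso-8859-1 can never raise UnicodeDecodeError.
    all_bytes = ''.join(chr(i) for i in range(256))
    assert all_bytes.decode('iso-8859-1') == u''.join(unichr(i) for i in range(256))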

>
> 4) I also made sure to run this locally against 'gnome-panel', which was one of the failing imports. It has certainly gotten a lot farther, and I've checked that it has run into a few of these mixed-encoding sections. Note that this assumes that each changelog block uses a consistent encoding (for the purposes of the commit message), but that actually seems reasonable, as dapper/debian/changelog switches back and forth between iso-8859-1 in some blocks and utf-8 in others.
>
> Thanks, I'll apply this once you tell me that this test didn't discover
> any problems with the change (it obviously isn't blocked on any other
> issues that might be found.)
>
> Thanks,
>
> James
>

The import succeeded. I don't have a way to tell the fidelity of the
result, etc.

I'm slightly concerned that a new import will give different results to
an old import (based on now finding an author that wasn't found before).
But I don't think the import system uses deterministic ids, so it should
be fine.

John
=:->

Revision history for this message
James Westby (james-w) wrote :

On Fri, 19 Feb 2010 08:40:47 -0600, John Arbash Meinel <email address hidden> wrote:
> iso-8859-1 can decode all possible 8-bit sequences. Possibly
> incorrectly, but every byte value maps to a Unicode code point in iso-8859-1.

Ok, so we'll have nonsense on occasion, but no failures. Sounds
reasonable to me.

> The import succeeded. I don't have a way to tell the fidelity of the
> result, etc.

> I'm slightly concerned that a new import will give different results to
> an old import (based on now finding an author that wasn't found before).
> But I don't think the import system uses deterministic ids, so it should
> be fine.

It doesn't use deterministic ids. I don't think it matters much that
there will be differences; there's not a lot we can do about that.

Thanks,

James

Revision history for this message
James Westby (james-w) wrote :

Oh, I'll merge this once I've finished the change I'm currently working
on.

Thanks,

James

Preview Diff

=== modified file 'import_dsc.py'
--- import_dsc.py 2010-02-12 19:58:29 +0000
+++ import_dsc.py 2010-02-18 22:17:11 +0000
@@ -76,6 +76,7 @@
     get_snapshot_revision,
     open_file_via_transport,
     open_transport,
+    safe_decode,
     subprocess_setup,
     )
 
@@ -1251,7 +1252,7 @@
         time_tuple = rfc822.parsedate_tz(raw_timestamp)
         if time_tuple is not None:
             timestamp = (time.mktime(time_tuple[:9]), time_tuple[9])
-        author = cl.author.decode("utf-8")
+        author = safe_decode(cl.author)
         versions = self._get_safe_versions_from_changelog(cl)
         assert not self.has_version(version), \
             "Trying to import version %s again" % str(version)
 
=== modified file 'tests/test_util.py'
--- tests/test_util.py 2010-02-12 16:41:15 +0000
+++ tests/test_util.py 2010-02-18 22:17:11 +0000
@@ -45,6 +45,7 @@
     move_file_if_different,
     get_parent_dir,
     recursive_copy,
+    safe_decode,
     strip_changelog_message,
     suite_to_distribution,
     tarball_name,
@@ -84,6 +85,19 @@
         self.failUnlessExists('a/f')
 
 
+class SafeDecodeTests(TestCase):
+
+    def assertSafeDecode(self, expected, val):
+        self.assertEqual(expected, safe_decode(val))
+
+    def test_utf8(self):
+        self.assertSafeDecode(u'ascii', 'ascii')
+        self.assertSafeDecode(u'\xe7', '\xc3\xa7')
+
+    def test_iso_8859_1(self):
+        self.assertSafeDecode(u'\xe7', '\xe7')
+
+
 cl_block1 = """\
 bzr-builddeb (0.17) unstable; urgency=low
 
@@ -467,6 +481,22 @@
         self.assertEqual([u"A. Hacker", u"B. Hacker"], authors)
         self.assertEqual([unicode]*len(authors), map(type, authors))
 
+    def test_find_extra_authors_utf8(self):
+        changes = [" * Do foo", "", " [ \xc3\xa1. Hacker ]", " * Do bar", "",
+                   " [ \xc3\xa7. Hacker ]", " [ A. Hacker}"]
+        authors = find_extra_authors(changes)
+        self.assertEqual([u"\xe1. Hacker", u"\xe7. Hacker"], authors)
+        self.assertEqual([unicode]*len(authors), map(type, authors))
+
+    def test_find_extra_authors_iso_8859_1(self):
+        # We try to treat lines as utf-8, but if that fails to decode, we fall
+        # back to iso-8859-1
+        changes = [" * Do foo", "", " [ \xe1. Hacker ]", " * Do bar", "",
+                   " [ \xe7. Hacker ]", " [ A. Hacker}"]
+        authors = find_extra_authors(changes)
+        self.assertEqual([u"\xe1. Hacker", u"\xe7. Hacker"], authors)
+        self.assertEqual([unicode]*len(authors), map(type, authors))
+
     def test_find_extra_authors_no_changes(self):
         authors = find_extra_authors([])
         self.assertEqual([], authors)
@@ -504,6 +534,8 @@
         self.assert_thanks_is(changes, [u"A. Hacker <ahacker@example.com>"])
         changes = [" * Thanks to Adeodato Sim\xc3\x83\xc2\xb3"]
         self.assert_thanks_is(changes, [u"Adeodato Sim\xc3\xb3"])
+        changes = [" * Thanks to \xc3\x81deodato Sim\xc3\x83\xc2\xb3"]
+        self.assert_thanks_is(changes, [u"\xc1deodato Sim\xc3\xb3"])
 
     def test_find_bugs_fixed_no_changes(self):
         self.assertEqual([], find_bugs_fixed([], None, _lplib=MockLaunchpad()))
@@ -582,6 +614,37 @@
         self.assertEqual(find_bugs_fixed(changes, wt.branch,
             _lplib=MockLaunchpad()), bugs)
 
+    def assertUnicodeCommitInfo(self, changes):
+        wt = self.make_branch_and_tree(".")
+        changelog = Changelog()
+        author = "J. Maintainer <maint@example.com>"
+        changelog.new_block(changes=changes, author=author)
+        message, authors, thanks, bugs = \
+            get_commit_info_from_changelog(changelog, wt.branch,
+                _lplib=MockLaunchpad())
+        self.assertEqual(u'[ \xc1. Hacker ]\n'
+                         u'* First ch\xe1nge, LP: #12345\n'
+                         u'* Second change, thanks to \xde. Hacker',
+                         message)
+        self.assertEqual([author, u'\xc1. Hacker'], authors)
+        self.assertEqual(unicode, type(authors[0]))
+        self.assertEqual([u'\xde. Hacker'], thanks)
+        self.assertEqual(['https://launchpad.net/bugs/12345 fixed'], bugs)
+
+    def test_get_commit_info_utf8(self):
+        changes = [" [ \xc3\x81. Hacker ]",
+                   " * First ch\xc3\xa1nge, LP: #12345",
+                   " * Second change, thanks to \xc3\x9e. Hacker"]
+        self.assertUnicodeCommitInfo(changes)
+
+    def test_get_commit_info_iso_8859_1(self):
+        # Changelogs aren't always well-formed UTF-8, so we fall back to
+        # iso-8859-1 if we fail to decode utf-8.
+        changes = [" [ \xc1. Hacker ]",
+                   " * First ch\xe1nge, LP: #12345",
+                   " * Second change, thanks to \xde. Hacker"]
+        self.assertUnicodeCommitInfo(changes)
+
 
 class MockLaunchpad(object):
 
=== modified file 'util.py'
--- util.py 2010-02-10 13:43:44 +0000
+++ util.py 2010-02-18 22:17:11 +0000
@@ -56,6 +56,24 @@
     )
 
 
+def safe_decode(s):
+    """Decode a string into a Unicode value."""
+    if isinstance(s, unicode): # Already unicode
+        mutter('safe_decode() called on an already-unicode string: %r' % (s,))
+        return s
+    try:
+        return s.decode('utf-8')
+    except UnicodeDecodeError, e:
+        mutter('safe_decode(%r) falling back to iso-8859-1' % (s,))
+        # TODO: Looking at BeautifulSoup it seems to use 'chardet' to try to
+        #       guess the encoding of a given text stream. We might want to
+        #       take a closer look at that.
+        # TODO: Another possibility would be to make the fallback encoding
+        #       configurable, possibly exposed as a command-line flag, for now,
+        #       this seems 'good enough'.
+        return s.decode('iso-8859-1')
+
+
 def recursive_copy(fromdir, todir):
     """Copy the contents of fromdir to todir.
 
@@ -392,13 +410,13 @@
 
 
 def find_extra_authors(changes):
-    extra_author_re = re.compile(r"\s*\[([^\]]+)]\s*", re.UNICODE)
+    extra_author_re = re.compile(r"\s*\[([^\]]+)]\s*")
     authors = []
     for change in changes:
         # Parse out any extra authors.
-        match = extra_author_re.match(change.decode("utf-8"))
+        match = extra_author_re.match(change)
         if match is not None:
-            new_author = match.group(1).strip()
+            new_author = safe_decode(match.group(1).strip())
             already_included = False
             for author in authors:
                 if author.startswith(new_author):
@@ -411,11 +429,11 @@
 
 def find_thanks(changes):
     thanks_re = re.compile(r"[tT]hank(?:(?:s)|(?:you))(?:\s*to)?"
-            "((?:\s+(?:(?:[A-Z]\.)|(?:[A-Z]\w+(?:-[A-Z]\w+)*)))+"
+            "((?:\s+(?:(?:\w\.)|(?:\w+(?:-\w+)*)))+"
             "(?:\s+<[^@>]+@[^@>]+>)?)",
             re.UNICODE)
     thanks = []
-    changes_str = " ".join(changes).decode("utf-8")
+    changes_str = safe_decode(" ".join(changes))
     for match in thanks_re.finditer(changes_str):
         if thanks is None:
             thanks = []
@@ -446,12 +464,12 @@
     bugs = []
     if changelog._blocks:
         block = changelog._blocks[0]
-        authors = [block.author.decode("utf-8")]
+        authors = [safe_decode(block.author)]
         changes = strip_changelog_message(block.changes())
         authors += find_extra_authors(changes)
         bugs = find_bugs_fixed(changes, branch, _lplib=_lplib)
         thanks = find_thanks(changes)
-        message = "\n".join(changes).replace("\r", "")
+        message = safe_decode("\n".join(changes).replace("\r", ""))
     return (message, authors, thanks, bugs)
 
 
