Merge lp:~stefanor/ibid/microblogging-345112 into lp:~ibid-core/ibid/old-trunk-pack-0.92

Proposed by Stefano Rivera
Status: Merged
Approved by: Jonathan Hitchcock
Approved revision: 591
Merged at revision: 588
Proposed branch: lp:~stefanor/ibid/microblogging-345112
Merge into: lp:~ibid-core/ibid/old-trunk-pack-0.92
Diff against target: None lines
To merge this branch: bzr merge lp:~stefanor/ibid/microblogging-345112
Reviewer | Review Type | Date Requested | Status
Jonathan Hitchcock | | | Approve
Michael Gorven | | | Approve
Review via email: mp+4657@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stefano Rivera (stefanor) wrote :

This branch also does some other things that I came across while fixing the microblogging bug.

Revision history for this message
Michael Gorven (mgorven) wrote :

 review approve

review: Approve
Revision history for this message
Jonathan Hitchcock (vhata) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ibid/plugins/google.py'
2--- ibid/plugins/google.py 2009-03-09 19:26:27 +0000
3+++ ibid/plugins/google.py 2009-03-19 08:15:15 +0000
4@@ -1,29 +1,22 @@
5+from cgi import parse_qs
6 import htmlentitydefs
7 import re
8 import simplejson
9 from urllib import quote
10 from urllib2 import urlopen, Request
11+from urlparse import urlparse
12
13 from BeautifulSoup import BeautifulSoup
14
15 from ibid.plugins import Processor, match
16 from ibid.config import Option
17-from ibid.utils import ibid_version
18+from ibid.utils import decode_htmlentities, ibid_version
19
20 help = {'google': u'Retrieves results from Google and Google Calculator.'}
21
22 default_user_agent = 'Mozilla/5.0'
23 default_referrer = "http://ibid.omnia.za.net/"
24
25-def de_entity(text):
26- "Remove HTML entities, and replace with their characters"
27- replace = lambda match: unichr(int(match.group(1)))
28- text = re.sub("&#(\d+);", replace, text)
29-
30- replace = lambda match: unichr(htmlentitydefs.name2codepoint[match.group(1)])
31- text = re.sub("&(\w+);", replace, text)
32- return text
33-
34 class GoogleAPISearch(Processor):
35 u"""google [for] <term>
36 googlefight [for] <term> and <term>"""
37@@ -58,7 +51,7 @@
38
39 title = item["titleNoFormatting"]
40
41- results.append(u'"%s" %s' % (de_entity(title), item["unescapedUrl"]))
42+ results.append(u'"%s" %s' % (decode_htmlentities(title), item["unescapedUrl"]))
43
44 if results:
45 event.addresponse(u'%s', u', '.join(results))
46@@ -120,7 +113,7 @@
47
48 definitions = []
49 for li in soup.findAll('li'):
50- definitions.append(de_entity(li.contents[0].strip()))
51+ definitions.append(decode_htmlentities(li.contents[0].strip()))
52
53 if definitions:
54 event.addresponse(u'%s', u' :: '.join(definitions))
55@@ -137,10 +130,12 @@
56 for item in items:
57 try:
58 url = item.a['href']
59+ if url.startswith(u"/aclk?"):
60+ url = parse_qs(urlparse(url).query)['q'][0]
61 title = u''.join([e.string for e in item.a.contents])
62 if title.startswith("Image results for"):
63 continue
64- results.append(u'"%s" %s' % (de_entity(title), url))
65+ results.append(u'"%s" %s' % (decode_htmlentities(title), url))
66 except Exception:
67 pass
68 if len(results) >= 8:
69
70=== modified file 'ibid/plugins/lookup.py'
71--- ibid/plugins/lookup.py 2009-03-18 11:36:41 +0000
72+++ ibid/plugins/lookup.py 2009-03-19 08:16:53 +0000
73@@ -155,7 +155,7 @@
74 status = loads(f.read())
75 f.close()
76
77- return {'screen_name': status['user']['screen_name'], 'text': status['text']}
78+ return {'screen_name': status['user']['screen_name'], 'text': decode_htmlentities(status['text'])}
79
80 def remote_latest(self, service, user):
81 service_url = self.services[service]
82@@ -170,7 +170,7 @@
83 url = "%s/notice/%i" % (service_url[:-5], latest["id"])
84
85 return {
86- 'text': latest['text'],
87+ 'text': decode_htmlentities(latest['text']),
88 'ago': ago(datetime.utcnow() - datetime.strptime(latest["created_at"], '%a %b %d %H:%M:%S +0000 %Y'), 1),
89 'url': url,
90 }
91
92=== modified file 'ibid/utils.py'
93--- ibid/utils.py 2009-03-10 10:45:47 +0000
94+++ ibid/utils.py 2009-03-19 08:05:41 +0000
95@@ -34,21 +34,13 @@
96 formatted = ' and '.join(parts)
97 return formatted.replace(' and ', ', ', len(parts)-2)
98
99-def substitute_entity(match):
100- ent = match.group(2)
101- if match.group(1) == "#":
102- return unichr(int(ent))
103- else:
104- cp = name2codepoint.get(ent)
105-
106- if cp:
107- return unichr(cp)
108- else:
109- return match.group()
110-
111-def decode_htmlentities(string):
112- entity_re = re.compile("&(#?)(\d{1,5}|\w{1,8});")
113- return entity_re.subn(substitute_entity, string)[0]
114+def decode_htmlentities(text):
115+ replace = lambda match: unichr(int(match.group(1)))
116+ text = re.sub("&#(\d+);", replace, text)
117+
118+ replace = lambda match: match.group(1) in name2codepoint and unichr(name2codepoint[match.group(1)]) or match.group(0)
119+ text = re.sub("&(\w+);", replace, text)
120+ return text
121
122 def cacheable_download(url, cachefile):
123 """Download url to cachefile if it's modified since cachefile.

Subscribers

People subscribed via source and target branches