Merge lp:~stefanor/ibid/google-api-336419 into lp:~ibid-core/ibid/old-trunk-pack-0.92

Proposed by Stefano Rivera
Status: Merged
Approved by: Stefano Rivera
Approved revision: 564
Merged at revision: 559
Proposed branch: lp:~stefanor/ibid/google-api-336419
Merge into: lp:~ibid-core/ibid/old-trunk-pack-0.92
Diff against target: None lines
To merge this branch: bzr merge lp:~stefanor/ibid/google-api-336419
Reviewer Review Type Date Requested Status
Jonathan Hitchcock Approve
Michael Gorven Approve
Review via email: mp+4120@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stefano Rivera (stefanor) wrote :

We lose a teeny bit of functionality (ZA search), but get a really nice, fast search without silly Google tracking URLs.

lp:~stefanor/ibid/google-api-336419 updated
559. By Stefano Rivera

Handle 0 items returned correctly

Revision history for this message
Michael Gorven (mgorven) wrote :

I'd like both scraping and API searches to work. The plugin should work out
the box, and the bot owner can then get an API key and use the API search if
they want to. We could do this by having a Processor for each, with the API
processor at a lower priority.
 review needs_fixing

review: Needs Fixing
Revision history for this message
Stefano Rivera (stefanor) wrote :

"The plugin should work out the box, and the bot owner can then get an API key and use the API search if they want to."

It does work out the box. The API key is optional. If you are doing very high volumes of search, Google will be less likely to disconnect you if you are using a key.

lp:~stefanor/ibid/google-api-336419 updated
560. By Stefano Rivera

Split Google into API and scrape processors.
Change googlefight syntax.

561. By Stefano Rivera

Country search

562. By Stefano Rivera

Return more than one result for country search

Revision history for this message
Michael Gorven (mgorven) wrote :

 review approve

review: Approve
Revision history for this message
Jonathan Hitchcock (vhata) :
review: Approve
lp:~stefanor/ibid/google-api-336419 updated
563. By Stefano Rivera

Don't include a list of countries, just match [a-z]{2}

564. By Stefano Rivera

Allow the 'for' in 'google.TLD for terms'

565. By Stefano Rivera

Remove entities from results

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ibid/plugins/google.py'
2--- ibid/plugins/google.py 2009-03-01 23:01:30 +0000
3+++ ibid/plugins/google.py 2009-03-03 22:52:47 +0000
4@@ -1,53 +1,81 @@
5+import htmlentitydefs
6+import re
7+import simplejson
8 from urllib import quote
9 from urllib2 import urlopen, Request
10+
11 from BeautifulSoup import BeautifulSoup
12
13 from ibid.plugins import Processor, match
14 from ibid.config import Option
15+from ibid.utils import ibid_version
16
17 help = {'google': u'Retrieves results from Google and Google Calculator.'}
18
19 user_agent = 'Mozilla/5.0'
20
21 class Search(Processor):
22- u"""google [for] <term>"""
23+ u"""google [for] <term>
24+ gcalc <expression>
25+ gdefine <term>
26+ google cmp [for] <term> and <term>"""
27 feature = 'google'
28
29- user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
30-
31- @match(r'^google\s+(?:(za)\s+)?(?:for\s+)?(.+?)$')
32- def search(self, event, country, query):
33- url = 'http://www.google.com/search?num=3&q=%s' % quote(query)
34- if country:
35- url = url + '&meta=cr%%3Dcountry%s' % country.upper()
36-
37- f = urlopen(Request(url, headers={'user-agent': self.user_agent}))
38+ api_key = Option('api_key', 'Your Google API Key (optional)', None)
39+ referrer = Option('referrer', 'The referrer string to use (API searches)', "http://ibid.omnia.za.net/")
40+ user_agent = Option('user_agent', 'HTTP user agent to present to Google (for non-API searches)', user_agent)
41+
42+ google_api_url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s"
43+ google_scrape_url = "http://www.google.com/search?num=1&q=%s"
44+
45+ def _google_api_search(self, query, resultsize="large"):
46+ url = self.google_api_url % quote(query)
47+ url += "&rsz=%s" % resultsize
48+ if self.api_key:
49+ url += '&key=%s' % quote(key)
50+ req = Request(url, headers={
51+ 'user-agent': "Ibid/%s" % ibid_version() or "dev",
52+ 'referrer': self.referrer,
53+ })
54+ f = urlopen(req)
55+ result = f.readlines()
56+ f.close()
57+ result = "".join(result)
58+ result = simplejson.loads(result)
59+ return result
60+
61+ def _google_scrape_search(self, query):
62+ f = urlopen(Request(self.google_scrape_url % quote(query), headers={'user-agent': self.user_agent}))
63 soup = BeautifulSoup(f.read())
64 f.close()
65-
66+ return soup
67+
68+ @match(r'^google\s+(?:for\s+)?(.+?)$')
69+ def search(self, event, query):
70+
71+ # Clashing regexs:
72+ if self.compare.im_func.pattern.match(event.message):
73+ return
74+
75+ items = self._google_api_search(query)
76 results = []
77- items = soup.findAll('li')[:10]
78- for item in items:
79- try:
80- url = item.a['href']
81- title = u''.join([e.string for e in item.a.contents])
82- results.append(u'"%s" %s' % (title, url))
83- except Exception:
84- pass
85-
86+ for item in items["responseData"]["results"]:
87+
88+ title = item["titleNoFormatting"]
89+
90+ replace = lambda match: unichr(int(match.group(1)))
91+ title = re.sub("&#(\d+);", replace, title)
92+
93+ replace = lambda match: unichr(htmlentitydefs.name2codepoint[match.group(1)])
94+ title = re.sub("&(\w+);", replace, title)
95+
96+ results.append(u'"%s" %s' % (title, item["unescapedUrl"]))
97 event.addresponse(u', '.join(results))
98
99-class Calc(Processor):
100- u"""gcalc <expression>"""
101- feature = 'google'
102-
103- user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
104-
105 @match(r'^gcalc\s+(.+)$')
106 def calc(self, event, expression):
107- f = urlopen(Request('http://www.google.com/search?num=1&q=%s' % quote(expression), headers={'user-agent': self.user_agent}))
108- soup = BeautifulSoup(f.read())
109- f.close()
110+
111+ soup = self._google_scrape_search(expression)
112
113 font = soup.find('font', size='+1')
114 if not font:
115@@ -55,51 +83,25 @@
116 else:
117 event.addresponse(font.b.string)
118
119-class Define(Processor):
120- u"""gdefine <term>"""
121- feature = 'google'
122-
123- user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
124-
125 @match(r'^gdefine\s+(.+)$')
126 def define(self, event, term):
127- f = urlopen(Request('http://www.google.com/search?num=1&q=define:%s' % quote(term), headers={'user-agent': self.user_agent}))
128- soup = BeautifulSoup(f.read())
129- f.close()
130+
131+ soup = self._google_scrape_search("define:%s" % term)
132
133 definitions = []
134 for li in soup.findAll('li'):
135- definitions.append('"%s"' % li.contents[0].strip())
136+ definitions.append(li.contents[0].strip())
137
138 if definitions:
139- event.addresponse(', '.join(definitions))
140+ event.addresponse(u' :: '.join(definitions))
141 else:
142 event.addresponse(u"Are you making up words again?")
143
144-class Compare(Processor):
145- u"""google cmp [for] <term> and <term>"""
146- feature = 'google'
147-
148- user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
149-
150-
151- def results(self, term):
152- f = urlopen(Request('http://www.google.com/search?num=1&q=%s' % quote(term), headers={'user-agent': self.user_agent}))
153- soup = BeautifulSoup(f.read())
154- f.close()
155-
156- noresults = soup.findAll('div', attrs={'class': 'med'})
157- if noresults and len(noresults) > 1 and noresults[1].find('did not match any documents') != -1:
158- return 0
159- else:
160- results = soup.find('div', id='prs').nextSibling.contents[5].string.replace(',', '')
161- if results:
162- return int(results)
163-
164 @match(r'^google\s+cmp\s+(?:for\s+)?(.+?)\s+and\s+(.+?)$')
165 def compare(self, event, term1, term2):
166- count1 = self.results(term1)
167- count2 = self.results(term2)
168- event.addresponse(u'%s wins with %s hits, %s had %s hits' % (count1 > count2 and term1 or term2, count1 > count2 and count1 or count2, count1 > count2 and term2 or term1, count1 > count2 and count2 or count1))
169-
170+ count1 = int(self._google_api_search(term1, "small")["responseData"]["cursor"]["estimatedResultCount"])
171+ count2 = int(self._google_api_search(term2, "small")["responseData"]["cursor"]["estimatedResultCount"])
172+ event.addresponse(u'%s wins with %i hits, %s had %i hits' %
173+ (count1 > count2 and (term1, count1, term2, count2) or (term2, count2, term1, count1))
174+ )
175 # vi: set et sta sw=4 ts=4:
176
177=== modified file 'ibid/plugins/misc.py'
178--- ibid/plugins/misc.py 2009-03-02 09:21:35 +0000
179+++ ibid/plugins/misc.py 2009-03-03 22:52:47 +0000
180@@ -1,9 +1,8 @@
181 from time import sleep
182
183-from pkg_resources import resource_exists, resource_string
184-
185 from ibid.plugins import Processor, match
186 from ibid.config import IntOption
187+from ibid.utils import ibid_version
188
189 help = {}
190
191@@ -41,8 +40,6 @@
192 self.pot.append(event.sender['nick'])
193 event.addresponse(True)
194
195-version = resource_exists(__name__, '../.version') and resource_string(__name__, '../.version') or None
196-
197 help['version'] = u"Show the Ibid version currently running"
198 class Version(Processor):
199 u"""version"""
200@@ -50,7 +47,7 @@
201
202 @match(r'^version$')
203 def show_version(self, event):
204- event.addresponse(version and u"I am version %s" % version or u"I don't know what version I am :-(")
205+ event.addresponse(ibid_version() and u"I am version %s" % ibid_version() or u"I don't know what version I am :-(")
206
207 help['dvorak'] = u"Makes text typed on a QWERTY keyboard as if it was Dvorak work, and vice-versa"
208 class Dvorak(Processor):
209
210=== modified file 'ibid/utils.py'
211--- ibid/utils.py 2009-03-01 19:58:06 +0000
212+++ ibid/utils.py 2009-03-03 22:52:47 +0000
213@@ -1,6 +1,7 @@
214 from htmlentitydefs import name2codepoint
215 import os
216 import os.path
217+from pkg_resources import resource_exists, resource_string
218 import re
219
220 def ago(delta, units=None):
221@@ -43,3 +44,6 @@
222 except:
223 encoding = "ascii"
224 return unicode(output, encoding, errors)
225+
226+def ibid_version():
227+ return resource_exists(__name__, '.version') and resource_string(__name__, '.version').strip() or None

Subscribers

People subscribed via source and target branches