Merge lp:~stefanor/ibid/google-api-336419 into lp:~ibid-core/ibid/old-trunk-pack-0.92

Proposed by Stefano Rivera
Status: Merged
Approved by: Stefano Rivera
Approved revision: 564
Merged at revision: 559
Proposed branch: lp:~stefanor/ibid/google-api-336419
Merge into: lp:~ibid-core/ibid/old-trunk-pack-0.92
Diff against target: None lines
To merge this branch: bzr merge lp:~stefanor/ibid/google-api-336419
Reviewer Review Type Date Requested Status
Jonathan Hitchcock Approve
Michael Gorven Approve
Review via email: mp+4120@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stefano Rivera (stefanor) wrote :

We lose a teeny bit of functionality (ZA search), but get a really nice, fast search without silly Google tracking URLs.

lp:~stefanor/ibid/google-api-336419 updated
559. By Stefano Rivera

Handle 0 items returned correctly

Revision history for this message
Michael Gorven (mgorven) wrote :

I'd like both scraping and API searches to work. The plugin should work out
the box, and the bot owner can then get an API key and use the API search if
they want to. We could do this by having a Processor for each, with the API
processor at a lower priority.
 review needs_fixing

review: Needs Fixing
Revision history for this message
Stefano Rivera (stefanor) wrote :

"The plugin should work out the box, and the bot owner can then get an API key and use the API search if they want to."

It does work out the box. The API key is optional. If you are doing very high volumes of search, Google will be less likely to disconnect you if you are using a key.

lp:~stefanor/ibid/google-api-336419 updated
560. By Stefano Rivera

Split Google into API and scrape processors.
Change googlefight syntax.

561. By Stefano Rivera

Country search

562. By Stefano Rivera

Return more than one result for country search

Revision history for this message
Michael Gorven (mgorven) wrote :

 review approve

review: Approve
Revision history for this message
Jonathan Hitchcock (vhata) :
review: Approve
lp:~stefanor/ibid/google-api-336419 updated
563. By Stefano Rivera

Don't include a list of countries, just match [a-z]{2}

564. By Stefano Rivera

Allow the 'for' in 'google.TLD for terms'

565. By Stefano Rivera

Remove entities from results

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'ibid/plugins/google.py'
--- ibid/plugins/google.py 2009-03-01 23:01:30 +0000
+++ ibid/plugins/google.py 2009-03-03 22:52:47 +0000
@@ -1,53 +1,81 @@
1import htmlentitydefs
2import re
3import simplejson
1from urllib import quote4from urllib import quote
2from urllib2 import urlopen, Request5from urllib2 import urlopen, Request
6
3from BeautifulSoup import BeautifulSoup7from BeautifulSoup import BeautifulSoup
48
5from ibid.plugins import Processor, match9from ibid.plugins import Processor, match
6from ibid.config import Option10from ibid.config import Option
11from ibid.utils import ibid_version
712
8help = {'google': u'Retrieves results from Google and Google Calculator.'}13help = {'google': u'Retrieves results from Google and Google Calculator.'}
914
10user_agent = 'Mozilla/5.0'15user_agent = 'Mozilla/5.0'
1116
12class Search(Processor):17class Search(Processor):
13 u"""google [for] <term>"""18 u"""google [for] <term>
19 gcalc <expression>
20 gdefine <term>
21 google cmp [for] <term> and <term>"""
14 feature = 'google'22 feature = 'google'
1523
16 user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)24 api_key = Option('api_key', 'Your Google API Key (optional)', None)
1725 referrer = Option('referrer', 'The referrer string to use (API searches)', "http://ibid.omnia.za.net/")
18 @match(r'^google\s+(?:(za)\s+)?(?:for\s+)?(.+?)$')26 user_agent = Option('user_agent', 'HTTP user agent to present to Google (for non-API searches)', user_agent)
19 def search(self, event, country, query):27
20 url = 'http://www.google.com/search?num=3&q=%s' % quote(query)28 google_api_url = "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&q=%s"
21 if country:29 google_scrape_url = "http://www.google.com/search?num=1&q=%s"
22 url = url + '&meta=cr%%3Dcountry%s' % country.upper()30
2331 def _google_api_search(self, query, resultsize="large"):
24 f = urlopen(Request(url, headers={'user-agent': self.user_agent}))32 url = self.google_api_url % quote(query)
33 url += "&rsz=%s" % resultsize
34 if self.api_key:
35 url += '&key=%s' % quote(key)
36 req = Request(url, headers={
37 'user-agent': "Ibid/%s" % ibid_version() or "dev",
38 'referrer': self.referrer,
39 })
40 f = urlopen(req)
41 result = f.readlines()
42 f.close()
43 result = "".join(result)
44 result = simplejson.loads(result)
45 return result
46
47 def _google_scrape_search(self, query):
48 f = urlopen(Request(self.google_scrape_url % quote(query), headers={'user-agent': self.user_agent}))
25 soup = BeautifulSoup(f.read())49 soup = BeautifulSoup(f.read())
26 f.close()50 f.close()
2751 return soup
52
53 @match(r'^google\s+(?:for\s+)?(.+?)$')
54 def search(self, event, query):
55
56 # Clashing regexs:
57 if self.compare.im_func.pattern.match(event.message):
58 return
59
60 items = self._google_api_search(query)
28 results = []61 results = []
29 items = soup.findAll('li')[:10]62 for item in items["responseData"]["results"]:
30 for item in items:63
31 try:64 title = item["titleNoFormatting"]
32 url = item.a['href']65
33 title = u''.join([e.string for e in item.a.contents])66 replace = lambda match: unichr(int(match.group(1)))
34 results.append(u'"%s" %s' % (title, url))67 title = re.sub("&#(\d+);", replace, title)
35 except Exception:68
36 pass69 replace = lambda match: unichr(htmlentitydefs.name2codepoint[match.group(1)])
3770 title = re.sub("&(\w+);", replace, title)
71
72 results.append(u'"%s" %s' % (title, item["unescapedUrl"]))
38 event.addresponse(u', '.join(results))73 event.addresponse(u', '.join(results))
3974
40class Calc(Processor):
41 u"""gcalc <expression>"""
42 feature = 'google'
43
44 user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
45
46 @match(r'^gcalc\s+(.+)$')75 @match(r'^gcalc\s+(.+)$')
47 def calc(self, event, expression):76 def calc(self, event, expression):
48 f = urlopen(Request('http://www.google.com/search?num=1&q=%s' % quote(expression), headers={'user-agent': self.user_agent}))77
49 soup = BeautifulSoup(f.read())78 soup = self._google_scrape_search(expression)
50 f.close()
5179
52 font = soup.find('font', size='+1')80 font = soup.find('font', size='+1')
53 if not font:81 if not font:
@@ -55,51 +83,25 @@
55 else:83 else:
56 event.addresponse(font.b.string)84 event.addresponse(font.b.string)
5785
58class Define(Processor):
59 u"""gdefine <term>"""
60 feature = 'google'
61
62 user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
63
64 @match(r'^gdefine\s+(.+)$')86 @match(r'^gdefine\s+(.+)$')
65 def define(self, event, term):87 def define(self, event, term):
66 f = urlopen(Request('http://www.google.com/search?num=1&q=define:%s' % quote(term), headers={'user-agent': self.user_agent}))88
67 soup = BeautifulSoup(f.read())89 soup = self._google_scrape_search("define:%s" % term)
68 f.close()
6990
70 definitions = []91 definitions = []
71 for li in soup.findAll('li'):92 for li in soup.findAll('li'):
72 definitions.append('"%s"' % li.contents[0].strip())93 definitions.append(li.contents[0].strip())
7394
74 if definitions:95 if definitions:
75 event.addresponse(', '.join(definitions))96 event.addresponse(u' :: '.join(definitions))
76 else:97 else:
77 event.addresponse(u"Are you making up words again?")98 event.addresponse(u"Are you making up words again?")
7899
79class Compare(Processor):
80 u"""google cmp [for] <term> and <term>"""
81 feature = 'google'
82
83 user_agent = Option('user_agent', 'HTTP user agent to present to Google', user_agent)
84
85
86 def results(self, term):
87 f = urlopen(Request('http://www.google.com/search?num=1&q=%s' % quote(term), headers={'user-agent': self.user_agent}))
88 soup = BeautifulSoup(f.read())
89 f.close()
90
91 noresults = soup.findAll('div', attrs={'class': 'med'})
92 if noresults and len(noresults) > 1 and noresults[1].find('did not match any documents') != -1:
93 return 0
94 else:
95 results = soup.find('div', id='prs').nextSibling.contents[5].string.replace(',', '')
96 if results:
97 return int(results)
98
99 @match(r'^google\s+cmp\s+(?:for\s+)?(.+?)\s+and\s+(.+?)$')100 @match(r'^google\s+cmp\s+(?:for\s+)?(.+?)\s+and\s+(.+?)$')
100 def compare(self, event, term1, term2):101 def compare(self, event, term1, term2):
101 count1 = self.results(term1)102 count1 = int(self._google_api_search(term1, "small")["responseData"]["cursor"]["estimatedResultCount"])
102 count2 = self.results(term2)103 count2 = int(self._google_api_search(term2, "small")["responseData"]["cursor"]["estimatedResultCount"])
103 event.addresponse(u'%s wins with %s hits, %s had %s hits' % (count1 > count2 and term1 or term2, count1 > count2 and count1 or count2, count1 > count2 and term2 or term1, count1 > count2 and count2 or count1))104 event.addresponse(u'%s wins with %i hits, %s had %i hits' %
104105 (count1 > count2 and (term1, count1, term2, count2) or (term2, count2, term1, count1))
106 )
105# vi: set et sta sw=4 ts=4:107# vi: set et sta sw=4 ts=4:
106108
=== modified file 'ibid/plugins/misc.py'
--- ibid/plugins/misc.py 2009-03-02 09:21:35 +0000
+++ ibid/plugins/misc.py 2009-03-03 22:52:47 +0000
@@ -1,9 +1,8 @@
1from time import sleep1from time import sleep
22
3from pkg_resources import resource_exists, resource_string
4
5from ibid.plugins import Processor, match3from ibid.plugins import Processor, match
6from ibid.config import IntOption4from ibid.config import IntOption
5from ibid.utils import ibid_version
76
8help = {}7help = {}
98
@@ -41,8 +40,6 @@
41 self.pot.append(event.sender['nick'])40 self.pot.append(event.sender['nick'])
42 event.addresponse(True)41 event.addresponse(True)
4342
44version = resource_exists(__name__, '../.version') and resource_string(__name__, '../.version') or None
45
46help['version'] = u"Show the Ibid version currently running"43help['version'] = u"Show the Ibid version currently running"
47class Version(Processor):44class Version(Processor):
48 u"""version"""45 u"""version"""
@@ -50,7 +47,7 @@
5047
51 @match(r'^version$')48 @match(r'^version$')
52 def show_version(self, event):49 def show_version(self, event):
53 event.addresponse(version and u"I am version %s" % version or u"I don't know what version I am :-(")50 event.addresponse(ibid_version() and u"I am version %s" % ibid_version() or u"I don't know what version I am :-(")
5451
55help['dvorak'] = u"Makes text typed on a QWERTY keyboard as if it was Dvorak work, and vice-versa"52help['dvorak'] = u"Makes text typed on a QWERTY keyboard as if it was Dvorak work, and vice-versa"
56class Dvorak(Processor):53class Dvorak(Processor):
5754
=== modified file 'ibid/utils.py'
--- ibid/utils.py 2009-03-01 19:58:06 +0000
+++ ibid/utils.py 2009-03-03 22:52:47 +0000
@@ -1,6 +1,7 @@
1from htmlentitydefs import name2codepoint1from htmlentitydefs import name2codepoint
2import os2import os
3import os.path3import os.path
4from pkg_resources import resource_exists, resource_string
4import re5import re
56
6def ago(delta, units=None):7def ago(delta, units=None):
@@ -43,3 +44,6 @@
43 except:44 except:
44 encoding = "ascii"45 encoding = "ascii"
45 return unicode(output, encoding, errors)46 return unicode(output, encoding, errors)
47
48def ibid_version():
49 return resource_exists(__name__, '.version') and resource_string(__name__, '.version').strip() or None

Subscribers

People subscribed via source and target branches