Merge lp:~stefanor/ibid/weather-scraping-370280 into lp:~ibid-core/ibid/old-trunk-pack-0.92

Proposed by Stefano Rivera
Status: Merged
Approved by: Jonathan Hitchcock
Approved revision: 646
Merged at revision: 652
Proposed branch: lp:~stefanor/ibid/weather-scraping-370280
Merge into: lp:~ibid-core/ibid/old-trunk-pack-0.92
Diff against target: None lines
To merge this branch: bzr merge lp:~stefanor/ibid/weather-scraping-370280
Reviewer | Review Type | Date Requested | Status
Jonathan Hitchcock | | | Approve
Michael Gorven | | | Approve
Review via email: mp+6914@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Stefano Rivera (stefanor) wrote :

The API approach was rejected, so here's a simple fix for the bug. I tried using etree first (XPath is nicer than BeautifulSoup for this kind of mess), but etree didn't handle mixed content like

<span class="nowrap"><b>28</b>&#176;C</span>

I could get at the 28, but the °C was lost.

Revision history for this message
Michael Gorven (mgorven) wrote :

 review approve

review: Approve
Revision history for this message
Jonathan Hitchcock (vhata) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ibid/plugins/lookup.py'
2--- ibid/plugins/lookup.py 2009-05-12 07:13:41 +0000
3+++ ibid/plugins/lookup.py 2009-05-30 21:39:59 +0000
4@@ -5,7 +5,7 @@
5 from datetime import datetime
6 from random import choice
7 from simplejson import loads
8-from xml.dom.minidom import parse
9+from xml.etree.cElementTree import parse
10 import re
11
12 import feedparser
13@@ -134,25 +134,25 @@
14 id.isalnum() and id + '/nocomment' or quote(id),
15 urlencode({'language': self.fml_lang, 'key': self.api_key}))
16 )
17- dom = parse(urlopen(url))
18+ tree = parse(urlopen(url))
19
20- if dom.getElementsByTagName('error'):
21+ if tree.find('.//error'):
22 return
23
24- items = dom.getElementsByTagName('item')
25- if items:
26+ item = tree.find('.//item')
27+ if item:
28 url = u"http://www.fmylife.com/%s/%s" % (
29- items[0].getElementsByTagName('category')[0].childNodes[0].nodeValue,
30- items[0].getAttribute('id'),
31+ item.find('category').text,
32+ item.get('id'),
33 )
34- text = items[0].getElementsByTagName('text')[0].childNodes[0].nodeValue
35+ text = item.find('text').text
36
37 return u'%s : %s' % (url, text)
38
39 def setup(self):
40 url = urljoin(self.api_url, 'view/categories?' + urlencode({'language': self.fml_lang, 'key': self.api_key}))
41- dom = parse(urlopen(url))
42- self.categories = [cat.getAttribute('code') for cat in dom.getElementsByTagName('categorie')]
43+ tree = parse(urlopen(url))
44+ self.categories = [x.get('code') for x in tree.findall('.//categorie')]
45
46 self.fml.im_func.pattern = re.compile(r'^(?:fml\s+|http://www\.fmylife\.com/\S+/)(\d+|random|flop|top|last|%s)$' % (
47 '|'.join(self.categories),
48@@ -469,7 +469,7 @@
49
50 def remote_weather(self, place):
51 soup = self._get_page(place)
52- tds = soup.table.table.findAll('td')
53+ tds = [x.table for x in soup.findAll('table') if x.table][0].findAll('td')
54
55 # HACK: Some cities include a windchill row, but others don't
56 if len(tds) == 39:
57@@ -485,10 +485,11 @@
58 def remote_forecast(self, place):
59 soup = self._get_page(place)
60 forecasts = []
61+ table = [table for table in soup.findAll('table') if table.findAll('td', align='left')][0]
62
63- for td in soup.findAll('table')[0].findAll('td', align='left'):
64+ for td in table.findAll('td', align='left'):
65 day = td.b.string
66- forecast = td.contents[2]
67+ forecast = u' '.join([self._text(line) for line in td.contents[2:]])
68 forecasts.append(u'%s: %s' % (day, self._text(forecast)))
69
70 return forecasts

Subscribers

People subscribed via source and target branches