Merge lp:~stefanor/ibid/feedcache-330880 into lp:~ibid-core/ibid/old-trunk-pack-0.92

Proposed by Stefano Rivera
Status: Merged
Approved by: Michael Gorven
Approved revision: 566
Merged at revision: 567
Proposed branch: lp:~stefanor/ibid/feedcache-330880
Merge into: lp:~ibid-core/ibid/old-trunk-pack-0.92
Diff against target: None
To merge this branch: bzr merge lp:~stefanor/ibid/feedcache-330880
Reviewers:
  Michael Gorven: Approve
  Jonathan Hitchcock: Approve
Review via email: mp+4271@code.launchpad.net
Stefano Rivera (stefanor) wrote:

My Squid proxy started biting me. We may come across similar content-encoding issues elsewhere, too.
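
For anyone hitting the same problem: "deflate" in the wild is often a raw DEFLATE stream rather than the zlib-wrapped stream the HTTP spec calls for, which is why the branch falls back to a negative window size. A minimal standalone sketch of the decoding approach (Python 2, to match the codebase; the helper name is illustrative):

from gzip import GzipFile
from StringIO import StringIO
import zlib

def decode_body(data, content_encoding):
    # Decode an HTTP response body according to its Content-Encoding
    # header. `content_encoding` is the header value, or None.
    if not content_encoding:
        return data
    content_encoding = content_encoding.lower()
    if content_encoding == 'deflate':
        try:
            # Spec-compliant "deflate": a zlib-wrapped stream
            return zlib.decompress(data)
        except zlib.error:
            # Some servers send raw DEFLATE with no zlib header; a
            # negative window size tells zlib to expect no header or
            # checksum
            return zlib.decompress(data, -zlib.MAX_WBITS)
    elif content_encoding == 'gzip':
        # gzip bodies carry a full gzip file header; GzipFile copes
        return GzipFile(fileobj=StringIO(data)).read()
    return data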

Jonathan Hitchcock (vhata):
review: Approve
Michael Gorven (mgorven) wrote:

Looks fine. Needs to wait for lp:~stefanor/ibid/exchange-336443 though.

lp:~stefanor/ibid/feedcache-330880 updated
564. By Stefano Rivera

Found a bug in compression header handling

565. By Stefano Rivera

Merge from trunk

566. By Stefano Rivera

Typo

Michael Gorven (mgorven):
review: Approve

Preview Diff

=== modified file 'ibid/plugins/feeds.py'
--- ibid/plugins/feeds.py	2009-03-01 23:01:30 +0000
+++ ibid/plugins/feeds.py	2009-03-07 18:41:10 +0000
@@ -1,6 +1,9 @@
 import re
 from datetime import datetime
 import logging
+from urllib2 import urlopen, URLError
+from urlparse import urljoin
+from html5lib import HTMLParser, treebuilders
 
 from sqlalchemy import Column, Integer, Unicode, DateTime, UnicodeText, ForeignKey, Table
 from sqlalchemy.sql import func
@@ -10,6 +13,7 @@
 import ibid
 from ibid.plugins import Processor, match, authorise
 from ibid.models import Base
+from ibid.utils import cacheable_download, get_soup
 
 help = {'feeds': u'Displays articles from RSS and Atom feeds'}
 
@@ -32,15 +36,11 @@
         self.url = url
         self.identity_id = identity_id
         self.time = datetime.now()
-
-    def is_valid(self):
         self.update()
-        if self.feed['version']:
-            return True
-        return False
 
     def update(self):
-        self.feed = feedparser.parse(self.url)
+        feedfile = cacheable_download(self.url, "feeds/%s-%i.xml" % (re.sub(r'\W+', '_', self.name), self.identity_id))
+        self.feed = feedparser.parse(feedfile)
         self.entries = self.feed['entries']
 
 class Manage(Processor):
@@ -59,18 +59,31 @@
 
         if feed:
             event.addresponse(u"I already have the %s feed" % name)
-        else:
-            feed = Feed(unicode(name), unicode(url), event.identity)
-
-            if feed.is_valid():
-                session.save(feed)
-                session.flush()
-                event.addresponse(True)
-                log.info(u"Added feed '%s' by %s/%s (%s): %s (Found %s entries)", name, event.account, event.identity, event.sender['connection'], url, len(feed.entries))
-            else:
-                event.addresponse(u"Sorry, I could not add the %s feed. %s is not a valid feed" % (name,url))
-
-        session.close()
+            return
+
+        valid = bool(feedparser.parse(url)["version"])
+
+        if not valid:
+            soup = get_soup(url)
+            for alternate in soup.findAll('link', {'rel': 'alternate',
+                    'type': re.compile(r'^application/(atom|rss)\+xml$'),
+                    'href': re.compile(r'.+')}):
+                newurl = urljoin(url, alternate["href"])
+                valid = bool(feedparser.parse(newurl)["version"])
+
+                if valid:
+                    url = newurl
+                    break
+
+        if not valid:
+            event.addresponse(u"Sorry, I could not add the %s feed. %s is not a valid feed" % (name,url))
+            return
+
+        feed = Feed(unicode(name), unicode(url), event.identity)
+        session.save(feed)
+        session.flush()
+        event.addresponse(True)
+        log.info(u"Added feed '%s' by %s/%s (%s): %s (Found %s entries)", name, event.account, event.identity, event.sender['connection'], url, len(feed.entries))
 
     @match(r'^(?:list\s+)?feeds$')
     def list(self, event):
=== modified file 'ibid/utils.py'
--- ibid/utils.py	2009-03-05 16:33:12 +0000
+++ ibid/utils.py	2009-03-07 18:41:10 +0000
@@ -1,10 +1,16 @@
+import cgi
+from gzip import GzipFile
 from htmlentitydefs import name2codepoint
 import os
 import os.path
 from pkg_resources import resource_exists, resource_string
 import re
+from StringIO import StringIO
 import time
 import urllib2
+import zlib
+
+from html5lib import HTMLParser, treebuilders
 
 import ibid
 
@@ -80,10 +86,21 @@
     # Download into a temporary file, in case something goes wrong
     downloadfile = os.path.join(plugindir, ".download." + os.path.basename(cachefile))
     outfile = file(downloadfile, "wb")
-    buf = "x"
-    while len(buf) > 0:
-        buf = connection.read(1024)
-        outfile.write(buf)
+    data = connection.read()
+
+    compression = connection.headers.get('content-encoding')
+    if compression:
+        if compression.lower() == "deflate":
+            try:
+                data = zlib.decompress(data)
+            except zlib.error:
+                data = zlib.decompress(data, -zlib.MAX_WBITS)
+        elif compression.lower() == "gzip":
+            compressedstream = StringIO(data)
+            gzipper = GzipFile(fileobj=compressedstream)
+            data = gzipper.read()
+
+    outfile.write(data)
 
     outfile.close()
 
@@ -112,4 +129,34 @@
 def ibid_version():
     return resource_exists(__name__, '.version') and resource_string(__name__, '.version').strip() or None
 
+def get_soup(url, data=None, headers={}):
+    "Request a URL and create a BeautifulSoup parse tree from it"
+
+    req = urllib2.Request(url, data, headers)
+    f = urllib2.urlopen(req)
+    data = f.read()
+    f.close()
+
+    encoding = None
+    contentType = f.headers.get('content-type')
+    if contentType:
+        (mediaType, params) = cgi.parse_header(contentType)
+        encoding = params.get('charset')
+
+    compression = f.headers.get('content-encoding')
+    if compression.lower() == "deflate":
+        try:
+            data = zlib.decompress(data)
+        except zlib.error:
+            data = zlib.decompress(data, -zlib.MAX_WBITS)
+    elif compression.lower() == "gzip":
+        compressedstream = StringIO(data)
+        gzipper = GzipFile(fileobj=compressedstream)
+        data = gzipper.read()
+
+    treebuilder = treebuilders.getTreeBuilder("beautifulsoup")
+    parser = HTMLParser(tree=treebuilder)
+
+    return parser.parse(data, encoding=encoding)
+
 # vi: set et sta sw=4 ts=4:
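
For reference, a hypothetical caller of the new get_soup() helper, along the lines of what the feeds plugin now does for autodiscovery (the URL is made up; BeautifulSoup 3 via html5lib is assumed, as elsewhere in this branch):

import re
from ibid.utils import get_soup

soup = get_soup('http://example.com/blog')
# Scan for <link rel="alternate" type="application/rss+xml" href=...>
# elements, the same pattern the feeds plugin matches against
for link in soup.findAll('link', {'rel': 'alternate',
        'type': re.compile(r'^application/(atom|rss)\+xml$')}):
    print link.get('href')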
