Merge lp:~markjtully/gwibber/twitter-entities into lp:gwibber

Proposed by Mark Tully
Status: Merged
Merged at revision: 1299
Proposed branch: lp:~markjtully/gwibber/twitter-entities
Merge into: lp:gwibber
Diff against target: 441 lines (+228/-56)
4 files modified
gwibber/microblog/plugins/twitter/__init__.py (+132/-24)
gwibber/microblog/util/__init__.py (+69/-17)
libgwibber-gtk/stream-view-tile.vala (+5/-2)
libgwibber/streams.vala (+22/-13)
To merge this branch: bzr merge lp:~markjtully/gwibber/twitter-entities
Reviewer: Ken VanDine
Status: Approve
Review via email: mp+95821@code.launchpad.net

Description of the change

Adds support for Twitter tweet entities (see https://dev.twitter.com/docs/tweet-entities). This allows the following (a short sketch of the entity handling follows this list):
  Unwrapping t.co URLs so you can see where they lead, while keeping the original t.co link as the actual target for security (see https://support.twitter.com/entries/109623)
  Displaying URLs in tweets the same way twitter.com does (truncated if necessary, with the full link visible on mouseover)
  Displaying tweets containing links in the links stream
  Displaying tweets containing images in the images stream once again (with image previews)
  Displaying tweets containing videos in the videos stream (with thumbnails)
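
For reference, the snippet below is a minimal, self-contained sketch (Python 2, to match the plugin) of what the "urls" entity handling amounts to: the t.co link is kept as the href, the expanded URL is exposed in the title attribute, and the shortened display form is what the user sees. The linkify_urls helper and the sample tweet dict are invented for illustration only; the actual logic lives in gwibber/microblog/plugins/twitter/__init__.py in the diff below.

  import cgi

  def linkify_urls(text, entities):
      # Escape the raw tweet text first, as the plugin does
      content = cgi.escape(text)
      for info in entities.get("urls", []):
          url = cgi.escape(info["url"])                           # wrapped t.co link, kept as the href
          expanded = cgi.escape(info.get("expanded_url") or url)  # where the link really leads
          display = cgi.escape(info.get("display_url") or url)    # shortened form shown to the user
          anchor = "<a href='%s' title='%s'>%s</a>" % (url, expanded, display)
          content = content.replace(url, anchor, 1)
      return content

  tweet = {"text": "Nice shot http://t.co/abc123",
           "entities": {"urls": [{"url": "http://t.co/abc123",
                                  "expanded_url": "http://example.com/photos/1",
                                  "display_url": "example.com/photos/1"}]}}
  print linkify_urls(tweet["text"], tweet["entities"])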

Revision history for this message
Ken VanDine (ken-vandine) wrote :

Looks great!

review: Approve

Preview Diff

1=== modified file 'gwibber/microblog/plugins/twitter/__init__.py'
2--- gwibber/microblog/plugins/twitter/__init__.py 2012-02-13 20:39:02 +0000
3+++ gwibber/microblog/plugins/twitter/__init__.py 2012-03-05 00:08:19 +0000
4@@ -1,4 +1,5 @@
5 from gwibber.microblog import network, util
6+import cgi
7 from oauth import oauth
8 from gwibber.microblog.util import resources
9 from gettext import lgettext as _
10@@ -74,29 +75,129 @@
11 def _common(self, data):
12 m = {}
13 try:
14-
15 m["mid"] = str(data["id"])
16 m["service"] = "twitter"
17 m["account"] = self.account["id"]
18 if data.has_key("created_at"):
19 m["time"] = util.parsetime(data["created_at"])
20 m["text"] = util.unescape(data["text"])
21- m["to_me"] = ("@%s" % self.account["username"]) in data["text"]
22-
23- m["html"] = util.linkify(m["text"],
24- ((util.PARSE_HASH, '#<a class="hash" href="%s#search?q=\\1">\\1</a>' % URL_PREFIX),
25- (util.PARSE_NICK, '@<a class="nick" href="%s/\\1">\\1</a>' % URL_PREFIX)), escape=False)
26-
27- m["content"] = util.linkify(m["text"],
28- ((util.PARSE_HASH, '#<a href="gwibber:/tag?acct=%s&query=\\1">\\1</a>' % m["account"]),
29- (util.PARSE_NICK, '@<a href="gwibber:/user?acct=%s&name=\\1">\\1</a>' % m["account"])), escape=True)
30-
31- m["favorited"] = data.get("favorited", False)
32-
33- images = util.imgpreview(m["text"])
34- if images:
35- m["images"] = images
36- m["type"] = "photo"
37+ m["text"] = cgi.escape(m["text"])
38+ m["content"] = m["text"]
39+
40+ # Go through the entities in the tweet and use them to linkify/filter tweets as appropriate
41+ if data.has_key("entities"):
42+
43+ # Get user mention entities
44+ if data["entities"].has_key("user_mentions"):
45+ for mention in data["entities"]["user_mentions"]:
46+ try:
47+ screen_name = mention["screen_name"].lower()
48+ startindex = m["content"].lower().index("@" + screen_name) + 1
49+ endindex = startindex + len(screen_name)
50+ start = m["content"][0:startindex]
51+ end = m["content"][endindex:]
52+ m["content"] = start + "<a href='gwibber:/user?acct=" + m["account"] + "&name=@" + mention["screen_name"] + "'>" + mention["screen_name"] + "</a>" + end
53+ except:
54+ pass
55+
56+ # Get hashtag entities
57+ if data["entities"].has_key("hashtags"):
58+ for tags in data["entities"]["hashtags"]:
59+ try:
60+ text = tags["text"]
61+ startindex = m["content"].index("#" + text) + 1
62+ endindex = startindex + len(text)
63+ start = m["content"][0:startindex]
64+ end = m["content"][endindex:]
65+ m["content"] = start + "<a href='gwibber:/tag?acct=" + m["account"] + "&query=" + text + "'>" + text + "</a>" + end
66+ except:
67+ pass
68+
69+ # Get url entities - these usually go in the links stream, but pictures and videos should go in their own streams
70+ if data["entities"].has_key("urls"):
71+ for urls in data["entities"]["urls"]:
72+ url = cgi.escape (urls["url"])
73+ expanded_url = url
74+ if urls.has_key("expanded_url"):
75+ if not urls["expanded_url"] is None:
76+ expanded_url = cgi.escape(urls["expanded_url"])
77+
78+ display_url = url
79+ if urls.has_key("display_url"):
80+ display_url = cgi.escape (urls["display_url"])
81+
82+ if url == m["content"]:
83+ m["content"] = "<a href='" + url + "' title=" + expanded_url + "'>" + display_url + "</a>"
84+ else:
85+ try:
86+ startindex = m["content"].index(url)
87+ endindex = startindex + len(url)
88+ start = m["content"][0:startindex]
89+ end = m["content"][endindex:]
90+ m["content"] = start + "<a href='" + url + "' title=" + expanded_url + "'>" + display_url + "</a>" + end
91+ except:
92+ logger.debug ("Failed to set url for ID: %s", m["mid"])
93+
94+ m["type"] = "link"
95+
96+ images = util.imgpreview(expanded_url)
97+ videos = util.videopreview(expanded_url)
98+ if images:
99+ m["images"] = images
100+ m["type"] = "photo"
101+ elif videos:
102+ m["images"] = videos
103+ m["type"] = "video"
104+ else:
105+ # Not an image or a video, so treat it as a plain link
106+ m["link"] = {}
107+ m["link"]["picture"] = ""
108+ m["link"]["name"] = ""
109+ m["link"]["description"] = m["content"]
110+ m["link"]["url"] = url
111+ m["link"]["icon"] = ""
112+ m["link"]["caption"] = ""
113+ m["link"]["properties"] = {}
114+
115+ if data["entities"].has_key("media"):
116+ for media in data["entities"]["media"]:
117+ try:
118+ url = cgi.escape (media["url"])
119+ media_url_https = media["media_url_https"]
120+ expanded_url = url
121+ if media.has_key("expanded_url"):
122+ expanded_url = cgi.escape(media["expanded_url"])
123+
124+ display_url = url
125+ if media.has_key("display_url"):
126+ display_url = cgi.escape (media["display_url"])
127+
128+ startindex = m["content"].index(url)
129+ endindex = startindex + len(url)
130+ start = m["content"][0:startindex]
131+ end = m["content"][endindex:]
132+ m["content"] = start + "<a href='" + url + "' title=" + expanded_url + "'>" + display_url + "</a>" + end
133+
134+ if media["type"] == "photo":
135+ m["type"] = "photo"
136+ m["photo"] = {}
137+ m["photo"]["picture"] = media_url_https
138+ m["photo"]["url"] = None
139+ m["photo"]["name"] = None
140+
141+ except:
142+ pass
143+
144+ else:
145+ m["content"] = util.linkify(util.unescape(m["text"]),
146+ ((util.PARSE_HASH, '#<a href="gwibber:/tag?acct=%s&query=\\1">\\1</a>' % m["account"]),
147+ (util.PARSE_NICK, '@<a href="gwibber:/user?acct=%s&name=\\1">\\1</a>' % m["account"])), escape=True)
148+
149+ m["html"] = m["content"]
150+
151+ m["to_me"] = ("@%s" % self.account["username"]) in data["text"] # Check if it's a reply directed at the user
152+ m["favorited"] = data.get("favorited", False) # Check if the tweet has been favourited
153+
154 except:
155 logger.error("%s failure - %s", PROTOCOL_INFO["name"], data)
156 return {}
157@@ -158,9 +259,16 @@
158
159 return m
160
161+ def _responses(self, data):
162+ m = self._message(data)
163+ m["type"] = None
164+
165+ return m
166+
167 def _private(self, data):
168 m = self._message(data)
169 m["private"] = True
170+ m["type"] = None
171
172 m["recipient"] = {}
173 m["recipient"]["name"] = data["recipient"]["name"]
174@@ -307,18 +415,18 @@
175 return getattr(self, opname)(**args)
176
177 def receive(self, count=util.COUNT, since=None):
178- return self._get("statuses/home_timeline.json", count=count, since_id=since)
179+ return self._get("statuses/home_timeline.json", include_entities=1, count=count, since_id=since)
180
181 def responses(self, count=util.COUNT, since=None):
182- return self._get("statuses/mentions.json", count=count, since_id=since)
183+ return self._get("statuses/mentions.json", "responses", include_entities=1, count=count, since_id=since)
184
185 def private(self, count=util.COUNT, since=None):
186- private = self._get("direct_messages.json", "private", count=count, since_id=since) or []
187+ private = self._get("direct_messages.json", "private", include_entities=1, count=count, since_id=since) or []
188 private_sent = self._get("direct_messages/sent.json", "private", count=count, since_id=since) or []
189 return private + private_sent
190
191 def public(self):
192- return self._get("statuses/public_timeline.json")
193+ return self._get("statuses/public_timeline.json", include_entities=1)
194
195 def lists(self, **args):
196 following = self._get("%s/lists/subscriptions.json" % self.account["username"], "list") or []
197@@ -326,10 +434,10 @@
198 return following + lists
199
200 def list(self, user, id, count=util.COUNT, since=None):
201- return self._get("%s/lists/%s/statuses.json" % (user, id), per_page=count, since_id=since)
202+ return self._get("%s/lists/%s/statuses.json" % (user, id), include_entities=1, per_page=count, since_id=since)
203
204 def search(self, query, count=util.COUNT, since=None):
205- return self._search(q=query, rpp=count, since_id=since)
206+ return self._search(include_entities=1, q=query, rpp=count, since_id=since)
207
208 def tag(self, query, count=util.COUNT, since=None):
209 return self._search(q="#%s" % query, count=count, since_id=since)
210@@ -366,5 +474,5 @@
211
212 def user_messages(self, id=None, count=util.COUNT, since=None):
213 profiles = [self.profile(id)] or []
214- messages = self._get("statuses/user_timeline.json", id=id, count=count, since_id=since) or []
215+ messages = self._get("statuses/user_timeline.json", id=id, include_entities=1, count=count, since_id=since) or []
216 return messages + profiles
217
218=== modified file 'gwibber/microblog/util/__init__.py'
219--- gwibber/microblog/util/__init__.py 2012-02-13 20:39:02 +0000
220+++ gwibber/microblog/util/__init__.py 2012-03-05 00:08:19 +0000
221@@ -1,4 +1,4 @@
222-import os, locale, re, mx.DateTime, cgi
223+import os, locale, re, mx.DateTime, cgi, httplib2
224 import resources
225 import dbus
226 from const import *
227@@ -65,6 +65,37 @@
228 return re.compile(r'"www.', re.U).sub('"http://www.', link)
229
230 def imgpreview(text):
231+ images = []
232+
233+ # If the text is a direct link to an image file
234+ if text.endswith((".jpg", ".gif", ".png", ".bmp")):
235+ images.append({"src": text, "url": text})
236+ return images
237+
238+ # For pic.twitter.com images not wrapped in media entities
239+ if "pic.twitter.com" in text:
240+ # Annoyingly, we have to scrape the page of the tweet to get the actual image location
241+ # The mobile site has smaller pages, so we'll use that
242+ page = text.replace("/photo/1", "")
243+ page = page.replace("http://", "http://mobile.")
244+
245+ resp, content = httplib2.Http().request(page)
246+ start = content.index("http://p.twimg.com")
247+ end = content.index(':small"><img') + 6
248+
249+ image = content[start:end]
250+ images.append({"src": image, "url": text})
251+ return images
252+
253+ if "instagr.am" in text:
254+ # The image URL is returned in the Content-Location header of the short link
255+ thumb = text + "media/?size=m"
256+ resp, content = httplib2.Http().request(thumb)
257+ thumb = resp["content-location"]
258+
259+ images.append({"src": thumb, "url": text})
260+ return images
261+
262 thumbre = {
263 'twitpic': 'http://.*twitpic.com/(?!photos)([A-Za-z0-9]+)',
264 'img.gd': 'http://img.gd/(?!photos)([A-Za-z0-9]+)',
265@@ -72,12 +103,10 @@
266 'twitgoo': 'http://.*twitgoo.com/(?!u/)([A-Za-z0-9]+)',
267 'yfrog.us': 'http://.*yfrog.us/(?!froggy)([A-Za-z0-9]+)',
268 'yfrog.com': 'http://.*yfrog.com/(?!froggy)([A-Za-z0-9]+)',
269- 'twitvid': 'http://.*twitvid.com/(?!videos)([A-Za-z0-9]+)',
270 'img.ly': 'http://img.ly/(?!images)([A-Za-z0-9]+)',
271 'flic.kr': 'http://flic.kr/p/([A-Za-z0-9]+)',
272- 'youtu.be': 'http://youtu.be/([A-Za-z0-9-_]+)',
273- 'youtube.com': 'http://.*youtube.com/watch\?v=([A-Za-z0-9-_]+)',
274- 'tweetphoto': 'http://.*tweetphoto.com/(0-9]+)',
275+ 'tweetphoto': 'http://.*tweetphoto.com/([0-9]+)',
276+ 'plixi': 'http://plixi.com/p/([0-9]+)',
277 'pic.gd': 'http://pic.gd/([A-Za-z0-9]+)',
278 'brizzly': 'http://.*brizzly.com/pic/([A-Za-z0-9]+)',
279 'twitxr': 'http://.*twitxr.com\/[^ ]+\/updates\/([0-9]+)',
280@@ -89,22 +118,19 @@
281 'moby.to': 'http://moby.to/([A-Za-z0-9]+)',
282 'movapic': 'http://.*movapic.com/pic/([A-Za-z0-9]+)',
283 'znl.me': 'http://znl.me/([A-Za-z0-9-_]+)',
284- 'bcphotoshare': 'http://.*bcphotoshare.com/photos/[0-9]+/([0-9]+)',
285- 'twitvideo.jp': 'http://.*twitvideo.jp/(?!contents)([A-Za-z0-9-_]+)'
286+ 'bcphotoshare': 'http://.*bcphotoshare.com/photos/[0-9]+/([0-9]+)'
287 }
288 thumburi = {
289 'twitpic': 'http://twitpic.com/show/thumb/@',
290 'img.gd': 'http://img.gd/show/thumb/@',
291 'imgur': 'http://i.imgur.com/@s.jpg',
292 'twitgoo': 'http://twitgoo.com/show/thumb/@',
293- 'yfrog.us': 'http://yfrog.us/@.th.jpg',
294- 'yfrog.com': 'http://yfrog.com/@.th.jpg',
295- 'twitvid': 'http://images.twitvid.com/@.jpg',
296+ 'yfrog.us': 'http://yfrog.us/@:iphone',
297+ 'yfrog.com': 'http://yfrog.com/@:iphone',
298 'img.ly': 'http://img.ly/show/thumb/@',
299 'flic.kr': 'http://flic.kr/p/img/@_m.jpg',
300- 'youtu.be': 'http://img.youtube.com/vi/@/default.jpg',
301- 'youtube.com': 'http://img.youtube.com/vi/@/default.jpg',
302 'tweetphoto': 'http://TweetPhotoAPI.com/api/TPAPI.svc/json/imagefromurl?size=thumbnail&url=@',
303+ 'plixi': 'http://api.plixi.com/api/tpapi.svc/imagefromurl?size=thumbnail&url=@',
304 'pic.gd': 'http://TweetPhotoAPI.com/api/TPAPI.svc/json/imagefromurl?size=thumbnail&url=@',
305 'brizzly': 'http://pics.brizzly.com/thumb_sm_@.jpg',
306 'twitxr': 'http://twitxr.com/image/@/th/',
307@@ -116,19 +142,45 @@
308 'moby.to': 'http://api.mobypicture.com?s=small&format=plain&k=6JQhCKX6Z9h2m9Lo&t=@',
309 'movapic': 'http://image.movapic.com/pic/s_@.jpeg',
310 'znl.me': 'http://app.zannel.com/content/@/Image-160x120-P-JPG.jpg',
311- 'bcphotoshare': 'http://images.bcphotoshare.com/storages/@/thumbnail.jpg',
312- 'twitvideo.jp': 'http://twitvideo.jp/img/thumb/@'
313+ 'bcphotoshare': 'http://images.bcphotoshare.com/storages/@/thumbnail.jpg'
314 }
315
316- images = []
317+
318 for r, u in zip(thumbre, thumburi):
319 for match in re.finditer(thumbre[r], text):
320 if r == 'tweetphoto' or r == 'pic.gd' or r == 'moby.to':
321- images.append({"src": thumburi[u].replace('@', match.group(0)) , "url": match.group(0)})
322+ images.append({"src": thumburi[u].replace('@', match.group(0)) , "url": text})
323 else:
324- images.append({"src": thumburi[u].replace('@', match.group(1)) , "url": match.group(0)})
325+ images.append({"src": thumburi[u].replace('@', match.group(1)) , "url": text})
326 return images
327
328+def videopreview(text):
329+ videos = []
330+
331+ thumbre = {
332+ 'twitvid': 'http://.*twitvid.com/(?!videos)([A-Za-z0-9]+)',
333+ 'youtu.be': 'http://youtu.be/([A-Za-z0-9-_]+)',
334+ 'youtube.com': 'http://.*youtube.com/watch\?v=([A-Za-z0-9-_]+)',
335+ 'twitvideo.jp': 'http://.*twitvideo.jp/(?!contents)([A-Za-z0-9-_]+)'
336+ }
337+ thumburi = {
338+ 'twitvid': 'http://images.twitvid.com/@.jpg',
339+ 'youtu.be': 'http://img.youtube.com/vi/@/0.jpg',
340+ 'youtube.com': 'http://img.youtube.com/vi/@/0.jpg',
341+ 'twitvideo.jp': 'http://twitvideo.jp/img/thumb/@'
342+ }
343+ thumbvid = {
344+ 'twitvid': 'http://.*twitvid.com/@',
345+ 'youtu.be': 'http://www.youtube.com/watch?v=@',
346+ 'youtube.com': 'http://www.youtube.com/watch?v=@',
347+ 'twitvideo.jp': 'http://www.twitvideo.jp/@'
348+ }
349+
350+ for r, u in zip(thumbre, thumburi):
351+ for match in re.finditer(thumbre[r], text):
352+ videos.append({ "src": thumburi[u].replace('@', match.group(1)), "url" : text})
353+ return videos
354+
355 def compact(data):
356 if isinstance(data, dict):
357 return dict([(x, y) for x,y in data.items() if y])
358
359=== modified file 'libgwibber-gtk/stream-view-tile.vala'
360--- libgwibber-gtk/stream-view-tile.vala 2012-02-23 08:59:27 +0000
361+++ libgwibber-gtk/stream-view-tile.vala 2012-03-05 00:08:19 +0000
362@@ -450,7 +450,10 @@
363 }
364 else if (_stream == "videos")
365 {
366- img_uri = _video_picture;
367+ if (_video_picture.length < 1 && _img_src.length > 0)
368+ img_uri = _img_src;
369+ else
370+ img_uri = _video_picture;
371 img_src = _video_src;
372 }
373
374@@ -949,7 +952,7 @@
375 var last = uri.substring(uri.last_index_of("/") + 1);
376 ret = "http://i.imgur.com/%s.png".printf(last);
377 }
378- else if (uri.contains("youtube.com"))
379+ else if (uri.contains("youtube.com") && !uri.contains("img.youtube.com"))
380 {
381 string id = uri.substring(uri.last_index_of("/") + 1);
382
383
384=== modified file 'libgwibber/streams.vala'
385--- libgwibber/streams.vala 2012-02-23 04:46:15 +0000
386+++ libgwibber/streams.vala 2012-03-05 00:08:19 +0000
387@@ -675,25 +675,16 @@
388 /* escape markup in some strings, pango doesn't like it */
389 if (_link_name != null)
390 _link_name = GLib.Markup.escape_text (_link_name);
391- if (_link_description != null)
392- _link_description = GLib.Markup.escape_text (_link_description);
393 if (_image_name != null)
394 _image_name = GLib.Markup.escape_text (_image_name);
395 if (_video_name != null)
396 _video_name = GLib.Markup.escape_text (_video_name);
397
398- /* FIXME: hacky scrubbing of the html, we should find a
399- better way */
400 if (_html != null)
401- _html = _html.replace("&query", "&amp;query");
402- _html = _html.replace("&name", "&amp;name");
403- _html = _html.replace("class=\"nick\"", "");
404- _html = _html.replace("class=\"hash\"", "");
405- _html = _html.replace("<p>", "");
406- _html = _html.replace("</p>", "");
407- _html = _html.replace("<b>", "");
408- _html = _html.replace("</b>", "");
409- //debug ("_html: %s", _html);
410+ _html = scrub (_html);
411+
412+ if (_link_description != null)
413+ _link_description = scrub (_link_description);
414
415 string _t = utils.generate_time_string(_time);
416
417@@ -769,6 +760,24 @@
418 //debug ("_model has %u ROWS", _model.get_n_rows ());
419 }
420
421+ private string scrub (string content)
422+ {
423+ /* FIXME: hacky scrubbing of the html, we should find a
424+ better way */
425+ string res = content;
426+ res = res.replace("&query", "&amp;query");
427+ res = res.replace("&name", "&amp;name");
428+ res = res.replace("class=\"nick\"", "");
429+ res = res.replace("class=\"hash\"", "");
430+ res = res.replace("<p>", "");
431+ res = res.replace("</p>", "");
432+ res = res.replace("<b>", "");
433+ res = res.replace("</b>", "");
434+ //debug ("res: %s", res);
435+ return res;
436+ }
437+
438+
439
440 /**
441 * com.Gwibber.Streams
