#!/usr/bin/env python
# encoding=utf-8
"""Twitter news fetcher.
This script searches Twitter for a few words, merges results together and
renders a static HTML page which is later embedded in the web site to get a
news widget which doesn't need scripting."""
import json
import re
import rfc822
import sys
import time
import urllib2
# Markup for a single twit.  It is filled with the per-twit dict built by
# search_twitter(), i.e. the keys id, author, date, text and picture.
# "text" is expected to already contain HTML (see format_text()).
ITEM_TEMPLATE = u"""<div class="twit" id="twit-%(id)s">
<a class="author" href="http://twitter.com/%(author)s"><img src="%(picture)s" alt="%(author)s"/></a>
<div class="text">%(text)s</div>
<div class="date">%(date)s</div>
</div>
"""

# Markup for the whole page.  The single %s receives the concatenated items
# produced from ITEM_TEMPLATE; the charset matches the UTF-8 encoding applied
# when the page is written out.
PAGE_TEMPLATE = u"""<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
<title>Recent Twits</title>
</head>
<body>
<h1>Recent Twits</h1>
%s
</body>
</html>
"""
# One pattern for every kind of token that gets linkified.  Matching them all
# in a single left-to-right pass guarantees that markup inserted for one token
# (e.g. the href of a URL) is never re-processed as a mention or a hash tag,
# and that a URL occurring twice is wrapped exactly once per occurrence.
_LINKIFY_RE = re.compile(
    r'(?P<url>(?:https?|ftp)://[^\s<>"]+)'
    r'|@(?P<user>[A-Za-z0-9_]+)'
    r'|#(?P<tag>\w+)',
    re.U)


def _linkify_token(match):
    """Return the HTML anchor for one URL, @mention or #hashtag match."""
    url = match.group("url")
    if url is not None:
        # Punctuation that directly follows a URL belongs to the sentence,
        # not to the link, so keep it outside of the anchor.
        target = url.rstrip(u".,;:!?)")
        return u'<a href="%s">%s</a>%s' % (target, target, url[len(target):])

    user = match.group("user")
    if user is not None:
        return u'<a href="http://twitter.com/%s">@%s</a>' % (user, user)

    tag = u"#" + match.group("tag")
    # The tag may contain non-ASCII word characters; percent-encode its
    # UTF-8 form (including the leading "#") for use in the query string.
    query = urllib2.quote(tag.encode("utf-8"))
    return u'<a href="http://twitter.com/search?q=%s">%s</a>' % (query, tag)


def format_text(text):
    """Turn URLs, @mentions and #hashtags found in *text* into HTML links.

    URLs (http, https, ftp) link to themselves, "@name" links to the
    profile page of that name and "#tag" links to a search for the tag.
    Everything else is returned unchanged.

    NOTE(review): *text* is inserted into the page without HTML escaping;
    confirm that the upstream API delivers it already escaped.
    """
    return _LINKIFY_RE.sub(_linkify_token, text)
def search_twitter(term):
    """Search Twitter for *term* and return the matching twits.

    *term* may be a unicode or a byte string; unicode is encoded as UTF-8
    before it is quoted into the request URL.

    Returns a dict keyed by twit id.  Each value is a dict with the keys
    "id", "author", "date" (formatted as DD.MM.YYYY HH:MM, taken from the
    "created_at" field without time zone conversion), "text" (passed
    through format_text()) and "picture".

    The search is best effort: when the request fails or the response
    cannot be decoded, the error is reported on stderr and an empty dict
    is returned.
    """
    if isinstance(term, unicode):
        term = term.encode("utf-8")
    url = ("http://search.twitter.com/search.json"
           "?q=%s&rpp=5&include_entities=true"
           "&with_twitter_user_id=true&result_type=mixed") % urllib2.quote(term)

    try:
        response = urllib2.urlopen(url)
        try:
            data = json.loads(response.read().decode("utf-8"))
        finally:
            response.close()
    except (urllib2.URLError, ValueError) as e:
        # URLError also covers HTTPError; ValueError covers undecodable or
        # malformed JSON bodies.  `term` is already a byte string here, so
        # it must not be encoded a second time.
        sys.stderr.write("Error searching for %s: %s\n" % (term, e))
        return {}

    result = {}
    for entry in data.get("results", []):
        created_at = entry["created_at"]
        ts = rfc822.parsedate(created_at)
        # parsedate() returns None for dates it cannot parse; show the raw
        # value in that case instead of failing inside strftime().
        if ts is not None:
            date = time.strftime("%d.%m.%Y %H:%M", ts)
        else:
            date = created_at
        result[entry["id"]] = {
            "id": entry["id"],
            "author": entry["from_user"],
            "date": date,
            "text": format_text(entry["text"]),
            "picture": entry["profile_image_url"],
        }
    return result
def format_twits(twits):
    """Render *twits* into the final HTML page.

    *twits* maps twit ids to the dicts consumed by ITEM_TEMPLATE.  Items
    are emitted in descending id order and the concatenated result is
    substituted into PAGE_TEMPLATE.
    """
    ids_descending = sorted(twits, reverse=True)
    rendered = [ITEM_TEMPLATE % twits[twit_id] for twit_id in ids_descending]
    return PAGE_TEMPLATE % u"".join(rendered)
def main(argv):
    """Run the fetcher for the search terms given in *argv*.

    argv[0] is the program name and every further element is one search
    term.  The results of all searches are merged by twit id, rendered
    with format_twits() and written to stdout as UTF-8.

    Returns the process exit status: 1 when no search term was given
    (after printing a usage message to stderr), 0 otherwise.
    """
    if len(argv) <= 1:
        sys.stderr.write(
            "Usage: python %s \"#search1\" @search2 ... > page.html\n" % argv[0])
        return 1

    twits = {}
    for term in argv[1:]:
        twits.update(search_twitter(term))

    html = format_twits(twits)
    sys.stdout.write(html.encode("utf-8") + "\n")
    return 0


if __name__ == "__main__":
    # sys.exit() rather than the bare exit(), which is only injected by the
    # site module and is not guaranteed to exist.
    sys.exit(main(sys.argv))