#!/usr/bin/env python
# encoding=utf-8
"""Twitter news fetcher.

This script searches Twitter for a few words, merges results together and
renders a static HTML page which is later embedded in the web site to get a
news widget which doesn't need scripting.

Usage: python SCRIPT "#search1" @search2 ... > page.html

The code runs on Python 2.6+ and Python 3.
"""

from contextlib import closing
import json
import re
import sys
import time

try:  # Python 2
    import rfc822
    import urllib2
    from urllib2 import URLError, quote, urlopen
except ImportError:  # Python 3: the same helpers live in different modules
    import email.utils as rfc822
    from urllib.error import URLError
    from urllib.parse import quote
    from urllib.request import urlopen

try:
    _TEXT_TYPE = unicode  # Python 2 text type
except NameError:
    _TEXT_TYPE = str  # Python 3: str is already text


# NOTE(review): the markup of both templates below was reconstructed around
# the placeholders and visible text that survived; confirm tag and class
# names against the stylesheet of the page that embeds the widget.

# One list item per twit; filled from the dicts built by search_twitter().
ITEM_TEMPLATE = (
    u"<li>"
    u"<img src=\"%(picture)s\" alt=\"avatar\"/>"
    u"<p class=\"text\">%(text)s</p>"
    u"<p class=\"date\">%(date)s</p>"
    u"</li>\n"
)

# Whole page; the single %s receives the concatenated list items.
PAGE_TEMPLATE = u"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>Recent Twits</title>
</head>
<body>
<ul class="twits">
%s</ul>
</body>
</html>
"""

# NOTE(review): legacy search endpoint, kept verbatim; verify that it is
# still served before relying on this script.
SEARCH_URL = "http://search.twitter.com/search.json?q=%s&rpp=5&include_entities=true&with_twitter_user_id=true&result_type=mixed"

# URLs, @mentions and #hashtags are recognised in ONE pass so that markup
# generated for one kind of token is never re-scanned for another kind (a
# "#fragment" or "@" inside a URL must stay part of that URL).  The URL
# branch stops at whitespace, quotes and angle brackets, and does not
# swallow trailing sentence punctuation.
_TOKEN_RE = re.compile(
    r"(?P<url>(?:https?|ftp)://[^\s<>\"']*[^\s<>\"'.,;:!?)])"
    r"|@(?P<user>[A-Za-z0-9_]+)"
    r"|#(?P<tag>\w+)",
    re.U,
)


def _native(value):
    """Return *value* as the interpreter's native ``str`` for messages."""
    if isinstance(value, str):
        return value
    if str is bytes:  # Python 2: value is unicode
        return value.encode("utf-8")
    return value.decode("utf-8", "replace")  # Python 3: value is bytes


def _linkify(match):
    """Return the HTML anchor for one token matched by ``_TOKEN_RE``."""
    url = match.group("url")
    if url is not None:
        return u"<a href=\"%s\">%s</a>" % (url, url)

    user = match.group("user")
    if user is not None:
        return u"<a href=\"http://twitter.com/%s\">@%s</a>" % (user, user)

    tag = u"#" + match.group("tag")
    # The leading "#" is percent-encoded too, so it reaches the search
    # query instead of being treated as a URL fragment.
    query = quote(tag.encode("utf-8"))
    return u"<a href=\"http://twitter.com/search?q=%s\">%s</a>" % (query, tag)


def format_text(text):
    """Turn URLs, @mentions and #hashtags found in *text* into HTML links.

    Everything else is passed through unchanged.

    NOTE(review): the surrounding text is not HTML-escaped here; this
    assumes the API already delivers entity-encoded text -- confirm.
    """
    return _TOKEN_RE.sub(_linkify, text)


def search_twitter(term):
    """Search Twitter for *term* and return the found twits.

    Returns a dict that maps a twit id to a dict with the keys ``id``,
    ``author``, ``date``, ``text`` and ``picture`` (the keys used by
    ``ITEM_TEMPLATE``).  On a network, HTTP or decoding error a message is
    written to stderr and an empty dict is returned, so one failing term
    does not abort the whole run.
    """
    query = term.encode("utf-8") if isinstance(term, _TEXT_TYPE) else term
    url = SEARCH_URL % quote(query)

    try:
        with closing(urlopen(url)) as response:
            payload = response.read()
        data = json.loads(payload.decode("utf-8"))
    except (URLError, ValueError) as e:
        # URLError also covers HTTPError; ValueError covers malformed JSON
        # and undecodable bytes.
        sys.stderr.write("Error searching for %s: %s\n" % (_native(term), e))
        return {}

    result = {}
    for entry in data.get("results", []):
        ts = rfc822.parsedate(entry["created_at"])
        # parsedate() returns None for dates it cannot parse; show the raw
        # value in that case instead of crashing inside strftime().
        date = time.strftime("%d.%m.%Y %H:%M", ts) if ts else entry["created_at"]
        result[entry["id"]] = {
            "id": entry["id"],
            "author": entry["from_user"],
            "date": date,
            "text": format_text(entry["text"]),
            "picture": entry["profile_image_url"],
        }
    return result


def format_twits(twits):
    """Render *twits* (as returned by ``search_twitter``) to an HTML page.

    Items are ordered by twit id, highest id first.
    """
    items = [ITEM_TEMPLATE % twits[_id] for _id in sorted(twits, reverse=True)]
    return PAGE_TEMPLATE % u"".join(items)


def main():
    """Search for every command-line term and print the page to stdout."""
    if len(sys.argv) <= 1:
        sys.stderr.write("Usage: python %s \"#search1\" @search2 ... > page.html\n" % sys.argv[0])
        return 1

    twits = {}
    for term in sys.argv[1:]:
        # Results are keyed by id, so a twit found by several terms
        # appears only once.
        twits.update(search_twitter(term))

    html = format_twits(twits)
    # Write UTF-8 bytes directly: on Python 3 that requires the underlying
    # binary stream, on Python 2 stdout already accepts bytes.
    out = getattr(sys.stdout, "buffer", sys.stdout)
    out.write(html.encode("utf-8") + b"\n")
    return 0


if __name__ == "__main__":
    sys.exit(main())