#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Mitter, a client for Twitter.
# Copyright (C) 2007, 2008  The Mitter Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

"""Twitter backend: talks to the Twitter REST API using HTTP Basic auth."""

import urllib
import urllib2
import logging
import datetime
import base64
import htmlentitydefs
import re
import warnings

from httplib import BadStatusLine
from socket import error as socketError

from networkbase import NetworkBase, NetworkData, auth_options, \
        NetworkDNSError, NetworkBadStatusLineError, NetworkLowLevelError, \
        NetworkInvalidResponseError, NetworkPermissionDeniedError, \
        MessageTooLongWarning

try:
    # Python 2.6/3.0 JSON parser
    import json
except ImportError:
    # Fallback to SimpleJSON
    import simplejson as json

# logging
_log = logging.getLogger('mitterlib.network.Twitter')

# the month names come directly from the site, so we are not affected by
# locale settings. The leading None makes the list index equal to the month
# number.
_month_names = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul',
        'Aug', 'Sep', 'Oct', 'Nov', 'Dec']


def htmlize(text):
    """Encode a text the way it is sent to Twitter.

    Byte strings are first decoded as UTF-8. Then every character that has
    a named HTML entity is replaced by '&name;' and '%' is replaced by
    '%25'; any other character is copied unchanged.
    """
    # XXX: UTF-8 part is not working as it should, sadly.
    if not isinstance(text, unicode):
        text = unicode(text, 'utf-8')

    new = []
    for char in text:
        code = ord(char)
        if code in htmlentitydefs.codepoint2name:
            new.append('&%s;' % (htmlentitydefs.codepoint2name[code]))
        elif code == 37:    # '%'
            new.append('%25')
        else:
            new.append(char)

    return ''.join(new)


def _unhtml(text):
    """Replace named HTML entities in text by the characters they stand for.

    Only '&name;' sequences are looked at; names that are not in
    htmlentitydefs.name2codepoint are kept as they came. The result is
    always a unicode object.
    """
    new_text = []
    copy_pos = 0
    _log.debug('Original text: %s', text)
    for match in re.finditer(r'&(\w+);', text):
        # copy whatever is between the previous entity and this one
        new_text.append(text[copy_pos:match.start()])

        entity = match.group(1)
        if entity in htmlentitydefs.name2codepoint:
            new_text.append(unichr(htmlentitydefs.name2codepoint[entity]))
        else:
            # unknown name: keep the original "&name;" text
            new_text.append(match.group().decode('utf8'))
        copy_pos = match.end()

    new_text.append(text[copy_pos:])
    _log.debug('New text: %s', new_text)

    result = u''.join(new_text)
    _log.debug('Result: %s', result)
    return result


def _to_datetime(server_str):
    """Convert a date sent by the server to a datetime object.

    Ex: from this:

        Tue Mar 13 00:12:41 +0000 2007

    to datetime. The weekday and the UTC offset fields are not used, so the
    result is a naive datetime with the values exactly as sent.
    """
    # split() (instead of split(' ')) so repeated spaces do not create empty
    # fields and shift the positions used below.
    date_info = server_str.split()

    month = _month_names.index(date_info[1])
    day = int(date_info[2])
    year = int(date_info[5])

    (hour, minute, second) = [int(part) for part in
            date_info[3].split(':')[:3]]

    return datetime.datetime(year, month, day, hour, minute, second)


def _make_datetime(response):
    """Wrap every tweet of a response in a TwitterNetworkData object.

    (The date conversion the name refers to happens inside
    TwitterNetworkData.) Returns a list in the same order as response.
    """
    return [TwitterNetworkData(tweet) for tweet in response]


class TwitterNetworkData(NetworkData):
    """A simple wrapper around NetworkData, to make things easier to convert
    twitter data into a NetworkData object."""

    def __init__(self, data):
        """Class initialization. Receives a dictionary with a single tweet.

        The dictionary is only read, never changed.
        """
        NetworkData.__init__(self)

        self.id = data['id']
        self.name = data['user']['name']
        self.username = data['user']['screen_name']
        self.avatar = data['user']['profile_image_url']
        self.message_time = _to_datetime(data['created_at'])

        if 'favorited' in data:
            self.favourited = data['favorited']
            # Connection.favourite() reads the "favourite" spelling, so both
            # are kept in sync.
            # NOTE(review): NetworkData is defined in networkbase; confirm
            # which of the two names it declares and drop the other.
            self.favourite = data['favorited']

        if 'in_reply_to_status_id' in data and data['in_reply_to_status_id']:
            self.parent = int(data['in_reply_to_status_id'])
            self.parent_owner = data['in_reply_to_screen_name']

        text = data['text']
        if 'retweeted_status' in data:
            # Show the original author and message; the user that made the
            # retweet goes into reposted_by.
            self.reposted_by = self.username
            retweet_user = data['retweeted_status']['user']
            self.name = retweet_user['name']
            self.username = retweet_user['screen_name']
            self.avatar = retweet_user['profile_image_url']
            self.id = data['retweeted_status']['id']
            # also switch the text for the original text.
            text = data['retweeted_status']['text']
            # keep the message_time as is, so we have the retweet time, not
            # the original message time

        # Twitter encodes a lot of HTML entities, which are not good when
        # you want to *display* them (e.g., "<" returns to us as "&lt;").
        # So we convert this here. Interfaces need to worry about converting
        # them if it becomes a problem.
        self.message = _unhtml(text)


class Connection(NetworkBase):
    """Base class to talk to twitter."""

    # Name of the options group that holds this network's settings.
    NAMESPACE = 'Twitter'
    SHORTCUT = 'tw'     # TODO: find a way to move this to the config file

    def is_setup(self):
        """Return True or False if the network is setup/enabled."""
        # Consider the network enabled if there is an username and password
        settings = self._options[self.NAMESPACE]
        return bool(settings['username'] and settings['password'])

    def __init__(self, options):
        """Keep the options object; no request is made here."""
        self._options = options
        # Remaining API hits as reported by the last response; None until a
        # response brings that information. Set here so available_requests()
        # can be called before any other request.
        self._rate_limit = None

    @property
    def server(self):
        """Base URL for requests: the secure one if the 'https' option is
        set, the plain one otherwise."""
        if self._options[self.NAMESPACE]['https']:
            return self._options[self.NAMESPACE]['secure_server_url']
        else:
            return self._options[self.NAMESPACE]['server_url']

    def _common_headers(self):
        """Return a dictionary with the headers added to every request:
        the HTTP Basic 'Authorization' built from the configured username
        and password, and the 'User-Agent'."""
        auth = base64.b64encode('%s:%s' % (
            self._options[self.NAMESPACE]['username'],
            self._options[self.NAMESPACE]['password']))

        # NOTE(review): _user_agent is not set in this file; presumably it
        # comes from NetworkBase -- confirm.
        headers = {
                'Authorization': 'Basic %s' % (auth),
                'User-Agent': self._user_agent}
        return headers

    def _request(self, resource, headers=None, body=None):
        """Send a request to the Twitter server and return the decoded JSON
        response.

        resource is appended to the server URL. headers, if given, is a
        dictionary merged over the common headers. body, if given, is sent
        as the request data (callers in this class pass a body when they
        need the request to be a POST).

        Also updates self._rate_limit with the remaining hits reported in
        the response headers (None if the server did not report it).

        Raises:
            NetworkPermissionDeniedError: the server answered HTTP 403.
            NetworkInvalidResponseError: any other HTTP error status.
            NetworkDNSError: urllib2 raised an URLError.
            NetworkBadStatusLineError: httplib raised BadStatusLine.
            NetworkLowLevelError: a socket error happened.
        """
        url = '%s%s' % (self.server, resource)
        _log.debug('Request %s' % (url))

        request = urllib2.Request(url=url)
        request_headers = self._common_headers()
        if headers:
            request_headers.update(headers)

        for key in request_headers:
            if key.lower() == 'authorization':
                # This header carries the user credentials; keep them out
                # of the logs.
                _log.debug('Header: %s=<hidden>' % (key))
            else:
                _log.debug('Header: %s=%s' % (key, request_headers[key]))
            request.add_header(key, request_headers[key])

        if body:
            _log.debug('Body: %s' % (body))
            request.add_data(body)

        try:
            _log.debug('Starting request of %s' % (url))
            response = urllib2.urlopen(request)
            try:
                data = response.read()
                response_headers = response.info()
            finally:
                # always release the connection, even if read() fails
                response.close()
        except urllib2.HTTPError as exc:
            # HTTPError is a subclass of URLError, so it must be handled
            # first.
            _log.debug('HTTPError: %d' % (exc.code))
            _log.debug('HTTPError: response body:\n%s' % exc.read())
            # To me, I got a lot of 502 for "replies". It shows the
            # "Something is technically wrong" most of the time in the real
            # pages.
            if exc.code == 403:
                # Permission denied.
                raise NetworkPermissionDeniedError

            raise NetworkInvalidResponseError
        except urllib2.URLError as exc:
            _log.error('URL error: %s' % exc.reason)
            raise NetworkDNSError
        except BadStatusLine:
            _log.error('Bad status line (Twitter is going bananas)')
            raise NetworkBadStatusLineError
        except socketError:   # That's the worst exception ever.
            _log.error('Socket connection error')
            raise NetworkLowLevelError
        # TODO: Permission denied?

        # Introduced in Twitter in 2009.03.27
        self._rate_limit = None
        for name in ('X-RateLimit-Remaining', 'x-ratelimit-remaining'):
            if name in response_headers:
                self._rate_limit = int(response_headers[name])
                _log.debug('Remaning hits: %d', self._rate_limit)
                break

        _log.debug('Request completed')
        _log.debug('info(%s): %s', type(response_headers), response_headers)

        return json.loads(data)

    #
    # New network style methods
    #

    # Authentication fields, handed to auth_options() in options().
    AUTH = [
            {'name': 'username',
             'flags': ['-u', '--username'],
             'prompt': 'Username',
             'help': 'Your twitter username',
             'type': 'str'},
            {'name': 'password',
             'flags': ['-p', '--password'],
             'prompt': 'Password',
             'help': 'Your twitter password',
             'type': 'passwd'}]

    @classmethod
    def options(cls, options):
        """Add options related to Twitter.

        Registers the 'Twitter' group with: 'https' (command line
        '-s/--no-https', default True), 'last_tweet' and 'last_reply'
        (default 0), 'server_url', 'secure_server_url' and
        'message_threshold' (default 16), plus the fields in AUTH.
        """
        options.add_group(cls.NAMESPACE, 'Twitter network')
        options.add_option('-s', '--no-https',
                group=cls.NAMESPACE,
                option='https',
                default=True,   # Secure connections by default
                help='Disable HTTPS (secure) connection with Twitter.',
                action='store_false')
        options.add_option(
                group=cls.NAMESPACE,
                option='last_tweet',
                default=0,
                is_cmd_option=False)
        options.add_option(
                group=cls.NAMESPACE,
                option='last_reply',
                default=0,
                is_cmd_option=False)
        options.add_option(
                group=cls.NAMESPACE,
                option='server_url',
                default='http://api.twitter.com/1',
                is_cmd_option=False)
        options.add_option(
                group=cls.NAMESPACE,
                option='secure_server_url',
                default='https://api.twitter.com/1',
                is_cmd_option=False)
        options.add_option(
                group=cls.NAMESPACE,
                option='message_threshold',
                default=16,
                is_cmd_option=False)
        auth_options(cls.NAMESPACE, options, cls.AUTH)

    def _timeline(self, config_var, url):
        """Request one of the lists of tweets.

        config_var is the name of the option that stores the highest
        message id already seen for this list; url is the resource to
        request. Returns a list of TwitterNetworkData.

        Pages are requested until one of them comes empty, or has at most
        'message_threshold' messages, or right after the first page if
        there is no stored id. When everything goes well, the stored id is
        raised to the highest id received.
        """
        last_id = int(self._options[self.NAMESPACE][config_var])
        _log.debug('%s: %d', config_var, last_id)

        params = {}
        if last_id > 0:
            params['since_id'] = last_id

        # Converted like last_id above: the value is used as a number in
        # the log format and in the comparison below.
        threshold = int(self._options[self.NAMESPACE]['message_threshold'])

        page = 1
        result = []
        response = [0]  # So we stay in the loop.
        high_id = 0
        while response:
            # Not the cleanest code
            # TODO: How the interfaces can interrupt this?
            if page > 1:
                params['page'] = page

            final_url = '?'.join([url, urllib.urlencode(params)])
            response = self._request(final_url)
            _log.debug('Page %d, %d results', page, len(response))

            if response:
                # extract the highest id in the response and save it so we
                # can use it when requesting data again (using the since_id
                # parameter)
                top_tweet_id = response[0]['id']
                _log.debug('Top tweet: %d; Highest seen tweet: %d',
                        top_tweet_id, high_id)
                if top_tweet_id > high_id:
                    high_id = top_tweet_id

            response_data = _make_datetime(response)
            result.extend(response_data)
            page += 1   # Request the next page

            _log.debug('%d messages, %d threshold',
                    len(response_data), threshold)
            if len(response_data) <= threshold:
                break

            if last_id == 0:
                # do not try to download everything if we don't have a
                # previous list (or we'll blow the available requests in
                # one shot)
                break

        # only update the "last seen id" if everything goes alright
        if high_id > int(self._options[self.NAMESPACE][config_var]):
            _log.debug('Last tweet updated: %d', high_id)
            self._options[self.NAMESPACE][config_var] = high_id

        return result

    def messages(self):
        """Return a list of NetworkData objects for the main "timeline"."""
        return self._timeline('last_tweet', '/statuses/home_timeline.json')

    def message(self, message_id):
        """Retrieves the information of one message."""
        response = self._request('/statuses/show/%d.json' % (message_id))
        return TwitterNetworkData(response)

    def link(self, message):
        """Return a link directly to the message."""
        return 'http://twitter.com/%s/status/%s' % (message.username,
                message.id)

    def reply_prefix(self, message):
        """Returns the prefix needed for a reply."""
        return '@' + message.username + ' '

    def replies(self):
        """Return a list of NetworkData objects for the replies for the user
        messages."""
        return self._timeline('last_reply', '/statuses/replies.json')

    def available_requests(self):
        """Return the current user rate limit.

        Uses the value kept from the last response when there is one; when
        it is unknown (None) or zero, asks the server.
        """
        if self._rate_limit:
            return self._rate_limit

        data = self._request('/account/rate_limit_status.json')
        _log.debug('Requests: %s', data)
        return int(data['remaining_hits'])

    def update(self, status, reply_to=None):
        """Update the user status.

        reply_to may be a NetworkData (the message being replied to) or a
        message id. Messages longer than 140 characters are still sent, but
        a MessageTooLongWarning is issued. Returns the TwitterNetworkData
        built from the server response.
        """
        if len(status) > 140:
            warnings.warn('Message too long', MessageTooLongWarning)

        # In Python 2.5, urllib.urlencode calls str(), which removes the
        # unicodeness of the "status". So we need to convert those pesky
        # accents to HTML entities, so everything falls into ASCII.
        body = {
                'status': htmlize(status),
                'source': 'mitter'}

        if reply_to:
            if isinstance(reply_to, NetworkData):
                body['in_reply_to_status_id'] = reply_to.id
                # This is to protect the user from himself. You don't *need*
                # to start a reply with a @<username>, but it looks really
                # confusing in the Twitter website. So if the line doesn't
                # start with the username of the original user, we add it
                # for the user.
                if not status.startswith('@' + reply_to.username):
                    # The prefix goes in front of the *encoded* text, so the
                    # conversion done by htmlize() is not lost.
                    body['status'] = ('@' + reply_to.username + ' ' +
                            body['status'])
            else:
                body['in_reply_to_status_id'] = reply_to

        _log.debug('Body: %s', body)

        body = urllib.urlencode(body)
        _log.debug('Message to twitter: %s' % (body))

        data = self._request('/statuses/update.json', body=body)

        # TODO: Check if twitter sends an error message when the message is
        #       too large.
        return TwitterNetworkData(data)

    def repost(self, message):
        """Repost a message."""
        body = urllib.urlencode({'id': message.id})
        resource = '/statuses/retweet/%d.json' % (message.id)
        data = self._request(resource, body=body)
        return TwitterNetworkData(data)

    def favourite(self, message):
        """Mark a message as favourite, or remove the mark if the message is
        already a favourite."""
        body = urllib.urlencode({'id': message.id})
        # TwitterNetworkData historically stored this flag as "favourited";
        # accept either spelling and treat a missing flag as "not favourite".
        is_favourite = (getattr(message, 'favourite', False) or
                getattr(message, 'favourited', False))
        if not is_favourite:
            resource = '/favorites/create/%d.json' % (message.id)
        else:
            resource = '/favorites/destroy/%d.json' % (message.id)
        data = self._request(resource, body=body)
        return TwitterNetworkData(data)

    def delete_message(self, message):
        """Delete a message. Accepts a NetworkData or a message id."""
        if isinstance(message, NetworkData):
            message = message.id    # We don't need anything else for Twitter

        # make a body, so _request makes it a post.
        body = urllib.urlencode({'id': message})
        resource = '/statuses/destroy/%s.json' % (message)
        response = self._request(resource, body=body)
        _log.debug('Delete response: %s', response)
        return True     # Either we get a response or an exception before we
                        # reach this.

    def can_delete(self, message):
        """Check if the message belongs to the user. If so, returns True;
        False otherwise."""
        return (message.username ==
                self._options[self.NAMESPACE]['username'])

    def can_reply(self, message):
        """Always return True; Twitter allows replying to any messages,
        including the ones from the user."""
        return True

    def can_repost(self, message):
        """Twitter ignores retweets from the user."""
        return (message.username !=
                self._options[self.NAMESPACE]['username'])

    def can_favourite(self, message):
        """Always return True; Twitter allows favouriting/unfavouriting any
        messages."""
        return True