From 60e76b5549c8819abca6b12d3d3f12af4a750020 Mon Sep 17 00:00:00 2001 From: Julio Biason Date: Wed, 20 Jan 2010 22:17:32 -0200 Subject: [PATCH] Seems urlencode is UTF8 safe now --- mitterlib/network/twitter.py | 66 ++---------------------------------- 1 file changed, 3 insertions(+), 63 deletions(-) diff --git a/mitterlib/network/twitter.py b/mitterlib/network/twitter.py index 1b4b5d4..4855259 100644 --- a/mitterlib/network/twitter.py +++ b/mitterlib/network/twitter.py @@ -63,62 +63,6 @@ _month_names = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] -def htmlize(text): - """Convert a normal text to the format required by Twitter (url-encoded - and UTF-8 encoded.""" - # XXX: UTF-8 part is not working as it should, sadly. - if not isinstance(text, unicode): - text = unicode(text, 'utf-8') -# hex_char = (lambda x: '%' + hex(x)[2:].rjust(2, '0').upper()) -# -# codes = [] -# for char in text: -# char_code = ord(char) -# bytes = [] -# mask = 0 -# -# ranges = [65535, 2047, 127] -# for r in ranges: -# if char_code > r: -# mask >>= 1 -# mask |= 128 -# -# byte = char_code & 63 -# byte |= 128 -# -# bytes.insert(0, hex_char(byte)) -# -# char_code >>= 6 -# -# if mask: -# # multibyte char -# mask >>= 1 -# mask |= 128 -# -# byte = char_code | mask -# bytes.insert(0, hex_char(byte)) -# else: -# if char_code < 32: -# # special char and it's not a multibyte char -# bytes.insert(0, hex_char(char_code)) -# else: -# # normal, printable char -# bytes.insert(0, char) -# -# codes.append(''.join(bytes)) -# -# text = ''.join(codes) - new = [] - for char in text: - if ord(char) in htmlentitydefs.codepoint2name: - new.append('&%s;' % (htmlentitydefs.codepoint2name[ord(char)])) - elif ord(char) == 37: # '%' - new.append('%25') - else: - new.append(char) - - return ''.join(new) - def _unhtml(text): """Convert text coming in HTML encoded to UTF-8 representations.""" new_text = [] @@ -470,12 +414,8 @@ class Connection(NetworkBase): if len(status) > 140: 
warnings.warn('Message too long', MessageTooLongWarning) - # In Python 2.5, urllib.urlencode calls str(), which removes the - # unicodeness of the "status". So we need to convert those peski - # accents to HTML entities, so everything falls into ASCII. - body = { - 'status': htmlize(status), + 'status': status, 'source': 'mitter'} if reply_to: @@ -494,8 +434,8 @@ class Connection(NetworkBase): else: body['in_reply_to_status_id'] = reply_to - _log.debug('Body: %s', body) - body = urllib.urlencode(body) + #_log.debug('Body: %s', body) + body = urllib.urlencode(body) # seems urlencode is UTF8 safe now _log.debug('Message to twitter: %s' % (body)) data = self._request('/statuses/update.json', body=body)