From 60e76b5549c8819abca6b12d3d3f12af4a750020 Mon Sep 17 00:00:00 2001
From: Julio Biason <julio.biason@gmail.com>
Date: Wed, 20 Jan 2010 22:17:32 -0200
Subject: [PATCH] Seems urlencode is UTF8 safe now

---
 mitterlib/network/twitter.py | 66 ++----------------------------------
 1 file changed, 3 insertions(+), 63 deletions(-)

diff --git a/mitterlib/network/twitter.py b/mitterlib/network/twitter.py
index 1b4b5d4..4855259 100644
--- a/mitterlib/network/twitter.py
+++ b/mitterlib/network/twitter.py
@@ -63,62 +63,6 @@ _month_names = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
         'Sep', 'Oct', 'Nov', 'Dec']
 
 
-def htmlize(text):
-    """Convert a normal text to the format required by Twitter (url-encoded
-    and UTF-8 encoded."""
-    # XXX: UTF-8 part is not working as it should, sadly.
-    if not isinstance(text, unicode):
-        text = unicode(text, 'utf-8')
-#    hex_char = (lambda x: '%' + hex(x)[2:].rjust(2, '0').upper())
-#
-#    codes = []
-#    for char in text:
-#        char_code = ord(char)
-#        bytes = []
-#        mask = 0
-#
-#        ranges = [65535, 2047, 127]
-#        for r in ranges:
-#            if char_code > r:
-#                mask >>= 1
-#                mask |= 128
-#
-#                byte = char_code & 63
-#                byte |= 128
-#
-#                bytes.insert(0, hex_char(byte))
-#
-#                char_code >>= 6
-#
-#        if mask:
-#            # multibyte char
-#            mask >>= 1
-#            mask |= 128
-#
-#            byte = char_code | mask
-#            bytes.insert(0, hex_char(byte))
-#        else:
-#            if char_code < 32:
-#                # special char and it's not a multibyte char
-#                bytes.insert(0, hex_char(char_code))
-#            else:
-#                # normal, printable char
-#                bytes.insert(0, char)
-#
-#        codes.append(''.join(bytes))
-#
-#    text = ''.join(codes)
-    new = []
-    for char in text:
-        if ord(char) in htmlentitydefs.codepoint2name:
-            new.append('&%s;' % (htmlentitydefs.codepoint2name[ord(char)]))
-        elif ord(char) == 37:   # '%'
-            new.append('%25')
-        else:
-            new.append(char)
-
-    return ''.join(new)
-
 def _unhtml(text):
     """Convert text coming in HTML encoded to UTF-8 representations."""
     new_text = []
@@ -470,12 +414,8 @@ class Connection(NetworkBase):
         if len(status) > 140:
             warnings.warn('Message too long', MessageTooLongWarning)
 
-        # In Python 2.5, urllib.urlencode calls str(), which removes the
-        # unicodeness of the "status". So we need to convert those peski
-        # accents to HTML entities, so everything falls into ASCII.
-
         body = {
-            'status': htmlize(status),
+            'status': status,
             'source': 'mitter'}
 
         if reply_to:
@@ -494,8 +434,8 @@ class Connection(NetworkBase):
             else:
                 body['in_reply_to_status_id'] = reply_to
 
-        _log.debug('Body: %s', body)
-        body = urllib.urlencode(body)
+        #_log.debug('Body: %s', body)
+        body = urllib.urlencode(body)   # seems urlencode is UTF8 safe now
         _log.debug('Message to twitter: %s' % (body))
 
         data = self._request('/statuses/update.json', body=body)