Seems urlencode is UTF8 safe now

15 years ago · 60e76b5549
1 changed file with 3 additions and 63 deletions
--- a/mitterlib/network/twitter.py
+++ b/mitterlib/network/twitter.py
@@ -63,62 +63,6 @@ _month_names = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
        'Sep', 'Oct', 'Nov', 'Dec']


-def htmlize(text):
-    """Convert a normal text to the format required by Twitter (url-encoded
-    and UTF-8 encoded."""
-    # XXX: UTF-8 part is not working as it should, sadly.
-    if not isinstance(text, unicode):
-        text = unicode(text, 'utf-8')
-#    hex_char = (lambda x: '%' + hex(x)[2:].rjust(2, '0').upper())
-#
-#    codes = []
-#    for char in text:
-#        char_code = ord(char)
-#        bytes = []
-#        mask = 0
-#
-#        ranges = [65535, 2047, 127]
-#        for r in ranges:
-#            if char_code > r:
-#                mask >>= 1
-#                mask |= 128
-#
-#                byte = char_code & 63
-#                byte |= 128
-#
-#                bytes.insert(0, hex_char(byte))
-#
-#                char_code >>= 6
-#
-#        if mask:
-#            # multibyte char
-#            mask >>= 1
-#            mask |= 128
-#
-#            byte = char_code | mask
-#            bytes.insert(0, hex_char(byte))
-#        else:
-#            if char_code < 32:
-#                # special char and it's not a multibyte char
-#                bytes.insert(0, hex_char(char_code))
-#            else:
-#                # normal, printable char
-#                bytes.insert(0, char)
-#
-#        codes.append(''.join(bytes))
-#
-#    text = ''.join(codes)
-    new = []
-    for char in text:
-        if ord(char) in htmlentitydefs.codepoint2name:
-            new.append('&%s;' % (htmlentitydefs.codepoint2name[ord(char)]))
-        elif ord(char) == 37:   # '%'
-            new.append('%25')
-        else:
-            new.append(char)
-
-    return ''.join(new)
-
 def _unhtml(text):
    """Convert text coming in HTML encoded to UTF-8 representations."""
    new_text = []
@@ -470,12 +414,8 @@ class Connection(NetworkBase):
        if len(status) > 140:
            warnings.warn('Message too long', MessageTooLongWarning)

-        # In Python 2.5, urllib.urlencode calls str(), which removes the
-        # unicodeness of the "status". So we need to convert those peski
-        # accents to HTML entities, so everything falls into ASCII.
-
        body = {
-            'status': htmlize(status),
+            'status': status,
            'source': 'mitter'}

        if reply_to:
@@ -494,8 +434,8 @@ class Connection(NetworkBase):
            else:
                body['in_reply_to_status_id'] = reply_to

-        _log.debug('Body: %s', body)
-        body = urllib.urlencode(body)
+        #_log.debug('Body: %s', body)
+        body = urllib.urlencode(body)   # seems urlenconde is UTF8 safe now
        _log.debug('Message to twitter: %s' % (body))

        data = self._request('/statuses/update.json', body=body)