UTF8 testing, not working. I really can't figure out what is wrong with Twitter

15 years ago · d86724036d
2 changed files with 65 additions and 2 deletions
--- a/mitterlib/network/twitter.py
+++ b/mitterlib/network/twitter.py
@ -25,10 +25,11 @@ import base64
 import htmlentitydefs
 import re
 import warnings
 import htmlentitydefs
 from httplib import BadStatusLine
 from socket import error as socketError
-from mitterlib import htmlize
+#from mitterlib import htmlize
 from networkbase import NetworkBase, NetworkData, auth_options, \
        NetworkDNSError, NetworkBadStatusLineError, NetworkLowLevelError, \
@ -51,6 +52,59 @@ _month_names = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
        'Sep', 'Oct', 'Nov', 'Dec']
 def htmlize(text):
    """Escape ``text`` with HTML named entities.

    Anything that is not already ``unicode`` is decoded as UTF-8 first.
    Each character whose code point appears in
    ``htmlentitydefs.codepoint2name`` is replaced by its ``&name;``
    entity, a literal ``%`` is replaced by ``%25`` and every other
    character is copied through unchanged.

    Returns the escaped characters joined into a single string.
    """
    if not isinstance(text, unicode):
        text = unicode(text, 'utf-8')

    # NOTE(review): the commented-out block below is a disabled pass that
    # percent-encodes control characters and the UTF-8 bytes of non-ASCII
    # characters.  While it stays disabled, non-ASCII characters reach the
    # entity step further down unmodified.
 #    hex_char = (lambda x: '%' + hex(x)[2:].rjust(2, '0').upper())
 #
 #    codes = []
 #    for char in text:
 #        char_code = ord(char)
 #        bytes = []
 #        mask = 0
 #
 #        ranges = [65535, 2047, 127]
 #        for r in ranges:
 #            if char_code > r:
 #                mask >>= 1
 #                mask |= 128
 #
 #                byte = char_code & 63
 #                byte |= 128
 #
 #                bytes.insert(0, hex_char(byte))
 #
 #                char_code >>= 6
 #
 #        if mask:
 #            # multibyte char
 #            mask >>= 1
 #            mask |= 128
 #
 #            byte = char_code | mask
 #            bytes.insert(0, hex_char(byte))
 #        else:
 #            if char_code < 32:
 #                # special char and it's not a multibyte char
 #                bytes.insert(0, hex_char(char_code))
 #            else:
 #                # normal, printable char
 #                bytes.insert(0, char)
 #
 #        codes.append(''.join(bytes))
 #
 #    text = ''.join(codes)
    entity_names = htmlentitydefs.codepoint2name

    def escape(char):
        # Named entity first, then the '%' special case, else pass through.
        code_point = ord(char)
        if code_point in entity_names:
            return '&%s;' % (entity_names[code_point],)
        if code_point == 37:   # '%'
            return '%25'
        return char

    return ''.join(escape(char) for char in text)
 def _unhtml(text):
    """Convert text coming in HTML encoded to UTF-8 representations."""
    new_text = []
@ -116,7 +170,9 @@ class TwitterNetworkData(NetworkData):
        self.username = data['user']['screen_name']
        self.avatar = data['user']['profile_image_url']
        self.message_time = _to_datetime(data['created_at'])
-        self.favourited = data['favorited']
+
        if 'favorited' in data:
            self.favourited = data['favorited']
        if 'in_reply_to_status_id' in data and data['in_reply_to_status_id']:
            self.parent = int(data['in_reply_to_status_id'])
--- a/tests.py
+++ b/tests.py
@ -66,6 +66,13 @@ class TwitterEncodingTests(unittest.TestCase):
        except UnicodeEncodeError:
            self.fail('UnicodeEncodeError')
    def test_encoding(self):
        """Check what htmlize() returns for one non-ASCII character.

        The input is U+00C0; the expected value is the percent-encoded
        form of its UTF-8 bytes (C3 80) with each '%' written as '%25'.
        """
        self.assertEqual(twitter.htmlize(u'À'), '%25C3%2580')
 # Run the test suite when this file is executed directly as a script.
 if __name__ == '__main__':
    unittest.main()