Browse Source

UTF-8 testing, not working. I really can't figure out what is wrong with Twitter.

master
Julio Biason 15 years ago
parent
commit
d86724036d
  1. 60
      mitterlib/network/twitter.py
  2. 7
      tests.py

60
mitterlib/network/twitter.py

@ -25,10 +25,11 @@ import base64
import htmlentitydefs import htmlentitydefs
import re import re
import warnings import warnings
import htmlentitydefs
from httplib import BadStatusLine from httplib import BadStatusLine
from socket import error as socketError from socket import error as socketError
from mitterlib import htmlize #from mitterlib import htmlize
from networkbase import NetworkBase, NetworkData, auth_options, \ from networkbase import NetworkBase, NetworkData, auth_options, \
NetworkDNSError, NetworkBadStatusLineError, NetworkLowLevelError, \ NetworkDNSError, NetworkBadStatusLineError, NetworkLowLevelError, \
@ -51,6 +52,59 @@ _month_names = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
'Sep', 'Oct', 'Nov', 'Dec'] 'Sep', 'Oct', 'Nov', 'Dec']
def htmlize(text):
    """Replace characters of *text* that have a named HTML entity.

    Byte strings are decoded as UTF-8 before processing; unicode strings
    are used as they are.  Each character is then handled as follows:

    * a code point listed in ``htmlentitydefs.codepoint2name`` becomes
      ``&name;``;
    * a literal ``%`` (code point 37) becomes ``%25``;
    * anything else is copied through unchanged.

    Note that no percent-encoding of non-ASCII characters happens here:
    the only ``%XX`` sequence ever produced is ``%25``.

    Returns the converted pieces joined into a single string.
    """
    if not isinstance(text, unicode):
        text = unicode(text, 'utf-8')

    entity_names = htmlentitydefs.codepoint2name

    def _convert(char):
        # Look the code point up once and reuse it for both checks.
        code_point = ord(char)
        if code_point in entity_names:
            return '&%s;' % (entity_names[code_point])
        if code_point == 37:    # '%'
            return '%25'
        return char

    return ''.join([_convert(char) for char in text])
def _unhtml(text): def _unhtml(text):
"""Convert text coming in HTML encoded to UTF-8 representations.""" """Convert text coming in HTML encoded to UTF-8 representations."""
new_text = [] new_text = []
@ -116,7 +170,9 @@ class TwitterNetworkData(NetworkData):
self.username = data['user']['screen_name'] self.username = data['user']['screen_name']
self.avatar = data['user']['profile_image_url'] self.avatar = data['user']['profile_image_url']
self.message_time = _to_datetime(data['created_at']) self.message_time = _to_datetime(data['created_at'])
self.favourited = data['favorited']
if 'favorited' in data:
self.favourited = data['favorited']
if 'in_reply_to_status_id' in data and data['in_reply_to_status_id']: if 'in_reply_to_status_id' in data and data['in_reply_to_status_id']:
self.parent = int(data['in_reply_to_status_id']) self.parent = int(data['in_reply_to_status_id'])

7
tests.py

@ -66,6 +66,13 @@ class TwitterEncodingTests(unittest.TestCase):
except UnicodeEncodeError: except UnicodeEncodeError:
self.fail('UnicodeEncodeError') self.fail('UnicodeEncodeError')
def test_encoding(self):
    """Test the UTF8 encoding on Twitter."""
    # The expected value is the UTF-8 byte sequence of the character
    # (C3 80) written as percent escapes, with every '%' itself
    # escaped as '%25'.
    encoded = twitter.htmlize(u'À')
    self.assertEqual(encoded, '%25C3%2580')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

Loading…
Cancel
Save