Browse Source

UTF8 testing, not working. I really can't figure out what is wrong with Twitter

master
Julio Biason 15 years ago
parent
commit
d86724036d
  1. 60
      mitterlib/network/twitter.py
  2. 7
      tests.py

60
mitterlib/network/twitter.py

@ -25,10 +25,11 @@ import base64
import htmlentitydefs
import re
import warnings
import htmlentitydefs
from httplib import BadStatusLine
from socket import error as socketError
from mitterlib import htmlize
#from mitterlib import htmlize
from networkbase import NetworkBase, NetworkData, auth_options, \
NetworkDNSError, NetworkBadStatusLineError, NetworkLowLevelError, \
@ -51,6 +52,59 @@ _month_names = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug',
'Sep', 'Oct', 'Nov', 'Dec']
def htmlize(text):
    """Escape *text* using named HTML entities, and escape '%' as '%25'.

    Byte strings are first decoded as UTF-8; unicode input is used as-is.
    Every character whose code point has an entry in
    ``htmlentitydefs.codepoint2name`` is replaced by its ``&name;``
    reference, a literal percent sign becomes ``%25``, and every other
    character is copied through unchanged.

    Returns the escaped text as a single joined string.
    """
    if not isinstance(text, unicode):
        text = unicode(text, 'utf-8')

    # Bind the lookup table once instead of resolving the attribute (and
    # recomputing ord()) several times per character.
    entity_names = htmlentitydefs.codepoint2name
    percent = ord('%')

    escaped = []
    for char in text:
        code = ord(char)
        if code in entity_names:
            escaped.append('&%s;' % (entity_names[code],))
        elif code == percent:
            # Escape '%' itself so it is not mistaken for the start of a
            # percent-encoded sequence.
            escaped.append('%25')
        else:
            escaped.append(char)
    return ''.join(escaped)
def _unhtml(text):
"""Convert text coming in HTML encoded to UTF-8 representations."""
new_text = []
@ -116,7 +170,9 @@ class TwitterNetworkData(NetworkData):
self.username = data['user']['screen_name']
self.avatar = data['user']['profile_image_url']
self.message_time = _to_datetime(data['created_at'])
self.favourited = data['favorited']
if 'favorited' in data:
self.favourited = data['favorited']
if 'in_reply_to_status_id' in data and data['in_reply_to_status_id']:
self.parent = int(data['in_reply_to_status_id'])

7
tests.py

@ -66,6 +66,13 @@ class TwitterEncodingTests(unittest.TestCase):
except UnicodeEncodeError:
self.fail('UnicodeEncodeError')
def test_encoding(self):
"""Test the UTF8 encoding on Twitter."""
# U+00C0 (capital A with grave accent); its UTF-8 encoding is the two
# bytes C3 80.
text = u'À'
result = twitter.htmlize(text)
# The expected value is '%C3%80' (those two bytes percent-encoded) with
# each '%' itself escaped as '%25'.
# NOTE(review): the commit message says this is not working yet; confirm
# what twitter.htmlize is actually meant to return before relying on this
# expectation.
expected = '%25C3%2580'
self.assertEqual(result, expected)
if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save