From 3661926e5af3f1164f1857a240faedbe3a120a16 Mon Sep 17 00:00:00 2001 From: Julio Biason Date: Mon, 17 May 2010 15:21:33 -0300 Subject: [PATCH] Changed the user and hashtag regexps to match everything except spaces and punctuation. I think they may match too much (greedy), though. --- mitterlib/network/twitter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/mitterlib/network/twitter.py b/mitterlib/network/twitter.py index 0979f14..b1aed6e 100644 --- a/mitterlib/network/twitter.py +++ b/mitterlib/network/twitter.py @@ -26,6 +26,7 @@ import htmlentitydefs import re import warnings import gettext +import string from httplib import BadStatusLine from socket import error as socketError @@ -172,10 +173,10 @@ class TwitterNetworkData(NetworkData): self.message = _unhtml(data['text']) # regular expression for users - self.user_regexp = r'@\w+' + self.user_regexp = r'@[^ ' + string.punctuation + ']' # regular expression for hashtags - self.tag_regexp = r'#\w+' + self.tag_regexp = r'#[^ ' + string.punctuation + ']' return