From ca92e64f990ebe984408b0a5455b92250111d19f Mon Sep 17 00:00:00 2001 From: montanaro Date: Tue, 21 Sep 2010 04:00:56 +0000 Subject: [PATCH] Try to catch problems with message processing. --- spambayes/contrib/tte.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/spambayes/contrib/tte.py b/spambayes/contrib/tte.py index dfde7981..6d7aa481 100644 --- a/spambayes/contrib/tte.py +++ b/spambayes/contrib/tte.py @@ -152,8 +152,12 @@ def train(store, hambox, spambox, maxmsgs, maxrounds, tdict, reverse, verbose, sys.stdout.flush() tokens = list(tokenize(train_msg)) - score = store.spamprob(tokens) selector = train_msg["message-id"] or train_msg["subject"] + try: + score = store.spamprob(tokens) + except UnicodeDecodeError: + print >> sys.stderr, "Unicode error while processing", selector + continue if misclassified(train_spam, score) and selector is not None: if verbose: