forked from aashishsatya/Bayesian-Spam-Filter
-
Notifications
You must be signed in to change notification settings - Fork 0
/
SpamClassifier.py
43 lines (32 loc) · 1.12 KB
/
SpamClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
"""
Created on Mon Jul 20 16:13:21 2015
The script that actually does the classification.
@author: aashishsatya
"""
from TrainingSetsUtil import *
# c is an experimentally obtained value; it is the factor used for words that are not in the training set
def classify(message, training_set, prior = 0.5, c = 3.7e-4):
    """
    Score the given message against the given training set.

    Each word produced by get_words(message) contributes one factor: the
    value stored for that word in training_set, or c when the word is not
    in the training set. The factors are multiplied together and the
    product is scaled by prior before being returned.
    """
    # one factor per word, with c standing in for unknown words
    factors = [training_set[word] if word in training_set else c
               for word in get_words(message)]
    score = 1
    for factor in factors:
        score *= factor
    return score * prior
# uncomment this to provide input to the program
#mail_msg = raw_input('Enter the message to be classified:')
#print ''
#
## 0.2 and 0.8 because the ratio of spam to ham samples was 0.2 to 0.8
#spam_probability = classify(mail_msg, spam_training_set, 0.2)
#ham_probability = classify(mail_msg, ham_training_set, 0.8)
#if spam_probability > ham_probability:
# print 'Your mail has been classified as SPAM.'
#else:
# print 'Your mail has been classified as HAM.'
#print ''