-
Notifications
You must be signed in to change notification settings - Fork 1
/
constant.py
26 lines (18 loc) · 1.37 KB
/
constant.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# filename that will be used to save and restore the trained classifier model to/from the file.
CLASSIFIER_FILE = "classifier.joblib"
# filename that will be used to save and restore the fitted vectorizer to/from the file.
VECTORIZER_FILE = "vectorizer.joblib"
# how # of lines that panda's read_csv is going to read per each time.
# It's not byte size or filesize to read.
# REDUCE THIS NUMBER IF YOUR PYTHON PROCESS IS GETTING KILLED WHILE RUNNING. It's pretty much due to code taking too much memory for processing.
# With 15,000, it's expecting to have about 6GB of memory maximum in Mac OS (compressed size less than 1GB) with current training data and takes about 3~4 minutes to complete the training.
# Runing time can vary depends on the system.
CVS_CHUNKSIZE = 15000
# has first 161060 sample data(lines)
TRAINING_DATA_PATH = "data/train_sample_161060.csv"
# has first 8241 sample data(lines)
# TRAINING_DATA_PATH = "data/train_sample_8241.csv"
# Following line's datafile is huge and python process will be killed while it is running.. or maybe taking forever with partial_fit
# there are two other training sample data with project, which is data/train_sample_161060.csv and data/train_sample_8241.csv
# that suffix means the line of traning data in the file. Change it to smaller one to see how Detox behaves with different training size/set.
# TRAINING_DATA_PATH = "data/train.csv"