Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added several event cullers and SKlearn classifiers #18

Open
wants to merge 9 commits into
base: developing
Choose a base branch
from
91 changes: 90 additions & 1 deletion generics/modules/am_sklearn.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
from sklearn.svm import LinearSVC, SVC
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
from sklearn.neighbors import NeighborhoodComponentsAnalysis
from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.tree import DecisionTreeClassifier
import numpy as np

class Linear_SVM_sklearn(AnalysisMethod):
Expand Down Expand Up @@ -170,3 +173,89 @@ def displayName():

def displayDescription():
return "Multinomial naive bayes implemented in scikit-learn."

class LDA_sklearn(AnalysisMethod):
    """Analysis method backed by scikit-learn's LinearDiscriminantAnalysis.

    ``process`` fits a fresh model on the documents that have a non-empty
    ``author`` and then scores every document passed in.
    """

    _NoDistanceFunction_ = True

    def train(self, train, train_data=None, **options):
        """Fit a linear discriminant analysis model and store it on ``self._model``.

        Args:
            train: training documents, forwarded to
                ``get_train_data_and_labels``.
            train_data: optional pre-computed training data, forwarded to
                ``get_train_data_and_labels``.
            **options: accepted for interface compatibility; not used here.
        """
        train_data, train_labels = self.get_train_data_and_labels(train, train_data)
        # This class is presented to the user as *linear* discriminant
        # analysis, so the estimator must be the linear one (not the
        # quadratic variant used by Quadratic_discriminant_analysis).
        self._model = LinearDiscriminantAnalysis()
        # fit() is given a flattened label array.
        train_labels = train_labels.flatten()
        self._model.fit(train_data, train_labels)
        return

    def process(self, docs, Pipe=None, **options):
        """Train on the documents with a known author, then score all ``docs``.

        Args:
            docs: documents to analyse; those whose ``author`` is not the
                empty string are used for training.
            Pipe: accepted for interface compatibility; not used here.
            **options: the ``"known_numbers"`` entry (if any) is passed to
                ``train`` as its training data; the whole mapping is passed
                to ``get_test_data``.

        Returns:
            Whatever ``get_results_dict_from_matrix`` builds from the
            complemented class-probability matrix.
        """
        self.train([d for d in docs if d.author != ""], options.get("known_numbers"))
        test_data = self.get_test_data(docs, options)
        results = self._model.predict_proba(test_data)
        # The complement (1 - p) is handed on, presumably so that a more
        # probable class gets a smaller, distance-like score -- confirm
        # against get_results_dict_from_matrix.
        results = self.get_results_dict_from_matrix(1-results)
        return results

    # NOTE: the two display helpers take no ``self``; they are only callable
    # on the class itself, not on an instance.
    def displayName():
        return "Linear Discriminant Analysis (sklearn)"

    def displayDescription():
        return "Linear discriminant analysis implemented in scikit-learn"

class Quadratic_discriminant_analysis(AnalysisMethod):
    """Analysis method backed by scikit-learn's QuadraticDiscriminantAnalysis.

    ``process`` refits the model on the documents that carry a non-empty
    ``author`` and then scores every document it was given.
    """

    _NoDistanceFunction_ = True

    def train(self, train, train_data=None, **options):
        """Fit a quadratic discriminant model and keep it in ``self._model``.

        ``train`` and ``train_data`` are forwarded unchanged to
        ``get_train_data_and_labels``; ``**options`` is accepted but unused.
        """
        features, labels = self.get_train_data_and_labels(train, train_data)
        self._model = QuadraticDiscriminantAnalysis()
        # The estimator is fitted on a flattened label array.
        flat_labels = labels.flatten()
        self._model.fit(features, flat_labels)

    def process(self, docs, Pipe=None, **options):
        """Train on documents with a known author, then score all ``docs``.

        The ``"known_numbers"`` option (``None`` when absent) is passed to
        ``train`` as its training data. ``Pipe`` is accepted but unused.
        Returns the result of ``get_results_dict_from_matrix`` applied to the
        complemented class-probability matrix.
        """
        known_docs = [doc for doc in docs if doc.author != ""]
        self.train(known_docs, options.get("known_numbers"))

        features = self.get_test_data(docs, options)
        probabilities = self._model.predict_proba(features)
        # The complement (1 - p) is passed on -- presumably a distance-like
        # score where lower is better; confirm against
        # get_results_dict_from_matrix.
        return self.get_results_dict_from_matrix(1 - probabilities)

    # The display helpers take no ``self`` and are therefore only callable
    # on the class, not on an instance.
    def displayName():
        return "Quadratic Discriminant Analysis (sklearn)"

    def displayDescription():
        return "Classifier with quadratic decision boundary. Fits a Gaussian density to each class."


class Decision_tree_sklearn(AnalysisMethod):
    """Analysis method backed by scikit-learn's DecisionTreeClassifier.

    The split ``criterion`` and ``splitter`` strategy are class attributes
    that are read when the model is built in ``train``.
    """

    _NoDistanceFunction_ = True

    # Values handed to DecisionTreeClassifier in train().
    criterion = "gini"
    splitter = "best"

    # Choices offered for the two attributes above; "default" is an index
    # into the matching "options" list. Only these two settings are listed:
    # the other options seem like too much for the average user.
    # NOTE(review): "log_loss" may not be accepted by older scikit-learn
    # releases -- confirm against the version this project pins.
    _variable_options = {
        "criterion": {"options": ["gini", "entropy", "log_loss"], "default": 0},
        "splitter": {"options": ["best", "random"], "default": 0},
    }

    def train(self, train, train_data=None, **options):
        """Fit a decision tree and keep it in ``self._model``.

        ``train`` and ``train_data`` are forwarded unchanged to
        ``get_train_data_and_labels``; ``**options`` is accepted but unused.
        """
        features, labels = self.get_train_data_and_labels(train, train_data)
        self._model = DecisionTreeClassifier(
            criterion=self.criterion,
            splitter=self.splitter,
        )
        # The estimator is fitted on a flattened label array.
        flat_labels = labels.flatten()
        self._model.fit(features, flat_labels)

    def process(self, docs, Pipe=None, **options):
        """Train on documents with a known author, then score all ``docs``.

        The ``"known_numbers"`` option (``None`` when absent) is passed to
        ``train`` as its training data. ``Pipe`` is accepted but unused.
        Returns the result of ``get_results_dict_from_matrix`` applied to the
        complemented class-probability matrix.
        """
        known_docs = [doc for doc in docs if doc.author != ""]
        self.train(known_docs, options.get("known_numbers"))

        features = self.get_test_data(docs, options)
        probabilities = self._model.predict_proba(features)
        # The complement (1 - p) is passed on -- presumably a distance-like
        # score where lower is better; confirm against
        # get_results_dict_from_matrix.
        return self.get_results_dict_from_matrix(1 - probabilities)

    # The display helpers take no ``self`` and are therefore only callable
    # on the class, not on an instance.
    def displayName():
        return "Decision tree classifier (sklearn)"

    def displayDescription():
        return "Decision tree classifier implemented in scikit-learn."
Loading