-
Notifications
You must be signed in to change notification settings - Fork 2
/
01_star_rating_pipelinebuild_raju.py
73 lines (51 loc) · 1.99 KB
/
01_star_rating_pipelinebuild_raju.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# -*- coding: utf-8 -*-
"""01_star_rating_pipelinebuild_raju.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1gZe7Uane3A-4rEmV-cUEIuT6wRISPLIX
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
from nltk.corpus import stopwords
from tqdm import tqdm
from tqdm import tqdm_notebook
import string
from collections import Counter
import itertools
from sklearn import metrics
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import LinearSVC
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,precision_score,recall_score,classification_report
import pickle
# --- Load the review data and extract the model inputs ----------------------
# Reads the review CSV and exposes the review bodies and their star ratings
# as plain Python lists (one entry per review, in file order).
ca_restaurant_reviews = pd.read_csv("california_restaurants.csv")
# drop=True discards the old index instead of inserting it as a new column.
ca_restaurant_reviews = ca_restaurant_reviews.reset_index(drop=True)

# Whole-column extraction; these two lists are what the train/test split and
# the pipeline below consume.
new_text = ca_restaurant_reviews["text"].values.tolist()
new_stars = ca_restaurant_reviews["review_stars"].values.tolist()

# `texts` / `stars` are kept as module-level names for backward compatibility.
# The previous code built them with
# `[df[col] for review in df]`; iterating a DataFrame yields its column
# labels, so each list held the entire column Series repeated once per column
# rather than one value per review. They are now independent per-review lists.
#
# NOTE(review): the earlier row-by-row loop passed every review through
# `clean_text`, which is not defined or imported anywhere in this file (so the
# script stopped with NameError), and its output was overwritten before being
# used -- the model is trained on the raw `text` column. The loop was removed;
# if text cleaning is wanted, define `clean_text` and apply it to `new_text`.
texts = list(new_text)
stars = list(new_stars)
# --- Split, build and fit the star-rating classifier -------------------------
# 15% of the reviews are held out. shuffle=False means the rows are not
# permuted before splitting, so the split follows the order of the input lists.
X_train, X_test, y_train, y_test = train_test_split(
    new_text,
    new_stars,
    test_size=0.15,
    random_state=42,
    shuffle=False,
)

# Two-step estimator: TF-IDF vectorisation of the raw review text, followed by
# a linear support-vector classifier that predicts the star rating.
training_pipe = Pipeline(
    steps=[
        ("Vectorization", TfidfVectorizer()),
        ("SVC", LinearSVC()),
    ]
)
training_pipe  # notebook-style echo; has no visible effect in a script
y_train[0]     # notebook-style echo of the first training label

training_pipe.fit(X_train, y_train)

# Smoke check: predict the rating for a single hand-written review.
out = training_pipe.predict(['Worst Restaurant'])[0]
out
# --- Persist the fitted pipeline and verify the round trip -------------------
# Serialise the trained pipeline to disk.
with open('svcpipe.pickle', 'wb') as f:
    pickle.dump(training_pipe, f)

# Read it straight back. The file was produced by the lines above;
# pickle files from untrusted sources must never be loaded this way.
with open('svcpipe.pickle', 'rb') as f:
    loadedpipe = pickle.load(f)

# Confirm the restored pipeline can still predict on unseen text.
testing = loadedpipe.predict(
    ["This is a good restaurant, but the service is average"]
)
testing[0]  # notebook-style echo of the predicted star rating