-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathGlobal_Warming_Supervised_Learning_Linear_Model(Not a Linear data).py
272 lines (163 loc) · 8.15 KB
/
Global_Warming_Supervised_Learning_Linear_Model(Not a Linear data).py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Jun 15 19:29:01 2019
@author: Suganth Mohan
Description : Used to predict the weather of India with a simple linear regression model and
see if it poses a threat to humanity, or at least try to see. :-)
Modules Used:
import json
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime
"""
# >>>>>>> ADD YOUR MODULES HERE
#### USED TO HANDLE THE JSON TYPE DATA
import json
#### USED FOR THE NUMPY ARRAY OPERATIONS
import numpy as np
#### USED FOR PLOTTING THE OBSERVATIONS INTO A GRAPH
import matplotlib.pyplot as plt
#### USED FOR DATAFRAMES CREATION AND HANDLING
import pandas as pd
#### USED FOR CREATING A TIME SERIES ON THE DATES
from datetime import datetime
def GetDataSource(*Retain_items,
                  Source_Directory="/Users/Deepak/suganths_terminal/Suganth's Git/GlobalWarming",
                  Source_File="Json_Monthly_Seasonal_Annual.json"):
    '''
    USED TO EXTRACT THE ITEMS FROM JSON FILE AND CREATE A DATAFRAME WITH THE DATASET
    ARGUMENTS :
        Retain_items     -> column names to keep, in the order given. When none are
                            passed, every column found in the file is returned.
        Source_Directory -> (keyword only) directory that holds the JSON file.
        Source_File      -> (keyword only) name of the JSON file inside that directory.
    constraint -> The retained headers must be an exact match of the labels in the
                  file, otherwise the column selection raises a KeyError.
    returns : A pandas DataFrame with one row per entry of the file's 'data' list
              and one column per 'label' of its 'fields' list.
    '''
    # IMPORTED LOCALLY SO THE BLOCK DOES NOT RELY ON THE MODULE LEVEL IMPORTS
    import os

    # JOIN WITH A PATH SEPARATOR: PLAIN STRING CONCATENATION GLUED THE FILE NAME
    # ONTO THE LAST DIRECTORY COMPONENT AND POINTED AT A FILE THAT DOES NOT EXIST
    source_path = os.path.join(Source_Directory, Source_File)

    # READ THE RAW DATA AND LOAD IT WITH JSON MODULE
    with open(source_path) as readFileHandle:
        raw_data = json.load(readFileHandle)

    # EXTRACT THE FIELDS LABELS ALONE
    Fields_list = [field['label'] for field in raw_data['fields']]

    # PAIR EVERY DATA ROW WITH THE LABELS TO GET ONE RECORD (DICTIONARY) PER ROW
    Weatheria_dataStruct = [dict(zip(Fields_list, row)) for row in raw_data['data']]
    dataset = pd.DataFrame(Weatheria_dataStruct)

    # NARROW DOWN TO THE REQUESTED COLUMNS ONLY WHEN SOME WERE REQUESTED
    if Retain_items:
        dataset = dataset[list(Retain_items)]
    return dataset
def ConvertMonthType(Month_):
    '''
    ConvertMonthType -> Translates a month abbreviation into its calendar number so
    that a date can be built for the time series.
    arguments : Month_
        The month as a three letter upper case string, 'JAN' through 'DEC'.
    returns :
        The number of the month, 1 for 'JAN' up to 12 for 'DEC'.
    constraint :
        Any other spelling is missing from the lookup table and raises a KeyError.
    '''
    # MONTH ABBREVIATIONS IN CALENDAR ORDER
    month_names = (
        'JAN', 'FEB', 'MAR', 'APR', 'MAY', 'JUN',
        'JUL', 'AUG', 'SEP', 'OCT', 'NOV', 'DEC',
    )

    # NUMBER THE ABBREVIATIONS STARTING FROM 1
    month_numbers = {name: number for number, name in enumerate(month_names, start=1)}

    # LOOK UP THE REQUESTED MONTH
    return month_numbers[Month_]
def CreateLinearDataset(Dataset_):
    '''
    CreateLinearDataset -> is used to convert the multi field columns of months and year into
    a linear dataset with one entry per month along with its date
    arguments :
        Dataset_ -> The pandas dataframe to convert. It needs a 'YEAR' column; every
                    other column name is handed to ConvertMonthType, so it must be a
                    month abbreviation that function accepts.
    constraint : The function is written to handle a pandas dataframe dataset with
                 the matching formats, cannot handle other formats.
    returns : A dataframe with the columns ` Date ' and ` Celcius ', sorted by date
              with a fresh index. Each date is the 15th of the month of the given
              year. An input without rows or without month columns gives an empty
              dataframe that still has both columns.
    '''
    # COLLECT ONE RECORD PER (YEAR, MONTH) PAIR
    LinearDataSet = []

    # ITERATE THROUGH EACH AND EVERY ROWSET
    for _, Feature in Dataset_.iterrows():
        # THE YEAR IS THE SAME FOR EVERY MONTH OF THE ROW, SO CONVERT IT ONCE
        year = int(Feature['YEAR'])

        # EVERY REMAINING COLUMN IS A MONTH HOLDING THAT MONTH'S TEMPERATURE
        for column, temperature in Feature.items():
            # IF YEAR COLUMN APPEARS, SKIP IT
            if column == 'YEAR':
                continue

            LinearDataSet.append({
                'Date': datetime(year, int(ConvertMonthType(Month_ = column)), 15),
                'Celcius': temperature,
            })

    # NAMING THE COLUMNS UP FRONT KEEPS THEM PRESENT EVEN WHEN NO RECORD WAS COLLECTED;
    # SELECTING THEM AFTERWARDS RAISED A KeyError ON AN EMPTY RESULT
    linear_frame = pd.DataFrame(LinearDataSet, columns = ['Date', 'Celcius'])

    # SORT ACCORDING TO THE DATE FIELD AND RESET INDEX
    return linear_frame.sort_values('Date').reset_index(drop = True)
def main():
    '''
    Runs the whole experiment: loads the yearly/monthly temperatures, flattens them
    into one dated entry per month, fits a linear regression on an 80/20
    train/test split and plots the fit over the training data.
    '''
    # >>>>>>> PREPROCESSING PART
    #### GET THE DATASOURCE
    Weatheria = GetDataSource('YEAR','JAN','FEB','MAR','APR','MAY','JUN','JUL','AUG','SEP','OCT','NOV','DEC')

    #### CONVERT THE MULTI DIMENSIONAL TABLE INTO MONTHLY FACTOR ENTRIES FOR OBSERVATION PURPOSES
    LinearDataSet = CreateLinearDataset(Dataset_ = Weatheria)

    #### SPLIT THE INDEPENDENT AND DEPENDENT VARIABLES
    ##### INDEPENDENT VARS (FIRST COLUMN, THE DATE)
    independent_vars = LinearDataSet.iloc[:,0:1].values
    ##### DEPENDENT VARS (SECOND COLUMN, THE TEMPERATURE)
    dependent_vars = LinearDataSet.iloc[:,1:2].values

    #### CONVERT THE DATA HERE
    ##### CONVERT THE TIME SERIES INTO LABELED VARIABLES
    ##### (EACH DATE IS REPLACED BY AN INTEGER LABEL THE REGRESSION CAN WORK WITH)
    from sklearn.preprocessing import LabelEncoder
    independent_LE = LabelEncoder()
    independent_vars = independent_LE.fit_transform(independent_vars[:,0])

    ###### RESHAPE THE INDEPENDENT VAR OF 1D ARRAY TO MATRIX TYPE HERE
    independent_vars = independent_vars.reshape((independent_vars.size,1))

    #### SINCE WE ARE USING A SUPERVISED LEARNING MODEL, WE WILL BE
    #### USING TRAINING DATA AND TESTING DATA SPLITS
    #### NOTE: sklearn.cross_validation WAS REMOVED FROM SCIKIT-LEARN,
    #### train_test_split IS PROVIDED BY sklearn.model_selection
    from sklearn.model_selection import train_test_split

    ##### SET THE TRAINING SIZE TO 80% which will be 0.8
    ##### SO THE TEST SIZE WILL BE 1 - 0.8 = 0.2 ( 20% ) TO USE
    independent_train,independent_test,dependent_train,dependent_test = train_test_split(
        independent_vars,
        dependent_vars,
        test_size = 0.2,
        random_state = 0)

    # >>>>>>>> USE YOUR MODEL HERE
    #### USE LINEAR MODEL HERE (FOR NOW TO TEST)
    from sklearn.linear_model import LinearRegression
    regressor = LinearRegression()
    regressor.fit(independent_train,dependent_train)

    # >>>>>>>> PREDICT YOUR FUTURE VALUES HERE
    # Predicting the Test set results (kept for inspection, not plotted below)
    dependent_pred = regressor.predict(independent_test)

    # >>>>>>>> PLOT THE GRAPH OF YOUR MODEL HERE
    ##### VISUALIZE THE TRAINING MODEL RESULTS HERE
    # Visualising the Training set results: observations in red, fitted line in blue
    plt.scatter(independent_train, dependent_train, color = 'red')
    plt.plot(independent_train, regressor.predict(independent_train), color = 'blue')
    plt.title('Date vs Temperature (Training set)')
    plt.xlabel('DATE')
    plt.ylabel('TEMPERATURE')
    plt.show()
# RUN THE PIPELINE ONLY WHEN THE FILE IS EXECUTED AS A SCRIPT, NOT WHEN IT IS IMPORTED
if __name__ == "__main__":
    main()