-
Notifications
You must be signed in to change notification settings - Fork 0
/
3_Missing_value.py
52 lines (46 loc) · 1.16 KB
/
3_Missing_value.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
import pandas as pd
import numpy as np
# Toy single-column frame: five 'Age' values, one of which (row 3) is missing.
X = pd.Series([100, 200, 300, np.nan, 1000], name='Age').to_frame()
# Uncomment to inspect the frame:
# print(X)
# Expected output:
#       Age
# 0   100.0
# 1   200.0
# 2   300.0
# 3     NaN
# 4  1000.0
# 1. SimpleImputer
from sklearn.impute import SimpleImputer
# imputer = SimpleImputer()
# X = imputer.fit_transform(X)
# print(X)
# Output: with no strategy given, SimpleImputer uses its default method (mean)
# [[ 100.]
# [ 200.]
# [ 300.]
# [ 400.] ------> mean
# [1000.]]
# Disease-and-age scenario: fill the missing value with the median and also
# append an indicator column that flags which rows were originally missing.
imputer_ind = SimpleImputer(strategy='median', add_indicator=True)
X = imputer_ind.fit(X).transform(X)  # X now refers to the transformed result
# Uncomment to inspect the result:
# print(X)
# Expected output (column 0: imputed Age, column 1: missing-value indicator):
# [[ 100.    0.]
#  [ 200.    0.]
#  [ 300.    0.]
#  [ 250.    1.]   <- median of 100, 200, 300, 1000
#  [1000.    0.]]
# 2. Iterative imputer
# Load the Titanic dataset; the path is relative to the current working
# directory, so run the script from the folder that contains 'Datasets'.
df = pd.read_csv(filepath_or_buffer='Datasets/titanic.csv')
# Uncomment to preview the data and count missing values per column:
# print(df.head())
# print(df.isnull().sum())

# Keep 'Age' as a one-column DataFrame (2-D), the shape the imputers expect.
data = df.loc[:, ['Age']]
# print(data)
# from sklearn.experimental import enable_iterative_imputer
# from sklearn.impute import IterativeImputer
# iterative_imp = IterativeImputer(initial_strategy='median').fit_transform(data)
# print(iterative_imp)
# 3. KNN imputer
# Fill each missing value from the 2 nearest neighbours' values.
# NOTE(review): `data` holds only the 'Age' column, so a row with a missing
# Age has no other feature to measure distance on; scikit-learn's documented
# fallback in that situation is the training-set average for the feature.
# Consider passing additional numeric columns -- TODO confirm.
from sklearn.impute import KNNImputer
knn_imp = (
    KNNImputer(n_neighbors=2)
    .fit(data)
    .transform(data)
)
print(knn_imp)