-
Notifications
You must be signed in to change notification settings - Fork 1
/
SiteSuitabilityClassifier.py
59 lines (49 loc) · 3.09 KB
/
SiteSuitabilityClassifier.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import yaml
import pandas as pd
import argparse
from pandas import IndexSlice
def classify_suitability(flow_df, rps, lower_rp, upper_rp):
    """Score each row of *flow_df* against the [lower_rp, upper_rp] window.

    A ``suitability`` column is written to *flow_df* in place:

    * 0 -- default;
    * 1 -- at least one non-zero total in the columns up to and including
      ``lower_rp`` AND in the columns from ``upper_rp`` onwards;
    * 2 -- additionally, every column between ``lower_rp`` and ``upper_rp``
      (both included) is greater than zero;
    * row sum of the inner columns -- when a row already scored 2 and that
      sum exceeds the number of inner columns.

    Args:
        flow_df: DataFrame with one column per label in *rps*.
        rps: Ordered list of column labels (strings).
        lower_rp: Label of the lower bound. Coerced with ``str`` because a
            YAML config yields ``2`` (int) for an unquoted value while the
            column labels are strings.
        upper_rp: Label of the upper bound; coerced the same way.

    Returns:
        *flow_df*, for convenience (it is modified in place).

    Raises:
        ValueError: If either bound is not one of *rps*.
    """
    lower_rp, upper_rp = str(lower_rp), str(upper_rp)
    for label in (lower_rp, upper_rp):
        if label not in rps:
            raise ValueError(f"Return period {label!r} is not one of {rps}")

    lo = rps.index(lower_rp)
    hi = rps.index(upper_rp)
    # e.g. lower '2', upper '10' -> ['0', '2'], ['2', '5', '10'], ['10', ..., '100']
    lower_list = rps[:lo + 1]
    inner_list = rps[lo:hi + 1]
    upper_list = rps[hi:]

    flow_df['suitability'] = 0

    brackets_window = (
        (flow_df[lower_list].sum(axis=1) > 0)
        & (flow_df[upper_list].sum(axis=1) > 0)
    )
    flow_df.loc[brackets_window, 'suitability'] = 1

    covers_window = (
        (flow_df['suitability'] == 1)
        & (flow_df[inner_list].min(axis=1) > 0)
    )
    flow_df.loc[covers_window, 'suitability'] = 2

    inner_total = flow_df[inner_list].sum(axis=1)
    has_extra = (flow_df['suitability'] == 2) & (inner_total > len(inner_list))
    # pandas aligns the right-hand Series on the index of the masked rows.
    flow_df.loc[has_extra, 'suitability'] = inner_total
    return flow_df


def classify_max_rp(flow_df, rps):
    """Write the highest label reached through an unbroken chain of columns.

    Columns are visited in the order given by *rps*. A row is promoted to
    ``int(rp)`` only when column ``rp`` is non-zero and the row currently
    holds the previous label's value, so a zero column stops the chain for
    that row. Because the starting value is 0, the first step that can
    change anything is the first label whose integer value is not 0.

    Args:
        flow_df: DataFrame with one column per label in *rps*.
        rps: Ordered list of column labels; each must be parseable by ``int``.

    Returns:
        *flow_df*, for convenience (``suitability`` is written in place).
    """
    flow_df['suitability'] = 0
    rp_prev = 0
    for rp in rps:
        rp_value = int(rp)
        promote = (flow_df[rp] != 0) & (flow_df['suitability'] == rp_prev)
        flow_df.loc[promote, 'suitability'] = rp_value
        rp_prev = rp_value
    return flow_df


def main():
    """Load the flow-occurrence table, classify it and print summaries.

    Reads ``flow_occurance_path`` from the YAML config (and ``lower_rp`` /
    ``upper_rp`` in ``suitability`` mode). In ``max`` mode the classified
    table is also written to ``--output``.
    """
    from pathlib import Path  # local import: only needed for the log message

    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, help='Configuration file', default='configs.yaml')
    parser.add_argument('--option', choices=['max', 'suitability'], default='max',
                        help='Classification mode')
    parser.add_argument('--output', type=str, default='/Users/ldp/Documents/max_rp.parquet',
                        help='Parquet file written in max mode')
    args = parser.parse_args()

    # --config always has a value (it has a default), so no None check is needed;
    # a missing file surfaces as FileNotFoundError from open().
    with open(args.config, 'r') as f:
        config = yaml.safe_load(f)

    rps = ['0', '2', '5', '10', '25', '50', '100']

    flow_occurance_df = pd.read_parquet(config['flow_occurance_path'])
    flow_occurance_df = flow_occurance_df.fillna(0)
    print(flow_occurance_df.head(10))

    if args.option == 'suitability':
        classify_suitability(flow_occurance_df, rps, config['lower_rp'], config['upper_rp'])
        print(flow_occurance_df.loc[flow_occurance_df['suitability'] >= 2])
    else:
        classify_max_rp(flow_occurance_df, rps)
        flow_occurance_df.to_parquet(args.output)
        print(f"Suitability saved to {Path(args.output).name}")

    # Filter to index entries labelled ASC / DES, then count rows per suitability.
    # NOTE(review): assumes a row MultiIndex whose second level holds 'ASC'/'DES' -- confirm.
    print(flow_occurance_df.loc[pd.IndexSlice[:, 'ASC']].groupby('suitability').count())
    print(flow_occurance_df.loc[pd.IndexSlice[:, 'DES']].groupby('suitability').count())

    # Collapse to one row per v2number by keeping the row with the highest suitability.
    best_rows = flow_occurance_df.groupby('v2number')['suitability'].idxmax()
    flow_occurance_max_df = flow_occurance_df.loc[best_rows]
    print(flow_occurance_max_df.groupby('suitability').count())
    print(flow_occurance_df['suitability'].value_counts())
    print(flow_occurance_df.head(10))


if __name__ == "__main__":
    main()