# python_trivariate_example

"""

Generate bivariate tables based on cohorts defined by the ICEES API.  In
this approach, we define the following variables:

    Q = cohort definition variable
    A = variable 1
    B = variable 2

    Each variable has levels defined by rules (usually equals, less than, greater than, range)

    Output:
        data frame with columns Q, A, B, frequency


Sample ICEES query code at: https://github.com/NCATS-Tangerine/icees-api/blob/master/demonstrations/ICEES_API_Notebook.ipynb

"""
import requests
import sys
import json
import numpy as np
import pandas as pd


def generate_cohort(jsonCohortQuery):
    """ Create an ICEES cohort from a JSON condition and return its cohort id.

        The condition is POSTed unchanged to the ICEES patient/2010/cohort
        endpoint.  On any non-200 reply the status code, reason, request
        object and response text are printed and the process exits with -1.

        jsonCohortQuery: JSON text sent as the request body
        returns: the 'cohort_id' entry found under 'return value' in the reply
    """
    # The request below is sent with verify=False; turn off the urllib3
    # warnings first so they do not clutter the script output.
    requests.packages.urllib3.disable_warnings()
    reply = requests.post(
        'https://icees.renci.org:16339/patient/2010/cohort',
        headers={"Content-Type" : "application/json", "accept": "application/json"},
        data=jsonCohortQuery,
        verify=False,
    )

    # Happy path first: hand back the id of the cohort the service created.
    if reply.status_code == 200:
        return reply.json()['return value']['cohort_id']

    # Anything else is treated as fatal for the whole script.
    failure_report = "Request failed {0}\n{1}\n{2}\n{3}".format(
        reply.status_code, reply.reason, reply.request, reply.text)
    print(failure_report)
    sys.exit(-1)


def generate_bivariate_table(cohortid, jsonABquery, featureQ, featureAs, featureBs):
    """ Fetch the feature A x feature B frequency table for one ICEES cohort.

        POSTs jsonABquery to the cohort's feature_association2 endpoint and
        flattens the returned 'feature_matrix' into a long-format data frame.

        returned format of the feature matrix:
            [[cell, cell, ...], [cell, cell, ...], ...]

            inner list is over the dimension of the first feature (a)
            outer list is over the dimension of the second feature (b)
            each cell is indexed by name; only its 'frequency' entry is read
            here (the service reportedly also supplies total, row and column
            percentages per cell -- not used by this function)

        cohortid:    id of the cohort to query (converted with str() for the URL)
        jsonABquery: JSON text sent as the request body
        featureQ:    label written into column Q of every output row
        featureAs:   labels for the levels of feature a, indexed by inner position
        featureBs:   labels for the levels of feature b, indexed by outer position

        returns: DataFrame with columns Q, A, B, frequency and one row per
                 matrix cell; an empty frame with those columns when the
                 service returns an empty matrix.

        On any non-200 reply the status code, reason, request object and
        response text are printed and the process exits with -1.
    """
    # The request below is sent with verify=False; turn off the urllib3
    # warnings first so they do not clutter the script output.
    requests.packages.urllib3.disable_warnings()
    headers = {"Content-Type" : "application/json", "accept": "application/json"}
    icees_url = 'https://icees.renci.org:16339/patient/2010/cohort/' + str(cohortid) + '/feature_association2'
    responsepkt = requests.post(icees_url, headers=headers, data=jsonABquery, verify=False)
    if responsepkt.status_code != 200:
        print("Request failed {0}\n{1}\n{2}\n{3}".format(responsepkt.status_code, responsepkt.reason, responsepkt.request, responsepkt.text))
        sys.exit(-1)
    feature_matrix = responsepkt.json()['return value']['feature_matrix']

    # Collect one (Q, A, B, frequency) record per cell and build the frame in
    # a single step.  This avoids indexing feature_matrix[0] (which fails on
    # an empty matrix) and the slow cell-by-cell fill of a preallocated frame.
    records = []
    for bindex, b_level_cells in enumerate(feature_matrix):
        for aindex, cell in enumerate(b_level_cells):
            records.append((featureQ, featureAs[aindex], featureBs[bindex], float(cell['frequency'])))
    return pd.DataFrame(records, columns=['Q', 'A', 'B', 'frequency'])


# First define the query variables.
#
# Cohort queries: one cohort per TotalEDInpatientVisits level.  Each entry is
# (label, operator, value); the labels (featureQs) and the JSON conditions
# (jsonQqueries) are both derived from this single list so that entry i of
# one always describes entry i of the other.  (A hand-maintained label list
# previously contained "ed5" twice, mislabelling every cohort from 6 upward.)
_cohort_levels = [("ed{0}".format(visits), "=", visits) for visits in range(10)]
_cohort_levels.append(("ed10+", ">", 9))

featureQs = [label for label, _operator, _value in _cohort_levels]
# Compact separators keep the request bodies free of optional whitespace,
# e.g. {"TotalEDInpatientVisits":{"operator":"=","value":0}}
jsonQqueries = [
    json.dumps({"TotalEDInpatientVisits": {"operator": operator, "value": value}},
               separators=(",", ":"))
    for _label, operator, value in _cohort_levels
]

# Query for the bivariate table to generate for each cohort.
#
# Each entry pairs an output label with the level value sent to the service,
# so the label lists (featureAs / featureBs) and the request body
# (jsonABquery) are always in the same order and of the same length.
_featureA_levels = [
    ('pm25_1', 1),
    ('pm25_2', 2),
    ('pm25_3', 3),
    ('pm25_4', 4),
    ('pm25_5', 5),
]
_featureB_levels = [
    ('genotype_hyper', 'Hyper'),
    ('genotype_hypo', 'Hypo'),
    ('genotype_super_hypo', 'Super Hypo'),
    ('genotype_null', 'Neither'),
]

featureAs = [label for label, _value in _featureA_levels]
featureBs = [label for label, _value in _featureB_levels]
# Serialise with json.dumps rather than a hand-indented string literal: the
# body is guaranteed to be valid JSON and carries no stray tabs or trailing
# spaces.
jsonABquery = json.dumps({
    "feature_a": {
        "PM25_ANNUAL_AVERAGE_qcut": [
            {"operator": "=", "value": value} for _label, value in _featureA_levels
        ],
    },
    "feature_b": {
        "RESPONDER_STATUS": [
            {"operator": "=", "value": value} for _label, value in _featureB_levels
        ],
    },
})


# Iterate over each cohort query: create the cohort, fetch its A x B table,
# and accumulate all tables into one long data frame.
df = pd.DataFrame(columns=['Q', 'A', 'B', 'frequency'])
cohort_tables = []
for featureQ, jsonCohortQuery in zip(featureQs, jsonQqueries):
    cohortid = generate_cohort(jsonCohortQuery)
    cohort_tables.append(
        generate_bivariate_table(cohortid, jsonABquery, featureQ, featureAs, featureBs))
    # DataFrame.append was removed in pandas 2.0; rebuild the running result
    # with pd.concat so the progress print still shows everything so far.
    df = pd.concat(cohort_tables, ignore_index=True)
    print("cohort bivariate query res {0}".format(df))

df.to_csv("./output.csv")