Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow psql port etc and update for current Pandas #62

Open
wants to merge 11 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# Local settings
utils/setup_user_env_local.sh

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,9 @@ bash postgres_make_concepts.sh

Next, you'll need to build 3 additional materialized views necessary for this pipeline. To do this (again with
schema edit permission), navigate to `utils` and run `bash postgres_make_extended_concepts.sh` followed by
`psql -d mimic -f niv-durations.sql`.
`psql -d mimic -f niv-durations.sql`. (You can supply extra `psql`
connection parameters; see the comments at the top of
`postgres_make_extended_concepts.sh` for details.)

## Step 4: Set Cohort Selection and Extraction Criteria

Expand Down
23 changes: 14 additions & 9 deletions mimic_direct_extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,8 +146,8 @@ def save_pop(
def get_variable_mapping(mimic_mapping_filename):
# Read in the second level mapping of the itemids
var_map = pd.read_csv(mimic_mapping_filename, index_col=None)
var_map = var_map.ix[(var_map['LEVEL2'] != '') & (var_map['COUNT']>0)]
var_map = var_map.ix[(var_map['STATUS'] == 'ready')]
var_map = var_map[(var_map['LEVEL2'] != '') & (var_map['COUNT']>0)]
var_map = var_map[(var_map['STATUS'] == 'ready')]
var_map['ITEMID'] = var_map['ITEMID'].astype(int)

return var_map
Expand Down Expand Up @@ -231,12 +231,12 @@ def save_numerics(

var_map = var_map[
['LEVEL2', 'ITEMID', 'LEVEL1']
].rename_axis(
{'LEVEL2': 'LEVEL2', 'LEVEL1': 'LEVEL1', 'ITEMID': 'itemid'}, axis=1
].rename(
columns={'LEVEL2': 'LEVEL2', 'LEVEL1': 'LEVEL1', 'ITEMID': 'itemid'}
).set_index('itemid')

X['value'] = pd.to_numeric(X['value'], 'coerce')
X.astype({k: int for k in ID_COLS}, inplace=True)
X = X.astype({k: int for k in ID_COLS})

to_hours = lambda x: max(0, x.days*24 + x.seconds // 3600)

Expand Down Expand Up @@ -300,9 +300,9 @@ def save_numerics(

# Get the max time for each of the subjects so we can reconstruct!
if subjects_filename is not None:
np.save(os.path.join(outPath, subjects_filename), data['subject_id'].as_matrix())
np.save(os.path.join(outPath, subjects_filename), data['subject_id'].to_numpy())
if times_filename is not None:
np.save(os.path.join(outPath, times_filename), data['max_hours'].as_matrix())
np.save(os.path.join(outPath, times_filename), data['max_hours'].to_numpy())

#fix nan in count to be zero
idx = pd.IndexSlice
Expand All @@ -321,7 +321,7 @@ def save_numerics(
X = X.drop(columns = drop_col)

########
if dynamic_filename is not None: np.save(os.path.join(outPath, dynamic_filename), X.as_matrix())
if dynamic_filename is not None: np.save(os.path.join(outPath, dynamic_filename), X.to_numpy())
if dynamic_hd5_filename is not None: X.to_hdf(os.path.join(outPath, dynamic_hd5_filename), 'X')

return X
Expand Down Expand Up @@ -732,6 +732,8 @@ def plot_variable_histograms(col_names, df):

ap.add_argument('--psql_host', type=str, default=None,
help='Postgres host. Try "/var/run/postgresql/" for Unix domain socket errors.')
ap.add_argument('--psql_port', type=int, default=None,
help='Postgres port. Defaults to 5432 if not provided.')
ap.add_argument('--psql_dbname', type=str, default='mimic',
help='Postgres database name.')
ap.add_argument('--psql_schema_name', type=str, default='mimiciii',
Expand Down Expand Up @@ -762,6 +764,8 @@ def plot_variable_histograms(col_names, df):
args = vars(ap.parse_args())
for key in sorted(args.keys()):
print(key, args[key])
if args['psql_host'] == "SOCKET":
args['psql_host'] = None

if not isdir(args['resource_path']):
raise ValueError("Invalid resource_path: %s" % args['resource_path'])
Expand Down Expand Up @@ -801,9 +805,10 @@ def plot_variable_histograms(col_names, df):
idx_hd5_filename = splitext(idx_hd5_filename)[0] + '_' + pop_size + splitext(idx_hd5_filename)[1]

dbname = args['psql_dbname']
schema_name = args['psql_schema_name']
schema_name = 'public,' + args['psql_schema_name']
query_args = {'dbname': dbname}
if args['psql_host'] is not None: query_args['host'] = args['psql_host']
if args['psql_port'] is not None: query_args['port'] = args['psql_port']
if args['psql_user'] is not None: query_args['user'] = args['psql_user']
if args['psql_password'] is not None: query_args['password'] = args['psql_password']

Expand Down
16 changes: 8 additions & 8 deletions utils/Makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
SHELL:=/bin/bash

PSQL_EXECUTABLE:=$(shell which psql)
PSQL_EXECUTABLE:=${shell which psql}

MIMIC_CODE_DIR:=${shell grep "MIMIC_CODE_DIR" setup_user_env.sh | cut -d'=' -f2}
MIMIC_CODE_DIR:=${shell source ./setup_user_env.sh && echo $$MIMIC_CODE_DIR}

#=== Commands

Expand Down Expand Up @@ -37,23 +37,23 @@ build_concepts_mimic_code: setup_user_env.sh clone_mimic_code_repo
{ \
source ./setup_user_env.sh; \
cd ${MIMIC_CODE_DIR}/concepts; \
psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./make-concepts.sql; \
psql "${DBSTRING}" -f ./make-concepts.sql; \
cd ../../MIMIC_Extract/utils; \
}

.PHONY: build_extra_concepts
build_extra_concepts: setup_user_env.sh niv-durations.sql crystalloid-bolus.sql colloid-bolus.sql
{ \
source ./setup_user_env.sh; \
psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./niv-durations.sql; \
psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./crystalloid-bolus.sql; \
psql -U ${DBUSER} "${DBSTRING}" -h ${HOST} -f ./colloid-bolus.sql; \
psql "${DBSTRING}" -f ./niv-durations.sql; \
psql "${DBSTRING}" -f ./crystalloid-bolus.sql; \
psql "${DBSTRING}" -f ./colloid-bolus.sql; \
}

#=== Env Checks

.PHONY: has_psql_exe
has_psql_exe: setup_user_env.sh
has_psql_exe:
ifndef PSQL_EXECUTABLE
$(error "Error: 'psql' is undefined. Please install/add to current path.")
${error "Error: 'psql' is undefined. Please install/add to current path."}
endif
4 changes: 3 additions & 1 deletion utils/build_curated_from_psql.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ python -u $MIMIC_EXTRACT_CODE_DIR/mimic_direct_extract.py \
--exit_after_loading 0 \
--plot_hist 0 \
--pop_size $POP_SIZE \
--psql_password $PGPASSWORD \
--psql_user $DBUSER \
--psql_password $DBPASSWORD \
--psql_host $HOST \
--psql_port $PORT \
--min_percent 0 \
18 changes: 12 additions & 6 deletions utils/postgres_make_extended_concepts.sh
Original file line number Diff line number Diff line change
@@ -1,16 +1,22 @@
# This file makes tables for the concepts in this subfolder.
# Be sure to run postgres-functions.sql first, as the concepts rely on those function definitions.
# Note that this may take a large amount of time and hard drive space.
#
# Exporting DBCONNEXTRA before calling this script will append its value
# to the psql connection string. For example, running:
# DBCONNEXTRA="user=mimic password=mimic" bash postgres_make_extended_concepts.sh
# will add these settings to every psql invocation. (Note that "dbname"
# and "search_path" do not need to be set, as the script supplies them.)

# string replacements are necessary for some queries
export REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g"
export REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g'
export CONNSTR='-d mimic'
REGEX_DATETIME_DIFF="s/DATETIME_DIFF\((.+?),\s?(.+?),\s?(DAY|MINUTE|SECOND|HOUR|YEAR)\)/DATETIME_DIFF(\1, \2, '\3')/g"
REGEX_SCHEMA='s/`physionet-data.(mimiciii_clinical|mimiciii_derived|mimiciii_notes).(.+?)`/\2/g'
CONNSTR="dbname=mimic $DBCONNEXTRA"

# this is set as the search_path variable for psql
# a search path of "public,mimiciii" will search both public and mimiciii
# schemas for data, but will create tables on the public schema
export PSQL_PREAMBLE='SET search_path TO public,mimiciii'
PSQL_PREAMBLE='SET search_path TO public,mimiciii'

echo ''
echo '==='
Expand All @@ -21,7 +27,7 @@ echo '==='
echo ''

echo 'Directory 5 of 9: fluid_balance'
{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR}
{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql ${CONNSTR}
{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS colloid_bolus; CREATE TABLE colloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/colloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql "${CONNSTR}"
{ echo "${PSQL_PREAMBLE}; DROP TABLE IF EXISTS crystalloid_bolus; CREATE TABLE crystalloid_bolus AS "; cat $MIMIC_CODE_DIR/concepts/fluid_balance/crystalloid_bolus.sql; } | sed -r -e "${REGEX_DATETIME_DIFF}" | sed -r -e "${REGEX_SCHEMA}" | psql "${CONNSTR}"

echo 'Finished creating tables.'
31 changes: 19 additions & 12 deletions utils/setup_user_env.sh
Original file line number Diff line number Diff line change
@@ -1,21 +1,28 @@
#!/bin/bash

export MIMIC_CODE_DIR=$(realpath ../../mimic-code)
export MIMIC_EXTRACT_CODE_DIR=$(realpath ../)

export MIMIC_DATA_DIR=$MIMIC_EXTRACT_CODE_DIR/data/
export MIMIC_EXTRACT_CODE_DIR=$(realpath ..)
export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_EXTRACT_CODE_DIR/data/curated

export MIMIC_EXTRACT_OUTPUT_DIR=$MIMIC_DATA_DIR/curated/
mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR

export DBUSER=bnestor
export DBUSER=mimic
export DBNAME=mimic
export DBPASSWORD=mimic
export SCHEMA=mimiciii
export HOST=mimic
export DBSTRING="dbname=$DBNAME options=--search_path=$SCHEMA"
alias psql="psql -h $HOST -U $DBUSER "
export HOST=SOCKET
export PORT=5432

export PGHOST=$HOST
export PGUSER=$DBUSER
# Allow users to override any of the above in a local configuration file
if [ -f "setup_user_env_local.sh" ]
then
. setup_user_env_local.sh
fi

mkdir -p $MIMIC_EXTRACT_OUTPUT_DIR

export PGPASSWORD=$1
if [ $HOST = SOCKET ]
then
export DBSTRING="port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA"
else
export DBSTRING="host=$HOST port=$PORT user=$DBUSER password=$DBPASSWORD dbname=$DBNAME options=--search_path=$SCHEMA"
fi