dssg · DhananjayAshok · Apr 21, 2023 · Apr 21, 2023 · Apr 21, 2023
diff --git a/bias_adjustment_analysis_2023/20210414_comparison_figures.ipynb b/bias_adjustment_analysis_2023/20210414_comparison_figures.ipynb
diff --git a/bias_adjustment_analysis_2023/DJRecallAdjuster.py b/bias_adjustment_analysis_2023/DJRecallAdjuster.py
diff --git a/bias_adjustment_analysis_2023/README.md b/bias_adjustment_analysis_2023/README.md
@@ -0,0 +1,19 @@
+# Replicating Multiple Timestep Bias Adjustment and Bias Adjustment Analysis results
+
+The code in this directory provides step-by-step instructions for replicating the results from our recent study of fairness-accuracy trade-offs on the Education Crowdfunding, Joco, Housing Safety, and El Salvador datasets.
+
+
+## Requirements
+
+You'll need a PostgreSQL server (version 11.10 or above) with around 300 GB of free disk space to load the data extract, as well as a machine running Python 3.7 or higher for the analysis.
+
+## Getting Set Up
+
+1. Install the python requirements on your machine by running `pip install -r requirements.txt` from this directory
+2. Go to `peeps-chili/config` and fill in the credentials in the file `db_default_profile`
+3. Start a Jupyter notebook server and open the desired notebook in your browser: `general_notebook` for the multi-timestep adjustment results, or `entity_analysis` and `single_model_analysis` for the entity and single-model cases
+4. Follow the instructions in the notebook to reproduce the figures from the study or re-run the bias analysis. The notebook also provides some notes on further exploring the data and results once you have them loaded.
+
+
+## Changing Run Information
+Each dataset has a directory with a config file inside it. See `DJRecallAdjuster.py` to get a sense of what options the config file accepts. To change which dataset the notebooks run on, change the `database_directory` variable (often on line 2) to the appropriate directory name, and ensure the config file is set up properly.
diff --git a/bias_adjustment_analysis_2023/config.txt b/bias_adjustment_analysis_2023/config.txt
@@ -0,0 +1 @@
+{"scoring": {"testing_metric_groups": [{"metrics": ["precision@", "recall@"], "thresholds": {"top_n": [1, 10, 100, 1000, 2500, 5000, 7500, 10000], "percentiles": [1, 2, 3, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]}}, {"metrics": ["roc_auc"]}], "training_metric_groups": [{"metrics": ["precision@"], "thresholds": {"top_n": [1, 10, 100, 1000, 2000, 5000, 10000], "percentiles": [1, 2, 3, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]}}]}, "grid_config": {"sklearn.dummy.DummyClassifier": {"strategy": ["uniform"]}, "sklearn.ensemble.AdaBoostClassifier": {"n_estimators": [500, 1000]}, "sklearn.ensemble.ExtraTreesClassifier": {"n_jobs": [-1], "criterion": ["entropy"], "max_depth": [20, 50, 100], "max_features": ["log2"], "n_estimators": [10, 50, 1000], "min_samples_split": [5, 25, 50]}, "sklearn.ensemble.RandomForestClassifier": {"n_jobs": [-1], "criterion": ["gini", "entropy"], "max_depth": [50, 100], "class_weight": [null], "max_features": ["sqrt"], "n_estimators": [1000, 2000, 3000], "min_samples_split": [10, 50]}, "triage.component.catwalk.estimators.classifiers.ScaledLogisticRegression": {"C": [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0], "penalty": ["l1", "l2"]}}, "random_seed": 23456789, "label_config": {"query": "with projectinfo as \n(\n  select projectid, total_price_excluding_optional_support as funding_requirement\n  from projects\n),\ndonationinfo as\n(\n  select \n  projectid, \n  sum(donation_to_project) as sum_donations \n  from donations \n  where \n    donation_timestamp < '{as_of_date}'::timestamp + interval '{label_timespan}'\n  group by projectid\n),\nids_to_map as \n(\n  select \n    projectinfo.projectid, \n    sum_donations,\n    funding_requirement,\n    ((donationinfo.sum_donations < 0.1 * projectinfo.funding_requirement)::bool)::integer as outcome\n  from projectinfo, donationinfo\n  where projectinfo.projectid = donationinfo.projectid\n)\nselect \n  entity_id, outcome\nfrom projectid_mapping join ids_to_map 
using(projectid)\n", "include_missing_labels_in_train_as": true}, "cohort_config": {"name": "projects_posted_today", "query": " with cohort as ( select projectid, total_price_excluding_optional_support as funding_requirement from projects where date_posted = '{as_of_date}'::timestamp - interval '1d' ) select entity_id from cohort join projectid_mapping using(projectid) "}, "model_comment": "all_features", "config_version": "v7", "temporal_config": {"label_end_time": "2014-01-01", "test_durations": ["6month"], "feature_end_time": "2014-01-01", "label_start_time": "2010-01-01", "feature_start_time": "2010-01-01", "test_label_timespans": ["4month"], "max_training_histories": ["1y"], "model_update_frequency": "2month", "training_label_timespans": ["4month"], "test_as_of_date_frequencies": ["1day"], "training_as_of_date_frequencies": ["1day"]}, "feature_aggregations": [{"groups": ["entity_id"], "prefix": "prstat", "from_obj": "(select \n  entity_id,\n  total_price_excluding_optional_support as funding_requirement, \n  school_charter::integer,\n  school_nlns::integer,\n  school_kipp::integer,\n  school_magnet::integer,\n  school_year_round::integer,\n  teacher_teach_for_america::integer,\n  teacher_ny_teaching_fellow::integer,\n  primary_focus_area as prim,\n  secondary_focus_area as sec,\n  resource_type as resource,\n  poverty_level as poverty,\n  students_reached,\n  grade_level as grade,\n  date_posted\nfrom \n  projects join projectid_mapping using(projectid)) as proj\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "funding_requirement"}, {"metrics": ["max"], "quantity": "students_reached"}], "categoricals": [{"column": "poverty", "choices": ["high poverty", "highest poverty", "low poverty", "moderate poverty"], "metrics": ["max"]}, {"column": "grade", "choices": ["Grades 6-8", "Grades 3-5", "Grades 9-12", "Grades PreK-2"], "metrics": ["max"]}, {"column": "prim", "choices": ["Health & Sports", "Special Needs", "Applied Learning", "Math & 
Science", "Music & The Arts", "History & Civics", "Literacy & Language"], "metrics": ["max"]}, {"column": "sec", "choices": ["Health & Sports", "Special Needs", "Applied Learning", "Math & Science", "Music & The Arts", "History & Civics", "Literacy & Language"], "metrics": ["max"]}, {"column": "resource", "choices": ["Other", "Visitors", "Books", "Trips", "Supplies", "Technology"], "metrics": ["max"]}], "aggregates_imputation": {"all": {"type": "constant", "value": "0"}}, "knowledge_date_column": "date_posted", "categoricals_imputation": {"all": {"type": "null_category"}}}, {"groups": ["entity_id", "teacher_acctid", "schoolid", "school_district", "primary_focus_subject", "primary_focus_area", "resource_type", "grade_level"], "prefix": "dnstat", "from_obj": "(\n  select\n  entity_id,\n  schoolid,\n  school_zip,\n  school_city,\n  school_state,\n  school_district,\n  primary_focus_subject,\n  primary_focus_area,\n  resource_type,\n  teacher_acctid,\n  grade_level,\n  donation_to_project as donation,\n  donation_timestamp\n  from projects\n  join donations using(projectid)\n  join projectid_mapping using(projectid)\n) AS dnstat\n", "intervals": ["all", "1week", "1month", "3month"], "aggregates": [{"metrics": ["sum", "avg", "count"], "quantity": "donation"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "donation_timestamp"}, {"groups": ["entity_id"], "prefix": "posstat_1mo", "from_obj": "(\n  select \n  entity_id,\n  date_posted,\n  frac_teacher,\n  frac_school,\n  frac_district,\n  frac_city,\n  frac_state,\n  frac_pf,\n  frac_res,\n  frac_grade,\n  imp_teacher::integer,\n  imp_school::integer,\n  imp_district::integer,\n  imp_city::integer,\n  imp_state::integer,\n  imp_pf::integer,\n  imp_res::integer,\n  imp_grade::integer\n  from past_success_1month_2\n  join projectid_mapping using(projectid)\n) AS pastsuc1mo\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "frac_teacher"}, {"metrics": 
["max"], "quantity": "frac_school"}, {"metrics": ["max"], "quantity": "frac_district"}, {"metrics": ["max"], "quantity": "frac_city"}, {"metrics": ["max"], "quantity": "frac_state"}, {"metrics": ["max"], "quantity": "frac_pf"}, {"metrics": ["max"], "quantity": "frac_res"}, {"metrics": ["max"], "quantity": "frac_grade"}, {"metrics": ["max"], "quantity": "imp_teacher"}, {"metrics": ["max"], "quantity": "imp_school"}, {"metrics": ["max"], "quantity": "imp_district"}, {"metrics": ["max"], "quantity": "imp_city"}, {"metrics": ["max"], "quantity": "imp_state"}, {"metrics": ["max"], "quantity": "imp_pf"}, {"metrics": ["max"], "quantity": "imp_res"}, {"metrics": ["max"], "quantity": "imp_grade"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}, {"groups": ["entity_id"], "prefix": "posstat_3mo", "from_obj": "(\n  select \n  entity_id,\n  date_posted,\n  frac_teacher,\n  frac_school,\n  frac_district,\n  frac_city,\n  frac_state,\n  frac_pf,\n  frac_res,\n  frac_grade,\n  imp_teacher::integer,\n  imp_school::integer,\n  imp_district::integer,\n  imp_city::integer,\n  imp_state::integer,\n  imp_pf::integer,\n  imp_res::integer,\n  imp_grade::integer\n  from past_success_3month_2\n  join projectid_mapping using(projectid)\n) AS pastsuc3mo\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "frac_teacher"}, {"metrics": ["max"], "quantity": "frac_school"}, {"metrics": ["max"], "quantity": "frac_district"}, {"metrics": ["max"], "quantity": "frac_city"}, {"metrics": ["max"], "quantity": "frac_state"}, {"metrics": ["max"], "quantity": "frac_pf"}, {"metrics": ["max"], "quantity": "frac_res"}, {"metrics": ["max"], "quantity": "frac_grade"}, {"metrics": ["max"], "quantity": "imp_teacher"}, {"metrics": ["max"], "quantity": "imp_school"}, {"metrics": ["max"], "quantity": "imp_district"}, {"metrics": ["max"], "quantity": "imp_city"}, {"metrics": ["max"], "quantity": "imp_state"}, {"metrics": ["max"], 
"quantity": "imp_pf"}, {"metrics": ["max"], "quantity": "imp_res"}, {"metrics": ["max"], "quantity": "imp_grade"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}, {"groups": ["entity_id"], "prefix": "posstat_6mo", "from_obj": "(\n  select \n  entity_id,\n  date_posted,\n  frac_teacher,\n  frac_school,\n  frac_district,\n  frac_city,\n  frac_state,\n  frac_pf,\n  frac_res,\n  frac_grade,\n  imp_teacher::integer,\n  imp_school::integer,\n  imp_district::integer,\n  imp_city::integer,\n  imp_state::integer,\n  imp_pf::integer,\n  imp_res::integer,\n  imp_grade::integer\n  from past_success_6month_2\n  join projectid_mapping using(projectid)\n) AS pastsuc6mo\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "frac_teacher"}, {"metrics": ["max"], "quantity": "frac_school"}, {"metrics": ["max"], "quantity": "frac_district"}, {"metrics": ["max"], "quantity": "frac_city"}, {"metrics": ["max"], "quantity": "frac_state"}, {"metrics": ["max"], "quantity": "frac_pf"}, {"metrics": ["max"], "quantity": "frac_res"}, {"metrics": ["max"], "quantity": "frac_grade"}, {"metrics": ["max"], "quantity": "imp_teacher"}, {"metrics": ["max"], "quantity": "imp_school"}, {"metrics": ["max"], "quantity": "imp_district"}, {"metrics": ["max"], "quantity": "imp_city"}, {"metrics": ["max"], "quantity": "imp_state"}, {"metrics": ["max"], "quantity": "imp_pf"}, {"metrics": ["max"], "quantity": "imp_res"}, {"metrics": ["max"], "quantity": "imp_grade"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}, {"groups": ["entity_id"], "prefix": "text", "from_obj": "(select\n  entity_id,\n  date_posted,\n  n_words as num_words,\n  n_unique as num_unique,\n  n_sentences as num_sentences,\n  n_chars as num_chars,\n  count_q as ctr_quest,\n  count_excl as ctr_excl,\n  avg_words as avg_num_words,\n  fk_readability as fk_read,\n  dc_readability as dc_read,\n  
ari_readability as ari_read,\n  gf_readability as gf_read,\n  imp_readability::integer as imp_read,\n  imp_essay::integer as imp_essay\nfrom essay_features\n  join projectid_mapping using (entity_id)\n  join projects using (projectid)) as essay_text\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "num_words"}, {"metrics": ["max"], "quantity": "num_unique"}, {"metrics": ["max"], "quantity": "num_sentences"}, {"metrics": ["max"], "quantity": "num_chars"}, {"metrics": ["max"], "quantity": "ctr_quest"}, {"metrics": ["max"], "quantity": "ctr_excl"}, {"metrics": ["max"], "quantity": "avg_num_words"}, {"metrics": ["max"], "quantity": "fk_read"}, {"metrics": ["max"], "quantity": "dc_read"}, {"metrics": ["max"], "quantity": "ari_read"}, {"metrics": ["max"], "quantity": "gf_read"}, {"metrics": ["max"], "quantity": "imp_read"}, {"metrics": ["max"], "quantity": "imp_essay"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}], "individual_importance": {"methods": [], "n_ranks": 1}, "feature_group_definition": {"prefix": ["prstat", "dnstat", "posstat_1mo", "posstat_3mo", "posstat_6mo", "text"]}, "feature_group_strategies": ["all"]}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		{"scoring": {"testing_metric_groups": [{"metrics": ["precision@", "recall@"], "thresholds": {"top_n": [1, 10, 100, 1000, 2500, 5000, 7500, 10000], "percentiles": [1, 2, 3, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]}}, {"metrics": ["roc_auc"]}], "training_metric_groups": [{"metrics": ["precision@"], "thresholds": {"top_n": [1, 10, 100, 1000, 2000, 5000, 10000], "percentiles": [1, 2, 3, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 60, 70, 80, 90, 100]}}]}, "grid_config": {"sklearn.dummy.DummyClassifier": {"strategy": ["uniform"]}, "sklearn.ensemble.AdaBoostClassifier": {"n_estimators": [500, 1000]}, "sklearn.ensemble.ExtraTreesClassifier": {"n_jobs": [-1], "criterion": ["entropy"], "max_depth": [20, 50, 100], "max_features": ["log2"], "n_estimators": [10, 50, 1000], "min_samples_split": [5, 25, 50]}, "sklearn.ensemble.RandomForestClassifier": {"n_jobs": [-1], "criterion": ["gini", "entropy"], "max_depth": [50, 100], "class_weight": [null], "max_features": ["sqrt"], "n_estimators": [1000, 2000, 3000], "min_samples_split": [10, 50]}, "triage.component.catwalk.estimators.classifiers.ScaledLogisticRegression": {"C": [0.0001, 0.001, 0.01, 0.1, 1.0, 10.0], "penalty": ["l1", "l2"]}}, "random_seed": 23456789, "label_config": {"query": "with projectinfo as \n(\n select projectid, total_price_excluding_optional_support as funding_requirement\n from projects\n),\ndonationinfo as\n(\n select \n projectid, \n sum(donation_to_project) as sum_donations \n from donations \n where \n donation_timestamp < '{as_of_date}'::timestamp + interval '{label_timespan}'\n group by projectid\n),\nids_to_map as \n(\n select \n projectinfo.projectid, \n sum_donations,\n funding_requirement,\n ((donationinfo.sum_donations < 0.1 * projectinfo.funding_requirement)::bool)::integer as outcome\n from projectinfo, donationinfo\n where projectinfo.projectid = donationinfo.projectid\n)\nselect \n entity_id, outcome\nfrom projectid_mapping join ids_to_map using(projectid)\n", 
"include_missing_labels_in_train_as": true}, "cohort_config": {"name": "projects_posted_today", "query": " with cohort as ( select projectid, total_price_excluding_optional_support as funding_requirement from projects where date_posted = '{as_of_date}'::timestamp - interval '1d' ) select entity_id from cohort join projectid_mapping using(projectid) "}, "model_comment": "all_features", "config_version": "v7", "temporal_config": {"label_end_time": "2014-01-01", "test_durations": ["6month"], "feature_end_time": "2014-01-01", "label_start_time": "2010-01-01", "feature_start_time": "2010-01-01", "test_label_timespans": ["4month"], "max_training_histories": ["1y"], "model_update_frequency": "2month", "training_label_timespans": ["4month"], "test_as_of_date_frequencies": ["1day"], "training_as_of_date_frequencies": ["1day"]}, "feature_aggregations": [{"groups": ["entity_id"], "prefix": "prstat", "from_obj": "(select \n entity_id,\n total_price_excluding_optional_support as funding_requirement, \n school_charter::integer,\n school_nlns::integer,\n school_kipp::integer,\n school_magnet::integer,\n school_year_round::integer,\n teacher_teach_for_america::integer,\n teacher_ny_teaching_fellow::integer,\n primary_focus_area as prim,\n secondary_focus_area as sec,\n resource_type as resource,\n poverty_level as poverty,\n students_reached,\n grade_level as grade,\n date_posted\nfrom \n projects join projectid_mapping using(projectid)) as proj\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "funding_requirement"}, {"metrics": ["max"], "quantity": "students_reached"}], "categoricals": [{"column": "poverty", "choices": ["high poverty", "highest poverty", "low poverty", "moderate poverty"], "metrics": ["max"]}, {"column": "grade", "choices": ["Grades 6-8", "Grades 3-5", "Grades 9-12", "Grades PreK-2"], "metrics": ["max"]}, {"column": "prim", "choices": ["Health & Sports", "Special Needs", "Applied Learning", "Math & Science", "Music & The Arts", "History & 
Civics", "Literacy & Language"], "metrics": ["max"]}, {"column": "sec", "choices": ["Health & Sports", "Special Needs", "Applied Learning", "Math & Science", "Music & The Arts", "History & Civics", "Literacy & Language"], "metrics": ["max"]}, {"column": "resource", "choices": ["Other", "Visitors", "Books", "Trips", "Supplies", "Technology"], "metrics": ["max"]}], "aggregates_imputation": {"all": {"type": "constant", "value": "0"}}, "knowledge_date_column": "date_posted", "categoricals_imputation": {"all": {"type": "null_category"}}}, {"groups": ["entity_id", "teacher_acctid", "schoolid", "school_district", "primary_focus_subject", "primary_focus_area", "resource_type", "grade_level"], "prefix": "dnstat", "from_obj": "(\n select\n entity_id,\n schoolid,\n school_zip,\n school_city,\n school_state,\n school_district,\n primary_focus_subject,\n primary_focus_area,\n resource_type,\n teacher_acctid,\n grade_level,\n donation_to_project as donation,\n donation_timestamp\n from projects\n join donations using(projectid)\n join projectid_mapping using(projectid)\n) AS dnstat\n", "intervals": ["all", "1week", "1month", "3month"], "aggregates": [{"metrics": ["sum", "avg", "count"], "quantity": "donation"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "donation_timestamp"}, {"groups": ["entity_id"], "prefix": "posstat_1mo", "from_obj": "(\n select \n entity_id,\n date_posted,\n frac_teacher,\n frac_school,\n frac_district,\n frac_city,\n frac_state,\n frac_pf,\n frac_res,\n frac_grade,\n imp_teacher::integer,\n imp_school::integer,\n imp_district::integer,\n imp_city::integer,\n imp_state::integer,\n imp_pf::integer,\n imp_res::integer,\n imp_grade::integer\n from past_success_1month_2\n join projectid_mapping using(projectid)\n) AS pastsuc1mo\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "frac_teacher"}, {"metrics": ["max"], "quantity": "frac_school"}, {"metrics": ["max"], "quantity": 
"frac_district"}, {"metrics": ["max"], "quantity": "frac_city"}, {"metrics": ["max"], "quantity": "frac_state"}, {"metrics": ["max"], "quantity": "frac_pf"}, {"metrics": ["max"], "quantity": "frac_res"}, {"metrics": ["max"], "quantity": "frac_grade"}, {"metrics": ["max"], "quantity": "imp_teacher"}, {"metrics": ["max"], "quantity": "imp_school"}, {"metrics": ["max"], "quantity": "imp_district"}, {"metrics": ["max"], "quantity": "imp_city"}, {"metrics": ["max"], "quantity": "imp_state"}, {"metrics": ["max"], "quantity": "imp_pf"}, {"metrics": ["max"], "quantity": "imp_res"}, {"metrics": ["max"], "quantity": "imp_grade"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}, {"groups": ["entity_id"], "prefix": "posstat_3mo", "from_obj": "(\n select \n entity_id,\n date_posted,\n frac_teacher,\n frac_school,\n frac_district,\n frac_city,\n frac_state,\n frac_pf,\n frac_res,\n frac_grade,\n imp_teacher::integer,\n imp_school::integer,\n imp_district::integer,\n imp_city::integer,\n imp_state::integer,\n imp_pf::integer,\n imp_res::integer,\n imp_grade::integer\n from past_success_3month_2\n join projectid_mapping using(projectid)\n) AS pastsuc3mo\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "frac_teacher"}, {"metrics": ["max"], "quantity": "frac_school"}, {"metrics": ["max"], "quantity": "frac_district"}, {"metrics": ["max"], "quantity": "frac_city"}, {"metrics": ["max"], "quantity": "frac_state"}, {"metrics": ["max"], "quantity": "frac_pf"}, {"metrics": ["max"], "quantity": "frac_res"}, {"metrics": ["max"], "quantity": "frac_grade"}, {"metrics": ["max"], "quantity": "imp_teacher"}, {"metrics": ["max"], "quantity": "imp_school"}, {"metrics": ["max"], "quantity": "imp_district"}, {"metrics": ["max"], "quantity": "imp_city"}, {"metrics": ["max"], "quantity": "imp_state"}, {"metrics": ["max"], "quantity": "imp_pf"}, {"metrics": ["max"], "quantity": "imp_res"}, {"metrics": ["max"], 
"quantity": "imp_grade"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}, {"groups": ["entity_id"], "prefix": "posstat_6mo", "from_obj": "(\n select \n entity_id,\n date_posted,\n frac_teacher,\n frac_school,\n frac_district,\n frac_city,\n frac_state,\n frac_pf,\n frac_res,\n frac_grade,\n imp_teacher::integer,\n imp_school::integer,\n imp_district::integer,\n imp_city::integer,\n imp_state::integer,\n imp_pf::integer,\n imp_res::integer,\n imp_grade::integer\n from past_success_6month_2\n join projectid_mapping using(projectid)\n) AS pastsuc6mo\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "frac_teacher"}, {"metrics": ["max"], "quantity": "frac_school"}, {"metrics": ["max"], "quantity": "frac_district"}, {"metrics": ["max"], "quantity": "frac_city"}, {"metrics": ["max"], "quantity": "frac_state"}, {"metrics": ["max"], "quantity": "frac_pf"}, {"metrics": ["max"], "quantity": "frac_res"}, {"metrics": ["max"], "quantity": "frac_grade"}, {"metrics": ["max"], "quantity": "imp_teacher"}, {"metrics": ["max"], "quantity": "imp_school"}, {"metrics": ["max"], "quantity": "imp_district"}, {"metrics": ["max"], "quantity": "imp_city"}, {"metrics": ["max"], "quantity": "imp_state"}, {"metrics": ["max"], "quantity": "imp_pf"}, {"metrics": ["max"], "quantity": "imp_res"}, {"metrics": ["max"], "quantity": "imp_grade"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}, {"groups": ["entity_id"], "prefix": "text", "from_obj": "(select\n entity_id,\n date_posted,\n n_words as num_words,\n n_unique as num_unique,\n n_sentences as num_sentences,\n n_chars as num_chars,\n count_q as ctr_quest,\n count_excl as ctr_excl,\n avg_words as avg_num_words,\n fk_readability as fk_read,\n dc_readability as dc_read,\n ari_readability as ari_read,\n gf_readability as gf_read,\n imp_readability::integer as imp_read,\n imp_essay::integer as 
imp_essay\nfrom essay_features\n join projectid_mapping using (entity_id)\n join projects using (projectid)) as essay_text\n", "intervals": ["1d"], "aggregates": [{"metrics": ["max"], "quantity": "num_words"}, {"metrics": ["max"], "quantity": "num_unique"}, {"metrics": ["max"], "quantity": "num_sentences"}, {"metrics": ["max"], "quantity": "num_chars"}, {"metrics": ["max"], "quantity": "ctr_quest"}, {"metrics": ["max"], "quantity": "ctr_excl"}, {"metrics": ["max"], "quantity": "avg_num_words"}, {"metrics": ["max"], "quantity": "fk_read"}, {"metrics": ["max"], "quantity": "dc_read"}, {"metrics": ["max"], "quantity": "ari_read"}, {"metrics": ["max"], "quantity": "gf_read"}, {"metrics": ["max"], "quantity": "imp_read"}, {"metrics": ["max"], "quantity": "imp_essay"}], "aggregates_imputation": {"all": {"type": "constant", "value": 0}}, "knowledge_date_column": "date_posted"}], "individual_importance": {"methods": [], "n_ranks": 1}, "feature_group_definition": {"prefix": ["prstat", "dnstat", "posstat_1mo", "posstat_3mo", "posstat_6mo", "text"]}, "feature_group_strategies": ["all"]}