-
Notifications
You must be signed in to change notification settings - Fork 34
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
b9d29a6
commit 1a55c23
Showing
107 changed files
with
566,106 additions
and
4,835 deletions.
There are no files selected for viewing
262 changes: 0 additions & 262 deletions
262
src/evaluation/figure_notebooks/figure_5/5b_marginal_footprinting_motifs-Copy1.ipynb
This file was deleted.
Oops, something went wrong.
3,107 changes: 0 additions & 3,107 deletions
3,107
src/evaluation/figure_notebooks/figure_5/5e_heterodimers_syntax_exhaustive-Copy1.ipynb
This file was deleted.
Oops, something went wrong.
458 changes: 458 additions & 0 deletions
458
src/evaluation/figure_notebooks/figure_5/chip_seq_compare/Untitled.ipynb
Large diffs are not rendered by default.
Oops, something went wrong.
40 changes: 40 additions & 0 deletions
40
src/evaluation/figure_notebooks/figure_5/chip_seq_compare/atac_to_chip.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
GM12878 IRF4 ENCSR000BGY | ||
GM12878 CTCF ENCSR000DZN | ||
GM12878 SPI1 ENCSR000BGQ | ||
GM12878 RUNX ENCSR000BRI | ||
GM12878 JDP2 ENCSR000BGT | ||
GM12878 NFKB1 ENCSR387QUV | ||
GM12878 IRF4 None | ||
GM12878 ELK1 ENCSR000DZB | ||
GM12878 SP1 ENCSR000BHK | ||
GM12878 NFYA ENCSR000DNN | ||
K562 GATA1 ENCFF148JKK | ||
K562 CTCF ENCSR000EGM | ||
K562 SP1 ENCSR991ELG | ||
K562 NFE2L1 ENCSR632SHZ | ||
K562 ELK1 ENCFF119SCQ | ||
K562 NFYB ENCSR000EGQ | ||
K562 CEBPG ENCSR490LWA | ||
K562 None None | ||
K562 NRF1 ENCFF626VDA | ||
K562 BACH1 ENCFF543FNN | ||
HEPG2 CTCF ENCSR000BIE | ||
HEPG2 HNF4G ENCSR000BNJ | ||
HEPG2 FOXM1 ENCSR435ARI | ||
HEPG2 SP1 ENCSR334KIQ | ||
HEPG2 FOXO3 ENCSR321OAA | ||
HEPG2 HNF1B ENCSR127XTZ | ||
HEPG2 ATF3 ENCSR000BKE | ||
HEPG2 NFYB ENCSR935GZV | ||
HEPG2 ELK1 ENCSR717QSS | ||
HEPG2 CEBPG ENCSR639IIZ | ||
H1ESC CTCF ENCSR000AMF | ||
H1ESC POU5F1 ENCSR000BMU | ||
H1ESC ZIC3 None | ||
H1ESC SP1 ENCSR000BIR | ||
H1ESC NANOG ENCSR000BMT | ||
H1ESC TEAD1 ENCSR000BRY | ||
H1ESC SOX2 None | ||
H1ESC NFY None | ||
H1ESC RFX ENCSR000ECF | ||
H1ESC SIX ENCSR000BIQ |
84 changes: 84 additions & 0 deletions
84
src/evaluation/figure_notebooks/figure_5/chip_seq_compare/atac_to_chip_1.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
GM12878 IRF4 ENCSR000BGY | ||
GM12878 CTCF ENCSR000DZN | ||
GM12878 SPI1 ENCSR000BGQ | ||
GM12878 RUNX3 ENCSR000BRI | ||
GM12878 BATF.JUNB ENCSR000BGT | ||
GM12878 RELB ENCSR387QUV | ||
GM12878 IRF4.SPI1 ENCSR000BGY | ||
GM12878 GABPA ENCSR331HPA | ||
GM12878 SP1 ENCSR000BHK | ||
GM12878 NFYA ENCSR000DNN | ||
GM12878 RELB ENCSR387QUV | ||
GM12878 BATF ENCSR000BGT | ||
GM12878 NRF1 ENCSR000DZO | ||
GM12878 POU2F2 ENCSR000BGP | ||
GM12878 ATF3.CREB1 ENCSR000BJY | ||
GM12878 RELB ENCSR387QUV | ||
GM12878 BATF ENCSR000BGT | ||
GM12878 ZNF143 ENCSR936XTK | ||
GM12878 EBF1 ENCSR000BGU | ||
GM12878 None None | ||
GM12878 PAX5 ENCSR000BHJ | ||
K562 GATA1.TAL1 ENCFF148JKK | ||
K562 CTCF ENCSR000EGM | ||
K562 SP1 ENCSR991ELG | ||
K562 NFE2L1 ENCSR632SHZ | ||
K562 ELK1 ENCFF119SCQ | ||
K562 NFYA ENCSR163VTS | ||
K562 CEBPG ENCSR490LWA | ||
K562 SPI1 ENCSR000BGW | ||
K562 NRF1 ENCFF626VDA | ||
K562 NFE2 ENCSR000FCC | ||
K562 ATF2 ENCSR014ARU | ||
K562 NFIX ENCSR574VJG | ||
K562 ZNF143 ENCSR427WZJ | ||
K562 None None | ||
K562 YY1 ENCSR000EWF | ||
K562 STAT5A ENCSR000BRR | ||
K562 ZBTB33 ENCSR876GXA | ||
K562 SP1 ENCSR991ELG | ||
K562 TFE3 ENCSR953KEY | ||
K562 ATF2 ENCSR014ARU | ||
K562 ZNF143 ENCSR427WZJ | ||
HEPG2 CTCF ENCSR000BIE,ENCSR607XFI,ENCSR019NPF,ENCSR000DUG,ENCSR000AMA,ENCSR000DLS | ||
HEPG2 HNF4A ENCSR469FBY | ||
HEPG2 FOXA2 ENCSR066EBK | ||
HEPG2 SP5 ENCSR019NPF | ||
HEPG2 FOXA1 | ||
HEPG2 HNF1A | ||
HEPG2 JUND | ||
HEPG2 NFYB | ||
HEPG2 GABPA | ||
HEPG2 CEBPB | ||
HEPG2 NR5A1 | ||
HEPG2 NR5A1 | ||
HEPG2 FOXA1 | ||
HEPG2 FOXA2 | ||
HEPG2 SOX13 | ||
HEPG2 FOXA1 | ||
HEPG2 NRF1 | ||
HEPG2 TEAD4 | ||
HEPG2 ZNF143 | ||
HEPG2 CEBPB | ||
HEPG2 ATF4 | ||
H1ESC CTCF | ||
H1ESC POU5F1 | ||
H1ESC None | ||
H1ESC SP1 | ||
H1ESC POU5F1.NANOG | ||
H1ESC TEAD4 | ||
H1ESC NANOG | ||
H1ESC None | ||
H1ESC RFX5 | ||
H1ESC SIX5 | ||
H1ESC TEAD4 | ||
H1ESC ZNF143 | ||
H1ESC JUN | ||
H1ESC NRF1 | ||
H1ESC RFX5 | ||
H1ESC REST | ||
H1ESC CTCF | ||
H1ESC None | ||
H1ESC GABPA | ||
H1ESC None | ||
H1ESC JUN |
92 changes: 92 additions & 0 deletions
92
src/evaluation/figure_notebooks/figure_5/chip_seq_compare/compute_imp_scores.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
import argparse | ||
import pandas as pd | ||
import pyBigWig | ||
import pybedtools | ||
import numpy as np | ||
|
||
# Command-line interface. All paths are required. The bigWig arguments are
# opened with pyBigWig; the BED arguments are read as tab-separated text
# without a header row.
parser = argparse.ArgumentParser(
    description=(
        "Sum bigWig signal over the TOBIAS BED regions that lie entirely "
        "inside 1 kb windows centered on ChIP-seq peak summits, and write "
        "the kept regions plus the summed values as a tab-separated file."
    )
)
parser.add_argument(
    "-ac", "--atac_counts", required=True,
    help="ATAC counts bigWig; values are summed over each kept region",
)
parser.add_argument(
    "-ap", "--atac_profile", required=True,
    help="ATAC profile bigWig; values are summed over each kept region",
)
parser.add_argument(
    "-dc", "--dnase_counts", required=True,
    help="DNase counts bigWig; values are summed over each kept region",
)
parser.add_argument(
    "-dp", "--dnase_profile", required=True,
    help="DNase profile bigWig; values are summed over each kept region",
)
parser.add_argument(
    "-cb", "--chip_bed", required=True,
    help=(
        "tab-separated ChIP peak file without header; column 10 is added "
        "to column 2 to locate the center of each 1 kb window"
    ),
)
parser.add_argument(
    "-cc", "--chip_contrib", required=True,
    help="ChIP bigWig; values are summed over each kept region",
)
parser.add_argument(
    "-tb", "--tobias_bed", required=True,
    help="tab-separated BED file (no header) of the regions to score",
)
parser.add_argument(
    "-o", "--outf", required=True,
    help="output path; written tab-separated without header or index",
)
parser.add_argument(
    "-bwo", "--observed", required=True,
    help=(
        "observed-signal bigWig; values are summed over +/-150 bp around "
        "each kept region's midpoint"
    ),
)
parser.add_argument(
    "-dbwo", "--dnobserved", required=True,
    help=(
        "DNase observed-signal bigWig; values are summed over +/-150 bp "
        "around each kept region's midpoint"
    ),
)
args = parser.parse_args()
|
||
|
||
|
||
def chip_summit_windows(peaks, half_width=500):
    """Return a copy of *peaks* with columns 1/2 replaced by summit windows.

    Column 9 is treated as an offset from column 1 (the start) to the window
    center. Each window spans ``center - half_width`` to
    ``center + half_width``; a start that would be negative is clamped to 0
    while the end is left where it was.

    Args:
        peaks: DataFrame with integer column labels, as produced by
            ``pd.read_csv(..., header=None)``; needs columns 1 and 9.
        half_width: half of the window length in bases.

    Returns:
        A new DataFrame; *peaks* itself is not modified.
    """
    windows = peaks.copy()
    windows[1] = windows[1] + windows[9] - half_width
    windows[2] = windows[1] + 2 * half_width
    # Use .loc so the assignment hits the frame itself; chained indexing
    # (``df[mask][1] = 0``) would only modify a temporary copy.
    windows.loc[windows[1] < 0, 1] = 0
    return windows


def sum_signal(bw, chrom, start, end):
    """Return the sum of ``bw.values(chrom, start, end)`` with NaN as 0."""
    return np.sum(np.nan_to_num(bw.values(chrom, start, end)))


def main(args):
    """Score TOBIAS regions that fall inside ChIP summit windows.

    Keeps the rows of ``args.tobias_bed`` that lie entirely (``f=1.0``)
    inside at least one window built from ``args.chip_bed``, appends seven
    summed bigWig values to each kept row, and writes the table to
    ``args.outf`` as tab-separated text without header or index.

    Args:
        args: parsed command-line namespace holding the input/output paths.

    Raises:
        ValueError: if the intersect output and the TOBIAS table have a
            different number of rows, so the filter cannot be applied.
    """
    # BedTool(path) opens the user's file; example_bedtool() looks the name
    # up inside pybedtools' own test-data directory instead.
    tobias_regions = pybedtools.BedTool(args.tobias_bed)

    chip_peaks = pd.read_csv(args.chip_bed, sep="\t", header=None)
    chip_windows = pybedtools.BedTool.from_dataframe(
        chip_summit_windows(chip_peaks)
    )

    counted = tobias_regions.intersect(chip_windows, c=True, f=1.0)
    result = counted.to_dataframe()
    print(result.head())
    # With c=True the overlap count is appended as the last column, so read
    # it by position rather than by whatever default BED name it lands on.
    inside_window = (result.iloc[:, -1] > 0).to_numpy()

    tobia_bed = pd.read_csv(args.tobias_bed, sep="\t", header=None)
    if len(inside_window) != len(tobia_bed):
        raise ValueError(
            "intersect output has %d rows but %s has %d; cannot align them"
            % (len(inside_window), args.tobias_bed, len(tobia_bed))
        )
    tobia_bed = tobia_bed[inside_window].reset_index(drop=True)

    # (output column, bigWig path) pairs, in output column order.
    region_tracks = [
        ("counts_contrib", args.atac_counts),
        ("profiles_contrib", args.atac_profile),
        ("dnase_counts_contrib", args.dnase_counts),
        ("dnase_profiles_contrib", args.dnase_profile),
        ("chip", args.chip_contrib),
    ]
    midpoint_tracks = [
        ("observed", args.observed),
        ("dnase_observed", args.dnobserved),
    ]
    obs_half_window = 150

    regions = [
        (str(chrom), int(start), int(end))
        for chrom, start, end in zip(tobia_bed[0], tobia_bed[1], tobia_bed[2])
    ]

    handles = {}
    try:
        for column, path in region_tracks + midpoint_tracks:
            handles[column] = pyBigWig.open(path)

        # Tracks summed over the full region.
        for column, _ in region_tracks:
            bw = handles[column]
            tobia_bed[column] = [
                sum_signal(bw, chrom, start, end)
                for chrom, start, end in regions
            ]

        # Tracks summed over a fixed window around the region midpoint.
        for column, _ in midpoint_tracks:
            bw = handles[column]
            values = []
            for chrom, start, end in regions:
                mid = (start + end) // 2
                values.append(
                    sum_signal(
                        bw, chrom, mid - obs_half_window, mid + obs_half_window
                    )
                )
            tobia_bed[column] = values
    finally:
        # Release every bigWig handle that was opened, even after an error.
        for bw in handles.values():
            if bw is not None:
                bw.close()

    tobia_bed.to_csv(args.outf, sep="\t", header=False, index=False)


if __name__ == "__main__":
    main(args)
Oops, something went wrong.