Skip to content

Commit

Permalink
Improve tooltips, humanness header and forms
Browse files Browse the repository at this point in the history
- add tooltips to result page
- improve sequence tooltips
- add "kabat + vernier" CDR definition
- store peptide from starting position instead of center
  • Loading branch information
prihoda committed Aug 5, 2021
1 parent 486ad21 commit 327ac82
Show file tree
Hide file tree
Showing 21 changed files with 444 additions and 327 deletions.
28 changes: 24 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ you will need to download the OASis database (22GB uncompressed).
If you have [Docker](https://www.docker.com/products/docker-desktop),
you can run a simplified BioPhi server using:
```bash
docker run TBD
docker run [TO DO]
```

### 2b. Run simplified server using Conda
Expand All @@ -49,8 +49,9 @@ conda install biophi
biophi web
```

**Note:** This is simplified usage for local use only. See Deploying your own BioPhi server *(TODO LINK)* to learn about
deploying BioPhi properly on a server.
**Note:** This is simplified usage for local use only.
See the [Deploying your own BioPhi server](#deploying-your-own-biophi-server) section below
to learn about deploying BioPhi properly on a server.

</details>

Expand All @@ -62,7 +63,26 @@ BioPhi also provides a command-line interface that enables bulk processing.
<summary>See more</summary>

```bash
TBD
# Get humanized FASTA
# Expected input: Both chains of each antibody should have the same ID
# with an optional _VL/_VH or _HC/_LC suffix
biophi sapiens mabs.fa --fasta-only --output humanized.fa

# Run full humanization & humanness evaluation pipeline
biophi sapiens mabs.fa \
--oasis-db path/to/downloaded/OASis_9mers_v1.db \
--output humanized/

# Get the Sapiens probability matrix (score of each residue at each position)
biophi sapiens mabs.fa --scores-only --output scores.csv

# Get mean Sapiens score (one score for each sequence)
biophi sapiens mabs.fa --mean-score-only --output scores.csv

# Get OASis humanness evaluation
biophi oasis mabs.fa \
--oasis-db path/to/downloaded/OASis_9mers_v1.db \
--output oasis.xlsx
```

</details>
Expand Down
2 changes: 1 addition & 1 deletion biophi/common/web/static/bio.css
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,7 @@ td.vernier {
z-index: 10;
display: inline-block;
position: absolute;
bottom: -0.80rem;
bottom: -0.95rem;
left: 0px;
}

Expand Down
6 changes: 6 additions & 0 deletions biophi/common/web/static/main.css
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,12 @@ p.subheading {
width: 280px;
}

.tooltip-peptides .tooltip-inner {
max-width: 320px;
min-width: 280px;
font-size: 90%;
}

tr[data-href] {
cursor: pointer;
}
Expand Down
8 changes: 6 additions & 2 deletions biophi/common/web/templates/numbering_component.html
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,14 @@
<div class="d-inline-block">
<label class="form-label" for="cdr_definition_input">
CDR Definition
{{ info_icon('Scheme used to define the CDR regions') }}
{{ info_icon('Scheme used to define the CDR regions. ' + ('Given regions are not humanized, the parental sequence is preserved.' if humanizing else '')) }}
</label>
<select class="form-select form-select-sm" style="width: 150px;" id="cdr_definition_input" name="cdr_definition">
<select class="form-select form-select-sm" style="width: 150px;"
id="cdr_definition_input" name="cdr_definition">
<option value="kabat" {% if cdr_definition == 'kabat' %}selected{% endif %}>Kabat</option>
{% if humanizing %}
<option value="kabat_vernier" {% if cdr_definition == 'kabat_vernier' %}selected{% endif %}>Kabat + Vernier</option>
{% endif %}
<option value="chothia" {% if cdr_definition == 'chothia' %}selected{% endif %}>Chothia</option>
<option value="imgt" {% if cdr_definition == 'imgt' %}selected{% endif %}>IMGT</option>
<option value="north" {% if cdr_definition == 'north' %}selected{% endif %}>North</option>
Expand Down
6 changes: 4 additions & 2 deletions biophi/common/web/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,10 @@ def icon(name, s=16):


@app.template_global()
def info_icon(text):
return Markup(f'<span data-tooltip-classes="tooltip-wide" data-bs-toggle="tooltip" title="{text}">{icon("info-circle-fill")}</span>')
def info_icon(text, filled=False, secondary=True, delay=100, s=16):
name = 'info-circle-fill' if filled else 'info-circle'
return Markup(f'<span class="{"text-secondary" if secondary else ""}" data-tooltip-classes="tooltip-wide" '
f'data-bs-delay="{delay}" data-bs-toggle="tooltip" title="{text}">{icon(name, s=s)}</span>')


@app.template_global()
Expand Down
17 changes: 10 additions & 7 deletions biophi/humanization/cli/oasis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,11 @@
@click.command()
@click.argument('inputs', required=True, nargs=-1)
@click.option('--output', required=False, help='Output XLSX report file path')
@click.option('--oas-db', required=True, help='OAS peptide database connection string')
@click.option('--oasis-db', required=True, help='OAS peptide database connection string')
@click.option('--scheme', default='kabat', help='Numbering scheme (kabat, chothia, imgt, aho)')
@click.option('--cdr-definition', default='kabat', help='Numbering scheme (kabat, chothia, imgt, north)')
@click.option('--min-percent-subjects', default=10, type=float, help='Minimum percent of OAS subjects to consider peptide human')
def oasis(inputs, output, oas_db, scheme, cdr_definition, min_percent_subjects):
def oasis(inputs, output, oasis_db, scheme, cdr_definition, min_percent_subjects):
"""OASis: Antibody humanness evaluation using 9-mer peptide search.
OASis evaluates antibody humanness by searching all overlapping 9-mers
Expand All @@ -25,8 +25,8 @@ def oasis(inputs, output, oas_db, scheme, cdr_definition, min_percent_subjects):
\b
# Evaluate humanness from FASTA file(s), save OASis humanness report to an XLSX spreadsheet
biophi oasis input.fa --output ./report/ \\
--oas-db sqlite:////Absolute/path/to/oas_human_subject_9mers_2019_11.db
biophi oasis input.fa --output ./report.xlsx \\
--oasis-db sqlite:////Absolute/path/to/oas_human_subject_9mers_2019_11.db
INPUTS: Input FASTA file path(s)
"""
Expand All @@ -35,12 +35,15 @@ def oasis(inputs, output, oas_db, scheme, cdr_definition, min_percent_subjects):
| | | |/ _ \\\\___ \| / __|
| |_| / ___ \___| | \__ \\
\___/_/ \_\___/|_|___/
{}'''.format(f'version 1.0'.rjust(20)))
''')

assert 1 <= min_percent_subjects <= 90, '--min-percent-subjects should be between 1 and 90'

if not output.endswith('.xlsx'):
raise ValueError(f'The --output is a spreadsheet and should have an .xlsx extension')

click.echo(f'Settings:', err=True)
click.echo(f'- OAS database: {oas_db}', err=True)
click.echo(f'- OASis database: {oasis_db}', err=True)
click.echo('', err=True)

click.echo(f'Loading chains: {" ".join(inputs)}', err=True)
Expand All @@ -66,7 +69,7 @@ def oasis(inputs, output, oas_db, scheme, cdr_definition, min_percent_subjects):
show_unpaired_warning(antibody_inputs)

oasis_params = OASisParams(
oasis_db_path=oas_db,
oasis_db_path=oasis_db,
min_fraction_subjects=min_percent_subjects/100
)
pool = Pool()
Expand Down
17 changes: 10 additions & 7 deletions biophi/humanization/cli/sapiens.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,14 @@
@click.option('--fasta-only', is_flag=True, default=False, type=bool, help='Output only a FASTA file with humanized sequences (speeds up processing)')
@click.option('--scores-only', is_flag=True, default=False, type=bool, help='Output only a CSV file with Sapiens position*residue scores')
@click.option('--mean-score-only', is_flag=True, default=False, type=bool, help='Output only a CSV file with one Sapiens score per sequence')
@click.option('--oas-db', required=False, help='OAS peptide database connection string (required to run OASis)')
@click.option('--oasis-db', required=False, help='OAS peptide database connection string (required to run OASis)')
@click.option('--version', default='latest', help='Sapiens trained model name')
@click.option('--iterations', type=int, default=1, help='Run Sapiens given number of times to discover more humanizing mutations')
@click.option('--scheme', default=HumanizationParams.cdr_definition, help=f'Numbering scheme: one of {", ".join(SUPPORTED_SCHEMES)}')
@click.option('--cdr-definition', default=HumanizationParams.cdr_definition, help=f'CDR definition: one of {", ".join(SUPPORTED_CDR_DEFINITIONS)}')
@click.option('--humanize-cdrs', is_flag=True, default=False, type=bool, help='Allow humanizing mutations in CDRs')
@click.option('--limit', required=False, metavar='N', type=int, help='Process only first N records')
def sapiens(inputs, output, fasta_only, scores_only, mean_score_only, version, iterations, scheme, cdr_definition, humanize_cdrs, limit, oas_db):
def sapiens(inputs, output, fasta_only, scores_only, mean_score_only, version, iterations, scheme, cdr_definition, humanize_cdrs, limit, oasis_db):
"""Sapiens: Antibody humanization using deep learning.
Sapiens is trained on 20 million natural antibody sequences
Expand All @@ -50,7 +50,7 @@ def sapiens(inputs, output, fasta_only, scores_only, mean_score_only, version, i
\b
# Humanize FASTA file(s), save to directory along with OASis humanness report
biophi sapiens input.fa --output ./report/ \\
--oas-db sqlite:////Absolute/path/to/oas_human_subject_9mers_2019_11.db
--oasis-db sqlite:////Absolute/path/to/oas_human_subject_9mers_2019_11.db
INPUTS: Input FASTA file path(s). If not provided, creates an interactive session.
"""
Expand All @@ -60,7 +60,7 @@ def sapiens(inputs, output, fasta_only, scores_only, mean_score_only, version, i
\___ \ / _` | '_ \| |/ _ \ '_ \/ __|
___| | |_| | |_| | | __/ | | \__ \\
|____/ \__,_| __/|_|\___|_| |_|___/
|_| ''')
|_| ''')

click.echo(f'Settings:', err=True)
click.echo(f'- Predicting using Sapiens model: {version}', err=True)
Expand All @@ -84,9 +84,9 @@ def sapiens(inputs, output, fasta_only, scores_only, mean_score_only, version, i
iterations=iterations
)
oasis_params = OASisParams(
oasis_db_path=oas_db,
oasis_db_path=oasis_db,
min_fraction_subjects=0.10
) if oas_db else None
) if oasis_db else None

if inputs:
if scores_only or mean_score_only:
Expand Down Expand Up @@ -210,7 +210,10 @@ def sapiens_fasta_only(inputs, output_fasta, humanization_params, limit=None):

def sapiens_full(inputs, output_dir, humanization_params, oasis_params, limit=None):
if oasis_params is None:
raise ValueError('OASis params need to be provided for full output, or consider using --fasta-only')
raise ValueError('Use --oasis-db PATH_TO_OASIS.db to get full output, or consider using --fasta-only')
if output_dir is None:
raise ValueError('Use --output mydir/ to specify the output directory')

if not os.path.exists(output_dir) or not os.path.isdir(output_dir):
os.mkdir(output_dir)
if len(os.listdir(output_dir)):
Expand Down
44 changes: 34 additions & 10 deletions biophi/humanization/methods/humanization.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class SapiensHumanizationParams(HumanizationParams):
method = 'sapiens'
model_version: str = 'latest'
humanize_cdrs: bool = False
backmutate_vernier: bool = False
iterations: int = 1

def get_export_name(self):
Expand Down Expand Up @@ -75,9 +76,14 @@ class HumanizedResidueAnnot:

@dataclass
class ChainHumanization:
# Chain object containing the parental sequence
parental_chain: Chain
# Chain object containing the humanized sequence
humanized_chain: Chain
# Scores predicted from last parental sequence, can be used to explain the prediction
scores: Dict[Position, Dict[str, float]]
# Scores predicted from last humanized sequence, can be used to propose next mutations
next_scores: Dict[Position, Dict[str, float]]

@cached_property
def alignment(self) -> Alignment:
Expand All @@ -90,11 +96,12 @@ def get_alignment_string(self):
chain_label = 'VH' if self.parental_chain.is_heavy_chain() else 'VL'
return f'{self.parental_chain.name} {chain_label}\n{self.alignment}'

def get_top_scores(self, n):
def get_top_scores(self, n, next=False):
scores = self.next_scores if next else self.scores
top_scores = []
for i in range(n):
top_n_scores = []
for pos, aa_scores in self.scores.items():
for pos, aa_scores in scores.items():
aa, score = sorted(aa_scores.items(), key=lambda d: -d[1])[i]
top_n_scores.append((pos, aa, score))
top_scores.append(top_n_scores)
Expand Down Expand Up @@ -181,25 +188,31 @@ def cdr_grafting_humanize_chain(parental_chain: Chain, params: CDRGraftingHumani
# Compute Sapiens scores (used for Designer and final Sapiens pass if enabled)
sapiens_humanization = sapiens_humanize_chain(
humanized_chain,
params=SapiensHumanizationParams(iterations=params.sapiens_iterations)
params=SapiensHumanizationParams(
iterations=params.sapiens_iterations,
backmutate_vernier=params.backmutate_vernier
)
)
if params.sapiens_iterations:
humanized_chain = sapiens_humanization.humanized_chain

return ChainHumanization(
parental_chain=parental_chain,
humanized_chain=humanized_chain,
scores=sapiens_humanization.scores
scores=sapiens_humanization.scores,
next_scores=sapiens_humanization.next_scores
)


def sapiens_humanize_chain(parental_chain: Chain, params: SapiensHumanizationParams) -> ChainHumanization:
# Repeat Sapiens multiple times if requested, we start with the parental chain
humanized_chain = parental_chain.clone()
# Get Sapiens scores as a positions (rows) by amino acids (columns) matrix
pred = sapiens_predict_chain(humanized_chain, model_version=params.model_version)

pred = None
for iteration in range(params.iterations):
# Get Sapiens scores as a positions (rows) by amino acids (columns) matrix
pred = sapiens_predict_chain(humanized_chain, model_version=params.model_version)

# Create humanized sequence by taking the amino acid with highest score at each position
humanized_seq = ''.join(pred.idxmax(axis=1).values)

Expand All @@ -210,13 +223,24 @@ def sapiens_humanize_chain(parental_chain: Chain, params: SapiensHumanizationPar

# Graft parental CDRs into the humanized sequence, unless humanizing CDRs as well
if not params.humanize_cdrs:
humanized_chain = parental_chain.graft_cdrs_onto(humanized_chain)
humanized_chain = parental_chain.graft_cdrs_onto(
humanized_chain,
backmutate_vernier=params.backmutate_vernier
)
else:
if params.backmutate_vernier:
raise ValueError('Cannot backmutate Vernier regions when humanizing CDRs')

# Get Sapiens scores as a positions (rows) by amino acids (columns) matrix
pred = sapiens_predict_chain(humanized_chain, model_version=params.model_version)
if pred is None:
# Support case with 0 iterations, still return probabilities
pred = sapiens_predict_chain(parental_chain, model_version=params.model_version)

# Predict scores of potential next mutation from the final humanized sequence
pred_next = sapiens_predict_chain(humanized_chain, model_version=params.model_version)

return ChainHumanization(
parental_chain=parental_chain,
humanized_chain=humanized_chain,
scores={pos: row.to_dict() for pos, (i, row) in zip(humanized_chain.positions, pred.iterrows())}
scores={pos: row.to_dict() for pos, (i, row) in zip(humanized_chain.positions, pred.iterrows())},
next_scores={pos: row.to_dict() for pos, (i, row) in zip(humanized_chain.positions, pred_next.iterrows())},
)
29 changes: 18 additions & 11 deletions biophi/humanization/methods/humanness.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ class ChainHumanness:
v_germline_names: List[str]
j_germline_names: List[str]
v_germline_family: str
v_germline_suffix: str
germline_family_residue_frequency: Dict[Position, Dict[str, float]]
chain_type_residue_frequency: Dict[Position, Dict[str, float]]

Expand Down Expand Up @@ -128,18 +129,17 @@ def get_peptide_length(self) -> int:
assert len(set(lengths)) == 1, f'Peptides should have same lengths, got: {set(lengths)}'
return lengths[0]

def get_positional_humanness(self, min_fraction_subjects) -> List[Tuple[Position, str, int]]:
def get_positional_humanness(self, min_fraction_subjects) -> List[Tuple[Position, str, List[PeptideHumanness]]]:
chain_positions = list(self.chain.positions)
chain_len = len(self.chain)
peptide_len = self.get_peptide_length()
assert peptide_len % 2 == 1, 'Peptide needs to have odd length'
half = int((peptide_len - 1) / 2)
annots = []
for raw_pos, (pos, aa) in enumerate(self.chain):
window = [chain_positions[i] for i in range(max(0, raw_pos-half), min(chain_len, raw_pos+half+1))]
num_non_human = sum(not self.peptides[peptide_pos].is_human(min_fraction_subjects)
for peptide_pos in window if peptide_pos in self.peptides)
annots.append((pos, aa, num_non_human))
window = [chain_positions[i] for i in range(max(0, raw_pos-peptide_len+1), raw_pos+1)]
peptides = [self.peptides[peptide_pos]
for peptide_pos in window if peptide_pos in self.peptides]
non_human_peptides = [p for p in peptides if not p.is_human(min_fraction_subjects)]
annots.append((pos, aa, non_human_peptides))
return annots

def to_peptide_dataframe(self) -> pd.DataFrame:
Expand Down Expand Up @@ -250,7 +250,7 @@ def get_germline_content(self):
if self.vl:
num_germline_residues += self.vl.num_germline_residues
num_total_residues += len(self.vl.chain)
if num_total_residues is 0:
if num_total_residues == 0:
return None
return num_germline_residues / num_total_residues

Expand All @@ -266,9 +266,8 @@ def chop_seq_peptides(seq: Union[SeqRecord, Chain], peptide_length):
seq = ''.join(seq.positions.values())
else:
raise ValueError(f'Unsupported sequence type: {type(seq)}')
left = int(peptide_length/2)
right = int(np.ceil(peptide_length/2))
return [(positions[center], seq[center - left:center + right]) for center in range(left, len(seq) - right + 1)]

return [(pos, seq[i:i + peptide_length]) for i, pos in enumerate(positions[:-peptide_length+1])]


def get_antibody_humanness(vh: Optional[Chain], vl: Optional[Chain], params: OASisParams) -> AntibodyHumanness:
Expand All @@ -289,6 +288,12 @@ def get_chain_oasis_peptides(chain, params: OASisParams):
num_oas_occurrences=None
) for pos, peptide in pos_peptides}

if params.oasis_db_path.endswith('.gz'):
raise ValueError('The OASis DB file needs to be unzipped (use "gunzip DB_PATH.db.gz")')

if not os.path.exists(params.oasis_db_path):
raise FileNotFoundError(f'The OASis DB path does not exist: {params.oasis_db_path}')

oas_engine = create_engine('sqlite:///' + os.path.abspath(params.oasis_db_path), echo=False)

oas_filter_chain = "Heavy" if chain.is_heavy_chain() else "Light"
Expand Down Expand Up @@ -323,6 +328,7 @@ def get_chain_humanness(chain: Chain, params: OASisParams) -> ChainHumanness:
for pos, aa in imgt_chain)

v_germline_family = top_v.name.split('-')[0].split('/')[0]
v_germline_suffix = top_v.name.replace(v_germline_family, '')
return ChainHumanness(
chain=chain,
imgt_chain=imgt_chain,
Expand All @@ -331,6 +337,7 @@ def get_chain_humanness(chain: Chain, params: OASisParams) -> ChainHumanness:
j_germline_names=[chain.name for chain in j_germline_chains],
peptides=peptides,
v_germline_family=v_germline_family,
v_germline_suffix=v_germline_suffix,
germline_family_residue_frequency=get_germline_family_residue_frequency(chain, imgt_chain, v_germline_family),
chain_type_residue_frequency=get_chain_type_residue_frequency(chain, imgt_chain)
)
Expand Down
Loading

0 comments on commit 327ac82

Please sign in to comment.