-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #89 from volkamerlab/normalize-fps
Implement min-max normalization (coarse/fine)
- Loading branch information
Showing
26 changed files
with
905 additions
and
605 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
""" | ||
kissim.api.normalize | ||
Main API for normalizing fingerprints. | ||
""" | ||
|
||
import logging | ||
from pathlib import Path | ||
|
||
from kissim.encoding import FingerprintGenerator, FingerprintGeneratorNormalized | ||
|
||
logger = logging.getLogger(__name__) | ||
|
||
|
||
def normalize(
    fingerprints_path, method="min_max", fine_grained=True, fingerprints_normalized_path=None
):
    """
    Normalize fingerprints and optionally save the result to a JSON file.

    Parameters
    ----------
    fingerprints_path : str or pathlib.Path
        Path to fingerprints JSON file.
    method : str
        Normalization method (default: "min_max"). Passed on unchanged to
        `FingerprintGeneratorNormalized.from_fingerprint_generator`.
    fine_grained : bool
        True (default):
            Distances: Calculate min/max per subpocket for each residue position individually.
            Moments: Calculate min/max per moment for each subpocket individually.
        False:
            Distances: Calculate min/max per subpocket over all residue positions.
            Moments: Calculate min/max per moment over all subpockets.
    fingerprints_normalized_path : str or pathlib.Path or None
        Path to normalized fingerprints JSON file. If None (default), the normalized
        fingerprints are returned but not written to disk.

    Returns
    -------
    kissim.encoding.FingerprintGeneratorNormalized
        Normalized fingerprints, as returned by
        `FingerprintGeneratorNormalized.from_fingerprint_generator`.
    """

    # Load fingerprints
    logger.info("Read fingerprints...")
    fingerprints_path = Path(fingerprints_path)
    fingerprint_generator = FingerprintGenerator.from_json(fingerprints_path)
    logger.info(f"Number of fingerprints: {len(fingerprint_generator.data)}")

    # Normalize fingerprints
    logger.info("Normalize fingerprints...")
    logger.info(f"Normalization method: {method}")
    logger.info(f"Use fine-grained normalization: {fine_grained}")
    fingerprint_generator_normalized = FingerprintGeneratorNormalized.from_fingerprint_generator(
        fingerprint_generator, method, fine_grained
    )

    # Saving is optional: only write the JSON file if an output path was given
    if fingerprints_normalized_path is not None:
        fingerprints_normalized_path = Path(fingerprints_normalized_path)
        fingerprint_generator_normalized.to_json(fingerprints_normalized_path)
    logger.info(f"Number of fingerprints: {len(fingerprint_generator_normalized.data)}")

    return fingerprint_generator_normalized
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
""" | ||
kissim.cli.normalize | ||
Normalize fingerprints from CLI arguments. | ||
""" | ||
|
||
from kissim.api import normalize | ||
from kissim.cli.utils import configure_logger | ||
|
||
|
||
def normalize_from_cli(args):
    """
    Normalize fingerprints.

    Parameters
    ----------
    args : argparse.Namespace
        CLI arguments. The attributes read here are `input`, `output`, `method` and
        `fine_grained`.
    """

    # The output path is handed to both the logger configuration and the normalization
    configure_logger(args.output)
    # NOTE(review): bool() is True for any non-empty string (including "False") - confirm
    # that the CLI parser supplies `fine_grained` as a real boolean/int rather than a string.
    normalize(args.input, args.method, bool(args.fine_grained), args.output)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
subpocket,min_max,any | ||
hinge_region,min,2.37 | ||
hinge_region,max,30.79 | ||
dfg_region,min,0.87 | ||
dfg_region,max,33.62 | ||
front_pocket,min,1.36 | ||
front_pocket,max,33.36 | ||
center,min,1.11 | ||
center,max,27.03 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
subpocket,min_max,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85 | ||
hinge_region,min,11.52,9.3,7.83,6.55,5.66,6.86,7.71,6.37,8.84,7.14,6.25,7.1,6.89,5.15,2.37,5.07,6.08,9.83,11.16,12.18,9.73,12.85,12.86,9.08,9.69,11.36,11.29,9.52,11.18,12.21,11.11,10.48,9.68,10.0,6.5,5.4,4.58,6.25,7.5,7.07,9.78,8.37,7.16,4.95,3.25,3.53,2.69,4.82,5.28,9.24,7.7,9.3,10.71,9.63,11.32,11.75,16.63,18.63,14.66,16.17,15.69,19.15,19.78,18.17,17.34,16.98,15.05,13.61,14.33,12.49,13.12,13.15,11.43,9.35,8.0,8.08,7.64,9.19,6.56,5.17,5.98,5.24,7.21,4.34,5.96 | ||
hinge_region,max,23.62,20.68,18.57,18.8,20.51,19.77,19.17,18.26,17.01,16.03,12.37,12.79,14.27,8.53,6.23,8.29,11.01,13.56,16.08,30.79,27.93,24.99,23.72,23.31,21.24,21.71,23.29,25.5,24.68,23.65,20.26,19.07,22.95,19.57,16.31,13.7,11.93,10.56,12.02,12.9,14.56,13.52,10.59,8.03,6.0,6.16,6.94,9.82,12.34,12.85,14.02,14.75,17.47,19.59,21.55,24.16,23.94,27.83,27.15,30.31,28.44,27.9,27.14,26.34,24.67,22.55,22.6,20.71,21.55,21.26,18.24,17.88,17.13,15.15,12.46,13.23,12.19,14.76,11.45,9.07,12.25,14.45,24.17,30.0,24.42 | ||
dfg_region,min,16.23,14.04,12.53,6.09,8.57,5.47,4.55,3.92,5.08,7.66,8.13,10.06,10.56,12.08,9.26,7.88,5.54,6.8,4.58,2.01,2.86,3.3,4.85,2.64,3.62,4.66,7.12,6.03,6.21,9.48,10.75,11.61,11.97,10.64,7.56,6.51,7.74,6.7,9.24,8.1,6.1,5.33,2.95,6.35,7.74,10.96,11.01,11.57,14.32,16.37,15.1,13.65,16.1,16.96,17.74,16.88,21.7,22.6,17.13,12.89,10.94,14.4,15.4,12.19,10.16,10.29,9.35,8.98,9.92,9.42,11.43,10.78,13.67,10.73,10.05,11.71,12.53,11.48,8.19,5.57,4.18,2.3,1.12,0.87,1.68 | ||
dfg_region,max,30.33,27.63,24.45,22.83,21.77,22.91,25.38,23.47,19.85,18.67,17.56,20.22,21.3,18.73,15.42,12.98,10.98,12.88,13.56,21.63,18.26,15.91,15.21,17.43,16.35,16.71,19.99,21.29,20.95,21.2,19.46,22.08,25.42,23.36,19.71,17.63,16.97,14.47,16.63,15.21,15.61,11.54,10.84,13.71,14.09,17.22,18.3,19.88,22.9,24.04,24.27,22.34,23.75,26.23,27.74,29.72,30.02,33.58,33.62,28.7,26.96,25.28,25.46,22.78,21.72,18.7,19.09,17.5,18.97,18.47,20.73,22.21,23.28,20.99,18.01,19.22,19.56,22.45,17.2,13.75,12.26,10.42,18.96,26.47,22.51 | ||
front_pocket,min,10.92,7.52,5.74,4.05,2.38,2.24,2.28,3.82,4.04,7.36,5.68,7.7,7.9,9.6,7.31,8.93,7.61,10.78,11.79,13.0,11.99,14.37,13.48,10.44,11.79,13.75,12.94,12.42,13.6,15.7,14.26,13.98,13.99,11.99,9.92,7.91,10.41,9.92,12.06,11.5,14.31,12.76,10.69,10.66,8.66,8.9,6.06,6.63,6.81,4.89,4.18,3.3,6.26,8.24,8.45,7.61,12.08,13.36,10.53,15.61,14.63,18.02,19.4,18.72,16.58,16.04,14.75,11.14,11.83,8.62,8.67,7.75,5.7,4.07,2.7,5.41,4.64,7.4,6.79,4.27,3.05,1.76,2.37,1.64,1.36 | ||
front_pocket,max,23.34,19.68,18.01,19.84,20.9,19.48,17.04,17.51,15.22,13.11,13.32,15.09,17.34,14.29,11.16,13.4,12.49,15.14,17.26,32.93,30.28,27.96,27.35,25.56,24.07,24.92,26.58,28.1,26.59,26.02,23.58,22.91,22.81,20.01,16.48,14.74,14.51,16.72,18.22,18.42,20.2,17.36,15.06,14.19,11.79,12.62,12.84,12.59,15.5,13.27,15.98,12.98,13.68,16.23,17.69,19.57,20.37,25.21,24.23,33.36,30.61,29.49,27.84,26.72,25.15,21.76,21.44,18.21,17.92,16.9,14.04,13.56,12.38,10.03,8.25,9.14,9.23,12.23,12.41,10.56,13.21,14.06,20.05,23.84,21.09 | ||
center,min,15.17,12.05,10.22,8.05,6.51,6.02,5.15,5.19,8.39,9.58,7.72,9.47,10.25,9.36,7.16,7.87,6.76,10.38,10.67,10.43,9.44,10.84,9.59,6.44,7.53,8.72,8.48,7.42,8.28,10.36,9.39,9.03,9.46,7.84,5.71,4.08,5.49,6.29,9.32,8.6,10.79,9.77,7.55,7.4,5.52,6.94,6.25,7.4,9.94,10.17,9.44,8.74,10.15,12.42,13.19,12.16,16.43,18.15,14.43,12.06,11.65,14.74,15.76,14.3,12.37,12.19,10.42,8.63,9.64,8.36,8.84,8.07,9.14,5.68,5.46,6.5,6.84,5.61,1.88,1.11,1.24,1.75,2.5,2.97,3.29 | ||
center,max,26.03,22.76,20.23,20.8,21.68,21.14,19.48,20.33,18.81,17.58,16.44,18.23,19.64,13.57,10.86,12.69,12.27,15.2,16.28,25.95,23.32,20.81,20.31,21.64,17.82,18.63,20.24,21.73,20.56,20.22,17.67,17.3,20.18,17.08,13.65,12.19,10.88,12.01,13.64,15.27,17.86,15.02,12.87,11.51,9.31,10.95,12.01,13.39,16.92,15.66,18.47,16.43,18.35,20.46,22.63,24.25,23.81,26.89,26.8,27.03,24.93,23.83,22.55,21.54,19.84,17.4,17.05,15.36,15.64,15.21,13.15,13.95,14.11,12.01,8.89,10.33,11.69,13.93,11.41,8.22,7.39,9.32,20.52,26.75,20.19 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
moment,min_max,any | ||
1,min,11.47 | ||
1,max,16.85 | ||
2,min,2.88 | ||
2,max,6.19 | ||
3,min,-2.29 | ||
3,max,6.31 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
moment,min_max,hinge_region,dfg_region,front_pocket,center | ||
1,min,12.2,12.72,12.33,11.47 | ||
1,max,13.85,16.85,14.34,12.97 | ||
2,min,4.06,3.91,3.34,2.88 | ||
2,max,5.76,6.17,6.19,4.48 | ||
3,min,-2.17,-1.12,-2.1,-2.29 | ||
3,max,5.16,6.31,5.97,4.15 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
4 changes: 2 additions & 2 deletions
4
kissim/encoding/base.py → kissim/encoding/fingerprint_base.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.