diff --git a/README.md b/README.md index 3246aa2..a71e9a0 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,23 @@ -# UCSF PyEM -UCSF PyEM is a collection of Python modules and command-line utilities for electron microscopy of biological samples. +# UCSF pyem +UCSF pyem is a collection of Python modules and command-line utilities for electron microscopy of biological samples. + +Documentation for the programs can be found in their usage text, comments in code, and in the Wiki of this repository. The entire collection is licensed under the terms of the GNU Public License, version 3 (GPLv3). -Copyright information is listed within each individual file. Current copyright holders include: - * Eugene Palovcak (UCSF) - * Daniel Asarnow (UCSF) +# How to cite -Documentation for the programs can be found in their usage text, comments in code, and in the Wiki of this repository. +Please cite the DOI for the UCSF pyem code repository. -## Programs - 1. `projection_subtraction.py` - Perform projection subtraction using per-particle FRC normalization. - + `recenter.py` - Recenter particles on the center-of-mass of corresponding 2D class-averages. - + `angdist.py` - Graph angular distributions on polar scatter plots. Supports particle subset selection. - + `pyem/star.py` - Alter .star files. Supports dropping arbitrary fields, Euler angles, etc. - + `project.py` - Project a map according to .star file entries (angles, CTF, etc.). - + `csparc2star.py` - Convert Cryosparc metadata files to Relion .star format. +# Installation -## Library modules - 1. `pyem/mrc.py` - Simple, standalone MRC I/O functions. - + `pyem/star.py` - Parse and write .star files. Uses pandas.DataFrame as a backend. +To install UCSF pyem, please follow the +[instructions](https://github.com/asarnow/pyem/wiki/Install-pyem-with-Miniconda) in the project wiki. -## Other files - 1. `activate` - Place in `EMAN2/bin` to turn EMAN2 into a Python virtual environment. +# Exporting from cryoSPARC to Relion +The most popular feature of UCSF pyem is exporting particle metadata from cryoSPARC. +Detailed [instructions](https://github.com/asarnow/pyem/wiki/Export-from-cryoSPARC-v2) can be found in the project wiki. -(C) 2016 Daniel Asarnow +(C) 2016-2019 Daniel Asarnow University of California, San Francisco diff --git a/csparc2star.py b/csparc2star.py index f886c55..9d34c64 100755 --- a/csparc2star.py +++ b/csparc2star.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2016 Daniel Asarnow # University of California, San Francisco # @@ -41,9 +41,8 @@ def main(args): try: df = metadata.parse_cryosparc_2_cs(cs, passthroughs=args.input[1:], minphic=args.minphic, boxsize=args.boxsize, swapxy=args.swapxy) except (KeyError, ValueError) as e: - log.error(e.message) - log.error("A passthrough file may be required (check inside the cryoSPARC 2+ job directory)") - log.debug(e, exc_info=True) + log.error(e, exc_info=True) + log.error("Required fields could not be mapped. Are you using the right input file(s)?") return 1 else: log.debug("Detected CryoSPARC 0.6.5 .csv file") @@ -58,10 +57,8 @@ def main(args): if args.copy_micrograph_coordinates is not None: coord_star = pd.concat( - (star.parse_star(inp, keep_index=False) for inp in + (star.parse_star(inp, keep_index=False, augment=True) for inp in glob(args.copy_micrograph_coordinates)), join="inner") - star.augment_star_ucsf(coord_star) - star.augment_star_ucsf(df) key = star.merge_key(df, coord_star) log.debug("Coordinates merge key: %s" % key) if args.cached or key == star.Relion.IMAGE_NAME: @@ -79,7 +76,7 @@ def main(args): df = star.transform_star(df, r, inplace=True) # Write Relion .star file with correct headers. - star.write_star(args.output, df) + star.write_star(args.output, df, resort_records=True) log.info("Output fields: %s" % ", ".join(df.columns)) return 0 diff --git a/ctf2star.py b/ctf2star.py index 80c8544..aa391ac 100755 --- a/ctf2star.py +++ b/ctf2star.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3 +#!/usr/bin/env python # Copyright (C) 2019 Daniel Asarnow # University of California, San Francisco # diff --git a/map.py b/map.py index bb1779a..cc3aa34 100755 --- a/map.py +++ b/map.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # @@ -30,20 +30,15 @@ from pyem import vop from scipy.ndimage import affine_transform from scipy.ndimage import shift -from scipy.ndimage import zoom +import warnings +warnings.filterwarnings('ignore', '.*output shape of zoom.*') def main(args): log = logging.getLogger(__name__) - log.setLevel(logging.INFO) hdlr = logging.StreamHandler(sys.stdout) - if args.quiet: - hdlr.setLevel(logging.ERROR) - elif args.verbose: - hdlr.setLevel(logging.INFO) - else: - hdlr.setLevel(logging.WARN) log.addHandler(hdlr) + log.setLevel(logging.getLevelName(args.loglevel.upper())) data, hdr = read(args.input, inc_header=True) final = None @@ -80,11 +75,32 @@ def main(args): args.apix = hdr["xlen"] / hdr["nx"] log.info("Using computed pixel size of %f Angstroms" % args.apix) - if args.target and args.matrix: + if args.apix_out is not None: + if args.scale is not None: + log.warn("--apix-out supersedes --scale") + args.scale = args.apix / args.apix_out + elif args.scale is not None: + args.apix_out = args.apix / args.scale + elif args.boxsize is not None: + args.scale = box[0] / np.double(args.boxsize) + + if args.apix_out is None: + args.apix_out = args.apix + + if args.boxsize is None: + if args.scale is None: + args.boxsize = box[0] + args.scale = 1 + else: + args.boxsize = np.int(box[0] * args.scale) + + log.info("Volume will be scaled by %f to size %d @ %f A/px" % (args.scale, args.boxsize, args.apix_out)) + + if args.target and args.transform: log.warn("Target pose transformation will be applied after explicit matrix") - if args.euler is not None and (args.target is not None or args.matrix is not None): + if args.euler is not None and (args.target is not None or args.transform is not None): log.warn("Euler transformation will be applied after target pose transformation") - if args.translate is not None and (args.euler is not None or args.target is not None or args.matrix is not None): + if args.translate is not None and (args.euler is not None or args.target is not None or args.transform is not None): log.warn("Translation will be applied after other transformations") if args.origin is not None: @@ -98,17 +114,26 @@ def main(args): args.origin = center log.info("Origin set to box center, %s" % (args.origin * args.apix)) - if not (args.target is None and args.euler is None and args.matrix is None and args.boxsize is None) \ + if not (args.target is None and args.euler is None and args.transform is None and args.boxsize is None) \ and vop.ismask(data) and args.spline_order != 0: log.warn("Input looks like a mask, --spline-order 0 (nearest neighbor) is recommended") - if args.matrix is not None: + if args.transform is not None: try: - r = np.array(json.loads(args.matrix)) + args.transform = np.array(json.loads(args.transform)) except: - log.error("Matrix format is incorrect") + log.error("Transformation matrix must be in JSON/Numpy format") return 1 - data = vop.resample_volume(data, r=r, t=None, ori=None, order=args.spline_order) + r = args.transform[:, :3] + if args.transform.shape[1] == 4: + t = args.transform[:, -1] / args.apix + t = r.dot(args.origin) + t - args.origin + t = -r.T.dot(t) + else: + t = 0 + log.debug("Final rotation: %s" % str(r).replace("\n", "\n" + " " * 16)) + log.debug("Final translation: %s (%f px)" % (str(t), np.linalg.norm(t))) + data = vop.resample_volume(data, r=r, t=t, ori=None, order=args.spline_order, invert=args.invert) if args.target is not None: try: @@ -122,7 +147,9 @@ def main(args): r = vec2rot(args.target) t = np.linalg.norm(args.target) log.info("Euler angles are %s deg and shift is %f px" % (np.rad2deg(rot2euler(r)), t)) - data = vop.resample_volume(data, r=r, t=args.target, ori=ori, order=args.spline_order, invert=args.target_invert) + log.debug("Final rotation: %s" % str(r).replace("\n", "\n" + " " * 16)) + log.debug("Final translation: %s (%f px)" % (str(t), np.linalg.norm(t))) + data = vop.resample_volume(data, r=r, t=args.target, ori=ori, order=args.spline_order, invert=args.invert) if args.euler is not None: try: @@ -144,11 +171,6 @@ def main(args): args.translate -= args.origin data = shift(data, -args.translate, order=args.spline_order) - if args.boxsize is not None: - args.boxsize = np.double(args.boxsize) - data = zoom(data, args.boxsize / box, order=args.spline_order) - args.apix = args.apix * box[0] / args.boxsize - if final is None: final = data @@ -156,7 +178,10 @@ def main(args): final_mask = read(args.final_mask) final *= final_mask - write(args.output, final, psz=args.apix) + if args.scale != 1 or args.boxsize != box[0]: + final = vop.resample_volume(final, scale=args.scale, output_shape=args.boxsize, order=args.spline_order) + + write(args.output, final, psz=args.apix_out) return 0 @@ -167,7 +192,7 @@ def main(args): parser.add_argument("input", help="Input volume (MRC file)") parser.add_argument("output", help="Output volume (MRC file)") parser.add_argument("--apix", "--angpix", "-a", help="Pixel size in Angstroms", type=float) - parser.add_argument("--mask", help="Final mask to apply after any operations", dest="final_mask") + parser.add_argument("--mask", help="Final mask (applied before scaling)", dest="final_mask") parser.add_argument("--transpose", help="Swap volume axes order", metavar="a1,a2,a3") parser.add_argument("--normalize", "-n", help="Convert map densities to Z-scores", action="store_true") parser.add_argument("--reference", "-r", help="Normalization reference volume (MRC file)") @@ -176,15 +201,17 @@ def main(args): parser.add_argument("--pfac", help="Padding factor for 3D FFT", type=int, default=2) parser.add_argument("--origin", help="Origin coordinates in Angstroms (volume center by default)", metavar="x,y,z") parser.add_argument("--target", help="Target pose (view axis and origin) coordinates in Angstroms", metavar="x,y,z") - parser.add_argument("--target-invert", help="Undo target pose transformation", action="store_true") + parser.add_argument("--invert", help="Invert the transformation", action="store_true") + parser.add_argument("--target-invert", action="store_true", dest="invert", help=argparse.SUPPRESS) parser.add_argument("--euler", help="Euler angles in degrees (Relion conventions)", metavar="phi,theta,psi") parser.add_argument("--translate", help="Translation coordinates in Angstroms", metavar="x,y,z") - parser.add_argument("--matrix", + parser.add_argument("--transform", help="Transformation matrix (3x3 or 3x4 with translation in Angstroms) in Numpy/json format") - parser.add_argument("--boxsize", help="Set the output box dimensions, accounting for pixel size", type=int) + parser.add_argument("--boxsize", help="Output box size (pads/crops with --scale or --apix-out, otherwise scales)", type=int) + parser.add_argument("--scale", help="Scale factor for output pixel size", type=float) + parser.add_argument("--apix-out", help="Pixel size in output (similar to --scale)", type=float) parser.add_argument("--spline-order", help="Order of spline interpolation (0 for nearest, 1 for trilinear, default is cubic)", type=int, default=3, choices=np.arange(6)) - parser.add_argument("--quiet", "-q", help="Print errors only", action="store_true") - parser.add_argument("--verbose", "-v", help="Print info messages", action="store_true") + parser.add_argument("--loglevel", "-l", type=str, default="WARNING", help="Logging level and debug output") sys.exit(main(parser.parse_args())) diff --git a/mask.py b/mask.py index ec960b2..96be3a5 100755 --- a/mask.py +++ b/mask.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # @@ -40,7 +40,7 @@ def main(args): base_map = read(args.base_map, inc_header=False) base_mask = binarize_volume(base_map, args.threshold, minvol=args.minvol, fill=args.fill) total_width = args.extend + args.edge_width - excl_mask = binary_dilate(mask, args.extend+args.edge_width, strel=args.relion) + excl_mask = binary_dilate(mask, total_width, strel=args.relion) base_mask = binary_dilate(base_mask, args.extend, strel=args.relion) base_mask = base_mask &~ excl_mask if args.overlap > 0: @@ -53,7 +53,7 @@ def main(args): se = binary_sphere(args.extend, False) mask = binary_closing(mask, structure=se, iterations=1) final = mask.astype(np.single) - if args.edge_width is not None: + if args.edge_width != 0: dt = distance_transform_edt(~mask) # Compute *outward* distance transform of mask. idx = (dt <= args.edge_width) & (dt > 0) # Identify edge points by distance from mask. x = np.arange(1, args.edge_width + 1) # Domain of the edge profile. @@ -84,7 +84,7 @@ def main(args): parser.add_argument("--extend", "-e", help="Structuring element size for dilating initial mask", type=int, default=0) parser.add_argument("--edge-width", "-w", help="Width for soft edge", - type=int) + type=int, default=0) parser.add_argument("--edge-profile", "-p", help="Soft edge profile type", choices=["sinusoid"], default="sinusoid") diff --git a/par2star.py b/par2star.py index 4c13220..ee36735 100755 --- a/par2star.py +++ b/par2star.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # @@ -47,7 +47,7 @@ def main(args): df = df.sort_values(by="C") df = metadata.par2star(df, data_path=args.stack, apix=args.apix, cs=args.cs, - ac=args.ac, kv=args.voltage, invert_eulers=args.relion) + ac=args.ac, kv=args.voltage, invert_eulers=args.invert_eulers) if args.cls is not None: df = star.select_classes(df, args.cls) @@ -68,7 +68,8 @@ def main(args): parser.add_argument("--voltage", "--kv", "-v", help="Acceleration voltage (kV)", type=float) parser.add_argument("--min-occ", help="Minimum occupancy for inclusion in output", type=float) parser.add_argument("--class", "-c", help="Classes to preserve in output", action="append", dest="cls") - parser.add_argument("--relion", help="Use if .par file came from importing .star in cisTEM", action="store_false") + parser.add_argument("--relion", help=argparse.SUPPRESS, action="store_true") + parser.add_argument("--invert-eulers", help="Invert Euler angles (generally unnecessary)", action="store_true") parser.add_argument("--loglevel", "-l", type=str, default="WARNING", help="Logging level and debug output") sys.exit(main(parser.parse_args())) diff --git a/pose.py b/pose.py index f23c6ff..58ba50d 100644 --- a/pose.py +++ b/pose.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2018 Daniel Asarnow # University of California, San Francisco diff --git a/project.py b/project.py index ff25398..9a5a58a 100755 --- a/project.py +++ b/project.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2016 Daniel Asarnow, Eugene Palovcak # University of California, San Francisco # diff --git a/projection_subtraction.py b/projection_subtraction.py index e14c705..606901a 100755 --- a/projection_subtraction.py +++ b/projection_subtraction.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2015-2018 Daniel Asarnow, Eugene Palovcak # University of California, San Francisco diff --git a/pyem/algo/algo.py b/pyem/algo/algo.py index d3a76d0..4a519e2 100644 --- a/pyem/algo/algo.py +++ b/pyem/algo/algo.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # Copyright (C) 2018 Daniel Asarnow # University of California, San Francisco # diff --git a/pyem/algo/algo_numba.py b/pyem/algo/algo_numba.py index 5f8c1f2..6ae8f86 100644 --- a/pyem/algo/algo_numba.py +++ b/pyem/algo/algo_numba.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # Copyright (C) 2018 Daniel Asarnow # University of California, San Francisco # diff --git a/pyem/ctf.py b/pyem/ctf.py index 34d9e76..384d199 100644 --- a/pyem/ctf.py +++ b/pyem/ctf.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # -*- coding: utf-8 -*- # Copyright (C) 2018 Daniel Asarnow, Eugene Palovcak # University of California, San Francisco diff --git a/pyem/metadata.py b/pyem/metadata.py index 00f7150..fe530b6 100644 --- a/pyem/metadata.py +++ b/pyem/metadata.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # @@ -82,47 +81,51 @@ def parse_fx_par(fn): def write_f9_par(fn, df): - formatters = {"C": lambda x: "%d" % x, - "PSI": lambda x: "%0.2f" % x, - "THETA": lambda x: "%0.2f" % x, - "PHI": lambda x: "%0.2f" % x, - "SHX": lambda x: "%0.2f" % x, - "SHY": lambda x: "%0.2f" % x, - "MAG": lambda x: "%d" % x, - "INCLUDE": lambda x: "%d" % x, - "DF1": lambda x: "%0.1f" % x, - "DF2": lambda x: "0.1f" % x, - "ANGAST": lambda x: "%0.2f" % x, - "PSHIFT": lambda x: "%0.2f" % x, - "OCC": lambda x: "%0.2f" % x, - "LOGP": lambda x: "%d" % x, - "SIGMA": lambda x: "%0.4f" % x, - "SCORE": lambda x: "%0.2f" % x, - "CHANGE": lambda x: "%0.2f" % x} + formatters = {"C": lambda x: "%7d" % x, + "PSI": lambda x: "%7.2f" % x, + "THETA": lambda x: "%7.2f" % x, + "PHI": lambda x: "%7.2f" % x, + "SHX": lambda x: "%9.2f" % x, + "SHY": lambda x: "%9.2f" % x, + "MAG": lambda x: "%7.0f" % x, + "INCLUDE": lambda x: "%5d" % x, + "DF1": lambda x: "%8.1f" % x, + "DF2": lambda x: "%8.1f" % x, + "ANGAST": lambda x: "%7.2f" % x, + "PSHIFT": lambda x: "%7.2f" % x, + "OCC": lambda x: "%7.2f" % x, + "LogP": lambda x: "%9d" % x, + "SIGMA": lambda x: "%10.4f" % x, + "SCORE": lambda x: "%7.2f" % x, + "CHANGE": lambda x: "%7.2f" % x} with open(fn, 'w') as f: f.write(df.to_string(formatters=formatters, index=False)) def write_fx_par(fn, df): - formatters = {"C": lambda x: "%d" % x, - "PSI": lambda x: "%0.2f" % x, - "THETA": lambda x: "%0.2f" % x, - "PHI": lambda x: "%0.2f" % x, - "SHX": lambda x: "%0.2f" % x, - "SHY": lambda x: "%0.2f" % x, - "MAG": lambda x: "%d" % x, - "INCLUDE": lambda x: "%d" % x, - "DF1": lambda x: "%0.1f" % x, - "DF2": lambda x: "0.1f" % x, - "ANGAST": lambda x: "%0.2f" % x, - "PSHIFT": lambda x: "%0.2f" % x, - "OCC": lambda x: "%0.2f" % x, - "LOGP": lambda x: "%d" % x, - "SIGMA": lambda x: "%0.4f" % x, - "SCORE": lambda x: "%0.2f" % x, - "CHANGE": lambda x: "%0.2f" % x} + formatters = {"C": lambda x: "%7d" % x, + "PSI": lambda x: "%7.2f" % x, + "THETA": lambda x: "%7.2f" % x, + "PHI": lambda x: "%7.2f" % x, + "SHX": lambda x: "%9.2f" % x, + "SHY": lambda x: "%9.2f" % x, + "MAG": lambda x: "%7.0f" % x, + "INCLUDE": lambda x: "%5d" % x, + "DF1": lambda x: "%8.1f" % x, + "DF2": lambda x: "%8.1f" % x, + "ANGAST": lambda x: "%7.2f" % x, + "PSHIFT": lambda x: "%7.2f" % x, + "OCC": lambda x: "%7.2f" % x, + "LogP": lambda x: "%9d" % x, + "SIGMA": lambda x: "%10.4f" % x, + "SCORE": lambda x: "%7.2f" % x, + "CHANGE": lambda x: "%7.2f" % x} with open(fn, 'w') as f: - f.write(df.to_string(formatters=formatters, index=False)) + f.write("C PSI THETA PHI SHX SHY " + "MAG INCLUDE DF1 DF2 ANGAST PSHIFT " + "OCC LogP SIGMA SCORE CHANGE\n") + df.to_string(buf=f, formatters=formatters, index=False, header=None) + f.write("\nC Blank line\n") def par2star(par, data_path, apix=1.0, cs=2.0, ac=0.07, kv=300, invert_eulers=True): @@ -221,6 +224,7 @@ def cryosparc_065_csv2star(meta, minphic=0): df = meta[[h for h in meta.columns if h in general and general[h] is not None]].copy() df.columns = rlnheaders + df = star.augment_star_ucsf(df, inplace=True) if "rlnRandomSubset" in df.columns: df["rlnRandomSubset"] = df["rlnRandomSubset"].apply( lambda x: ord(x) - 64) @@ -272,6 +276,30 @@ def cryosparc_2_cs_particle_locations(cs, df=None, swapxy=False): return df +def cryosparc_2_cs_ctf_parameters(cs, df=None): + log = logging.getLogger('root') + if df is None: + df = pd.DataFrame() + if 'ctf/tilt_A' in cs.dtype.names: + log.debug("Recovering beam tilt") + df[star.Relion.BEAMTILTX] = cs['ctf/tilt_A'][:, 0] + df[star.Relion.BEAMTILTY] = cs['ctf/tilt_A'][:, 1] + if 'ctf/shift_A' in cs.dtype.names: + pass + if 'ctf/trefoil_A' in cs.dtype.names: + pass + # df[star.Relion.ODDZERNIKE] = cs['ctf/trefoil_A'] + if 'ctf/tetrafoil_A' in cs.dtype.names: + pass + # df[star.Relion.EVENZERNIKE] = cs['ctf/tetra_A'] + if 'ctf/anisomag' in cs.dtype.names: + df[star.Relion.MAGMAT00] = cs['ctf/anisomag'][:, 0] + df[star.Relion.MAGMAT01] = cs['ctf/anisomag'][:, 1] + df[star.Relion.MAGMAT10] = cs['ctf/anisomag'][:, 2] + df[star.Relion.MAGMAT11] = cs['ctf/anisomag'][:, 3] + return df + + def cryosparc_2_cs_model_parameters(cs, df=None, minphic=0): model = {u'split': "rlnRandomSubset", u'shift': star.Relion.ORIGINS, @@ -320,7 +348,8 @@ def cryosparc_2_cs_model_parameters(cs, df=None, minphic=0): def parse_cryosparc_2_cs(csfile, passthroughs=None, minphic=0, boxsize=None, swapxy=False): - micrograph = {u'micrograph_blob/path': star.Relion.MICROGRAPH_NAME, + micrograph = {u'uid': star.UCSF.UID, + u'micrograph_blob/path': star.Relion.MICROGRAPH_NAME, u'micrograph_blob/psize_A': star.Relion.DETECTORPIXELSIZE, u'mscope_params/accel_kv': None, u'mscope_params/cs_mm': None, @@ -331,9 +360,9 @@ def parse_cryosparc_2_cs(csfile, passthroughs=None, minphic=0, boxsize=None, swa u'ctf/df2_A': star.Relion.DEFOCUSV, u'ctf/df_angle_rad': star.Relion.DEFOCUSANGLE, u'ctf/phase_shift_rad': star.Relion.PHASESHIFT, - u'ctf/cross_corr_ctffind4': "rlnCtfFigureOfMerit", - u'ctf/ctf_fit_to_A': "rlnCtfMaxResolution"} - general = {u'uid': star.UCSF.PARTICLE_UID, + u'ctf/cross_corr_ctffind4': star.Relion.CTFFIGUREOFMERIT, + u'ctf/ctf_fit_to_A': star.Relion.CTFMAXRESOLUTION} + general = {u'uid': star.UCSF.UID, u'ctf/accel_kv': star.Relion.VOLTAGE, u'blob/psize_A': star.Relion.DETECTORPIXELSIZE, u'ctf/ac': star.Relion.AC, @@ -343,8 +372,10 @@ def parse_cryosparc_2_cs(csfile, passthroughs=None, minphic=0, boxsize=None, swa u'ctf/df2_A': star.Relion.DEFOCUSV, u'ctf/df_angle_rad': star.Relion.DEFOCUSANGLE, u'ctf/phase_shift_rad': star.Relion.PHASESHIFT, - u'ctf/cross_corr_ctffind4': "rlnCtfFigureOfMerit", - u'ctf/ctf_fit_to_A': "rlnCtfMaxResolution", + u'ctf/cross_corr_ctffind4': star.Relion.CTFFIGUREOFMERIT, + u'ctf/ctf_fit_to_A': star.Relion.CTFMAXRESOLUTION, + u'ctf/bfactor': star.Relion.CTFBFACTOR, + u'ctf/exp_group_id': star.Relion.OPTICSGROUP, u'blob/path': star.UCSF.IMAGE_PATH, u'blob/idx': star.UCSF.IMAGE_INDEX, u'location/center_x_frac': None, @@ -367,16 +398,10 @@ def parse_cryosparc_2_cs(csfile, passthroughs=None, minphic=0, boxsize=None, swa pt = np.load(passthrough) names = [n for n in pt.dtype.names if n != 'uid' and n not in cs.dtype.names] if len(names) > 0: - if 'micrograph_blob/idx' in pt.dtype.names: - log.info("Micrograph file detected") - ptdf = util.dataframe_from_records_mapped(pt, micrograph) - key = star.Relion.MICROGRAPH_NAME - else: - log.info("Particle file detected") - ptdf = util.dataframe_from_records_mapped(pt, general) - ptdf = cryosparc_2_cs_particle_locations(pt, ptdf, swapxy=swapxy) - ptdf = cryosparc_2_cs_model_parameters(pt, ptdf, minphic=minphic) - key = star.UCSF.PARTICLE_UID + ptdf = util.dataframe_from_records_mapped(pt, {**general, **micrograph}) + ptdf = cryosparc_2_cs_particle_locations(pt, ptdf, swapxy=swapxy) + ptdf = cryosparc_2_cs_model_parameters(pt, ptdf, minphic=minphic) + key = star.UCSF.UID log.info("Trying to merge: %s" % ", ".join(names)) fields = [c for c in ptdf.columns if c not in df.columns] log.info("Merging: %s" % ", ".join(fields)) @@ -390,7 +415,6 @@ def parse_cryosparc_2_cs(csfile, passthroughs=None, minphic=0, boxsize=None, swa if star.UCSF.IMAGE_PATH in df: df[star.UCSF.IMAGE_PATH] = df[star.UCSF.IMAGE_PATH].apply(lambda x: x.decode('UTF-8')) - star.simplify_star_ucsf(df) df[star.Relion.MAGNIFICATION] = 10000.0 log.info("Directly copied fields: %s" % ", ".join(df.columns)) @@ -424,7 +448,7 @@ def parse_cryosparc_2_cs(csfile, passthroughs=None, minphic=0, boxsize=None, swa elif star.Relion.ANGLEPSI in df: log.debug("Converting ANGLEPSI from degrees to radians") df[star.Relion.ANGLEPSI] = np.rad2deg(df[star.Relion.ANGLEPSI]) - else: + elif star.is_particle_star(df): log.warn("Angular alignment parameters not found") return df diff --git a/pyem/mrc.py b/pyem/mrc.py index 82a133e..8b9acf9 100644 --- a/pyem/mrc.py +++ b/pyem/mrc.py @@ -27,7 +27,7 @@ def mrc_header(shape, dtype=np.float32, psz=1.0): - header = np.zeros(HEADER_LEN / np.dtype(np.int32).itemsize, dtype=np.int32) + header = np.zeros(HEADER_LEN // np.dtype(np.int32).itemsize, dtype=np.int32) header_f = header.view(np.float32) header[:3] = shape if np.dtype(dtype) not in MODE: diff --git a/pyem/plot.py b/pyem/plot.py index 22fb4d9..3970436 100644 --- a/pyem/plot.py +++ b/pyem/plot.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # diff --git a/pyem/star.py b/pyem/star.py index 42b7608..448fd7e 100755 --- a/pyem/star.py +++ b/pyem/star.py @@ -56,13 +56,20 @@ class Relion: BEAMTILTX = "rlnBeamTiltX" BEAMTILTY = "rlnBeamTiltY" BEAMTILTCLASS = "rlnBeamTiltClass" - CTFSCALEFACTOR = "rlnCtfScaleFactor" - CTFBFACTOR = "rlnCtfBFactor" + CTFSCALEFACTOR = "rlnCtfScalefactor" + CTFBFACTOR = "rlnCtfBfactor" CTFMAXRESOLUTION = "rlnCtfMaxResolution" CTFFIGUREOFMERIT = "rlnCtfFigureOfMerit" GROUPNUMBER = "rlnGroupNumber" + OPTICSGROUP = "rlnOpticsGroup" RANDOMSUBSET = "rlnRandomSubset" AUTOPICKFIGUREOFMERIT = "rlnAutopickFigureOfMerit" + ODDZERNIKE = "rlnOddZernike" + EVENZERNIKE = "rlnEvenZernike" + MAGMAT00 = "rlnMagMat00" + MAGMAT01 = "rlnMagMat01" + MAGMAT10 = "rlnMagMat10" + MAGMAT11 = "rlnMagMat11" COORDS = [COORDX, COORDY] ORIGINS = [ORIGINX, ORIGINY] ORIGINS3D = [ORIGINX, ORIGINY, ORIGINZ] @@ -88,7 +95,9 @@ class UCSF: IMAGE_ORIGINAL_BASENAME = "ucsfImageOriginalBasename" IMAGE_ORIGINAL_INDEX = "ucsfImageOriginalIndex" MICROGRAPH_BASENAME = "ucsfMicrographBasename" + UID = "ucsfUid" PARTICLE_UID = "ucsfParticleUid" + MICROGRAPH_UID = "ucsfMicrographUid" def smart_merge(s1, s2, fields, key=None, left_key=None): @@ -257,13 +266,13 @@ def parse_star(starfile, keep_index=False, augment=False, nrows=None): ln = 0 with open(starfile, 'rU') as f: for l in f: - if l.startswith("_"): + if l.strip().startswith("_"): foundheader = True lastheader = True if keep_index: - head = l.rstrip() + head = l.strip() else: - head = l.split('#')[0].rstrip().lstrip('_') + head = l.split('#')[0].strip().lstrip('_') headers.append(head) else: lastheader = False @@ -272,14 +281,18 @@ def parse_star(starfile, keep_index=False, augment=False, nrows=None): ln += 1 df = pd.read_csv(starfile, skiprows=ln, delimiter='\s+', header=None, nrows=nrows) df.columns = headers + if Relion.PHASESHIFT not in df: + df[Relion.PHASESHIFT] = 0.0 if augment: augment_star_ucsf(df, inplace=True) return df -def write_star(starfile, df, resort_fields=True, simplify=True): +def write_star(starfile, df, resort_fields=True, resort_records=False, simplify=True): if not starfile.endswith(".star"): starfile += ".star" + if resort_records: + df = sort_records(df, inplace=True) if simplify and len([c for c in df.columns if "ucsf" in c or "eman" in c]) > 0: df = simplify_star_ucsf(df) indexed = re.search("#\d+$", df.columns[0]) is not None # Check first column for '#N' index. @@ -379,7 +392,7 @@ def augment_star_ucsf(df, inplace=True): return df -def simplify_star_ucsf(df, inplace=True): +def simplify_star_ucsf(df, resort_index=False, inplace=True): df = df if inplace else df.copy() if UCSF.IMAGE_ORIGINAL_INDEX in df and UCSF.IMAGE_ORIGINAL_PATH in df: df[Relion.IMAGE_ORIGINAL_NAME] = df[UCSF.IMAGE_ORIGINAL_INDEX].map( @@ -390,7 +403,7 @@ def simplify_star_ucsf(df, inplace=True): lambda x: "%.6d" % (x + 1)).str.cat(df[UCSF.IMAGE_PATH], sep="@") df.drop([c for c in df.columns if "ucsf" in c or "eman" in c], axis=1, inplace=True) - if "index" in df.columns: + if resort_index and "index" in df.columns: df.set_index("index", inplace=True) df.sort_index(inplace=True, kind="mergesort") return df diff --git a/pyem/util/util.py b/pyem/util/util.py index 9e599e9..5520223 100644 --- a/pyem/util/util.py +++ b/pyem/util/util.py @@ -38,10 +38,9 @@ def relion_symmetry_group(sym): if relion is None: raise RuntimeError( "Need relion_refine on PATH to obtain symmetry operators") - stdout = subprocess.check_output( - ("%s --sym %s --i /dev/null --o /dev/null --print_symmetry_ops" % - (relion, sym)).split()) - lines = stdout.split("\n")[2:-1] + stdout = subprocess.getoutput( + "%s --sym %s --i /dev/null --o /dev/null --print_symmetry_ops" % (relion, sym)) + lines = stdout.split("\n")[2:] return [np.array( [[np.double(val) for val in l.split()] for l in lines[i:i + 3]]) for i in range(1, len(lines), 4)] @@ -74,7 +73,7 @@ def join_struct_arrays(arrays): def dataframe_from_records_mapped(rec, field_dict): - names = [str(k) for k in field_dict if field_dict[k] is not None and k in rec.dtype.names] + names = list(set([str(k) for k in field_dict if field_dict[k] is not None and k in rec.dtype.names])) df = pd.DataFrame.from_records(rec[names]) df.columns = [field_dict[k] for k in names] return df diff --git a/pyem/vop/binary.py b/pyem/vop/binary.py index b891156..bcc4c4d 100644 --- a/pyem/vop/binary.py +++ b/pyem/vop/binary.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # Copyright (C) 2017-2018 Daniel Asarnow # University of California, San Francisco # diff --git a/pyem/vop/vop.py b/pyem/vop/vop.py index 0635c93..36b1b26 100644 --- a/pyem/vop/vop.py +++ b/pyem/vop/vop.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # @@ -29,21 +28,28 @@ def ismask(vol): Even with a soft edge, a mask will have very few unique values (unless it's already been resampled). The 1D slice below treats just the central XY section for speed. Real maps have ~20,000 unique values here. """ - return np.unique(vol[vol.shape[2] / 2::vol.shape[2]]).size < 100 + return np.unique(vol[vol.shape[2] // 2::vol.shape[2]]).size < 100 -def resample_volume(vol, r=None, t=None, ori=None, order=3, compat="mrc2014", indexing="ij", invert=False): - if r is None and t is None: +def resample_volume(vol, r=None, t=None, ori=None, order=3, compat="mrc2014", + indexing="ij", invert=False, scale=None, output_shape=None): + if r is None and t is None and scale is None and (output_shape is None or np.array_equal(output_shape, vol.shape)): return vol.copy() - center = np.array(vol.shape) // 2 + if output_shape is None: + output_shape = np.array(vol.shape) + elif np.isscalar(output_shape): + output_shape = np.array((output_shape, output_shape, output_shape)) - x, y, z = np.meshgrid(*[np.arange(-c, c) for c in center], indexing=indexing) + x, y, z = np.meshgrid(*[np.arange(-c, c) for c in output_shape // 2], indexing=indexing) xyz = np.vstack([x.reshape(-1), y.reshape(-1), z.reshape(-1), np.ones(x.size)]) if ori is not None: xyz -= ori[:, None] + if r is None: + r = np.eye(3) + th = np.eye(4) if t is None and r.shape[1] == 4: t = np.squeeze(r[:, 3]) @@ -51,8 +57,12 @@ def resample_volume(vol, r=None, t=None, ori=None, order=3, compat="mrc2014", in th[:3, 3] = t rh = np.eye(4) - if r is not None: - rh[:3, :3] = r[:3, :3].T + rh[:3, :3] = r[:3, :3].T + + if scale is not None: + rh[:3, :3] /= scale + + center = np.array(vol.shape) // 2 if invert: th[:3, 3] = -th[:3, 3] @@ -61,7 +71,7 @@ def resample_volume(vol, r=None, t=None, ori=None, order=3, compat="mrc2014", in else: xyz = th.dot(rh.dot(xyz))[:3, :] + center[:, None] - xyz = np.array([arr.reshape(vol.shape) for arr in xyz]) + xyz = np.array([arr.reshape(output_shape) for arr in xyz]) if "relion" in compat.lower() or "xmipp" in compat.lower(): xyz = xyz[::-1] @@ -72,7 +82,7 @@ def resample_volume(vol, r=None, t=None, ori=None, order=3, compat="mrc2014", in def grid_correct(vol, pfac=2, order=1): n = vol.shape[0] - nhalf = n / 2 + nhalf = n // 2 x, y, z = np.meshgrid(*[np.arange(-nhalf, nhalf)] * 3, indexing="xy") r = np.sqrt(x**2 + y**2 + z**2, dtype=vol.dtype) / (n * pfac) with np.errstate(divide="ignore", invalid="ignore"): @@ -114,7 +124,7 @@ def vol_ft(vol, pfac=2, threads=1, normfft=1): :param normfft: Normalization constant for Fourier transform. """ vol = grid_correct(vol, pfac=pfac, order=1) - padvol = np.pad(vol, (vol.shape[0] * pfac - vol.shape[0]) // 2, "constant") + padvol = np.pad(vol, int((vol.shape[0] * pfac - vol.shape[0]) // 2), "constant") ft = rfftn(np.fft.ifftshift(padvol), padvol.shape, threads=threads) ftc = np.zeros((ft.shape[0] + 3, ft.shape[1] + 3, ft.shape[2]), dtype=ft.dtype) fill_ft(ft, ftc, vol.shape[0], normfft=normfft) diff --git a/pyem/vop/vop_numba.py b/pyem/vop/vop_numba.py index 25065d8..4a097a9 100644 --- a/pyem/vop/vop_numba.py +++ b/pyem/vop/vop_numba.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python2.7 # Copyright (C) 2017-2018 Daniel Asarnow # University of California, San Francisco # diff --git a/recenter.py b/recenter.py index 72a7050..d152032 100755 --- a/recenter.py +++ b/recenter.py @@ -1,4 +1,4 @@ -#! /usr/bin/python2.7 +#!/usr/bin/env python # Copyright (C) 2016 Daniel Asarnow # University of California, San Francisco # diff --git a/reconstruct.py b/reconstruct.py index 9296a13..68e4845 100755 --- a/reconstruct.py +++ b/reconstruct.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # diff --git a/sort.py b/sort.py index 43ae3cf..5784662 100644 --- a/sort.py +++ b/sort.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2018 Daniel Asarnow # University of California, San Francisco diff --git a/stack.py b/stack.py index b5bb554..126682f 100755 --- a/stack.py +++ b/stack.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2017-2018 Daniel Asarnow # University of California, San Francisco # diff --git a/star.py b/star.py index e5b446a..e0b9885 100755 --- a/star.py +++ b/star.py @@ -232,10 +232,10 @@ def main(args): return 0 if args.auxout is not None and dfaux is not None: - star.write_star(args.auxout, dfaux, simplify=args.augment_output) + star.write_star(args.auxout, dfaux, resort_records=args.sort, simplify=args.augment_output) if args.output is not None: - star.write_star(args.output, df, simplify=args.augment_output) + star.write_star(args.output, df, resort_records=args.sort, simplify=args.augment_output) return 0 @@ -317,6 +317,7 @@ def main(args): type=str) parser.add_argument("--invert-hand", help="Alter Euler angles to invert handedness of reconstruction", action="store_true") + parser.add_argument("--sort", help="Natsort the output file", action="store_true") parser.add_argument("input", help="Input .star file(s) or unquoted glob", nargs="*") parser.add_argument("output", help="Output .star file") sys.exit(main(parser.parse_args())) diff --git a/star2bild.py b/star2bild.py index 11d36e7..1baa977 100755 --- a/star2bild.py +++ b/star2bild.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2018 Daniel Asarnow # University of California, San Francisco diff --git a/subparticles.py b/subparticles.py index cc70152..2c43c38 100755 --- a/subparticles.py +++ b/subparticles.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco # @@ -37,7 +37,7 @@ def main(args): if args.target is None and args.sym is None and args.transform is None and args.euler is None: log.error("At least a target, transformation matrix, Euler angles, or a symmetry group must be provided") return 1 - elif args.target is not None and args.boxsize is None and args.origin is None: + elif (args.target is not None or args.transform is not None) and args.boxsize is None and args.origin is None: log.error("An origin must be provided via --boxsize or --origin") return 1 diff --git a/varmap.py b/varmap.py index 60181bd..faa1dae 100755 --- a/varmap.py +++ b/varmap.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python2.7 +#!/usr/bin/env python # Copyright (C) 2017 Daniel Asarnow # University of California, San Francisco #