Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into release
Browse files Browse the repository at this point in the history
  • Loading branch information
asarnow committed Aug 30, 2019
2 parents 037a201 + bf97d2a commit 138728b
Show file tree
Hide file tree
Showing 12 changed files with 185 additions and 29 deletions.
2 changes: 1 addition & 1 deletion csparc2star.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def main(args):
df = star.transform_star(df, r, inplace=True)

# Write Relion .star file with correct headers.
star.write_star(args.output, df, reindex=True)
star.write_star(args.output, df)
log.info("Output fields: %s" % ", ".join(df.columns))
return 0

Expand Down
79 changes: 79 additions & 0 deletions ctf2star.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env python3
# Copyright (C) 2019 Daniel Asarnow
# University of California, San Francisco
#
# Simple program for converting CTFFIND4 output to micrograph .star file.
# See help text and README file for more information.
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# *_ctfEstimation.txt sample:
# # Output from CTFFind version 4.1.10, run on 2019-05-29 19:35:36
# # Input file: Runs/000126_ProtCTFFind/tmp/mic_0126/20190529_hABCA4_DA-52-4_99_0009_aligned_mic.mrc ; Number of micrographs: 1
# # Pixel size: 1.140 Angstroms ; acceleration voltage: 200.0 keV ; spherical aberration: 2.70 mm ; amplitude contrast: 0.10
# # Box size: 512 pixels ; min. res.: 22.8 Angstroms ; max. res.: 2.9 Angstroms ; min. def.: 5000.0 um; max. def. 40000.0 um
# # Columns: #1 - micrograph number; #2 - defocus 1 [Angstroms]; #3 - defocus 2; #4 - azimuth of astigmatism; #5 - additional phase shift [radians]; #6 - cross correlation; #7 - spacing (in Angstroms) up to which CTF rings were fit successfully
# 1.000000 16793.875000 15208.728516 -66.871671 0.000000 -0.029152 9.327273
import glob
import os.path
import pandas as pd
import sys
from pyem import star


def _strip_header_label(line, label):
    """Return the text of a CTFFIND4 header line that follows ``label``."""
    # str.lstrip() takes a *set of characters*, not a prefix, so using it here
    # would also eat leading characters of the value (e.g. "tmp/a.mrc" would
    # become "mp/a.mrc"). Slice the label off explicitly instead.
    if line.startswith(label):
        return line[len(label):]
    # Unexpected header layout: fall back to dropping the comment marker only.
    return line.lstrip("#")


def main(args):
    """Convert CTFFIND4 ``*_ctfEstimation.txt`` files to a micrograph .star file.

    :param args: Parsed command line arguments providing ``input`` (list of
        files, or a single directory to search), ``output``, ``path``,
        ``no_sort`` and ``apix``.
    :return: 0 on success, 1 if no input files were found.
    """
    if args.input and os.path.isdir(args.input[0]):
        flist = glob.glob(os.path.join(args.input[0], "*_ctfEstimation.txt"))
    else:
        flist = args.input
    if not flist:
        sys.stderr.write("No CTFFIND4 output files found\n")
        return 1

    # When no pixel size override is given, the value found in the first file
    # is used for every micrograph.
    apix = args.apix
    data = []
    for fn in flist:
        with open(fn, 'r') as f:
            lines = f.readlines()
        # Second header line: "# Input file: <path> ; Number of micrographs: N"
        g = _strip_header_label(lines[1], "# Input file:").split(" ;")[0].strip()
        if apix is None:
            # Third header line: "# Pixel size: <apix> Angstroms ; ..."
            apix = float(_strip_header_label(lines[2], "# Pixel size:").split("Angstrom")[0])
        # The fit results are on the last non-blank line; columns are described
        # by the "# Columns:" header line of the CTFFIND4 output.
        tok = next(ln for ln in reversed(lines) if ln.strip()).split()
        row = {}
        if args.path is None:
            row[star.Relion.MICROGRAPH_NAME] = g
        else:
            row[star.Relion.MICROGRAPH_NAME] = os.path.join(args.path, os.path.basename(g))
        row[star.Relion.DEFOCUSU] = float(tok[1])
        row[star.Relion.DEFOCUSV] = float(tok[2])
        row[star.Relion.DEFOCUSANGLE] = float(tok[3])
        row[star.Relion.PHASESHIFT] = float(tok[4])
        row[star.Relion.CTFFIGUREOFMERIT] = float(tok[5])
        row[star.Relion.CTFMAXRESOLUTION] = float(tok[6])
        # NOTE(review): presumably magnification is fixed at 10000 so that the
        # detector pixel size field can carry the pixel size in Angstroms
        # directly - confirm against the Relion convention.
        row[star.Relion.MAGNIFICATION] = 10000
        row[star.Relion.DETECTORPIXELSIZE] = apix
        data.append(row)
    df = pd.DataFrame(data)
    if not args.no_sort:
        df = star.sort_records(df, inplace=True)
    df = star.sort_fields(df, inplace=True)
    star.write_star(args.output, df)
    return 0


if __name__ == "__main__":
    import argparse
    parser = argparse.ArgumentParser(
        description="Convert CTFFIND4 output to a micrograph .star file")
    # At least one input is required: main() inspects args.input[0], so an
    # empty list must be rejected here with a usage error.
    parser.add_argument("input", nargs="+",
                        help="CTFFIND4 *_ctfEstimation.txt file(s), or a directory containing them")
    parser.add_argument("output", help="Output .star file")
    parser.add_argument("--path", "-p", help="New path prepended to micrograph basenames", type=str)
    parser.add_argument("--no-sort", "-n", help="Preserve input filename order", action="store_true")
    parser.add_argument("--apix", help="Override pixel size (Angstroms)", type=float)
    sys.exit(main(parser.parse_args()))

2 changes: 1 addition & 1 deletion par2star.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ def main(args):
if args.cls is not None:
df = star.select_classes(df, args.cls)

star.write_star(args.output, df, reindex=True)
star.write_star(args.output, df)
return 0


Expand Down
2 changes: 1 addition & 1 deletion projection_subtraction.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def init():
if args.crop is not None:
df = star.recenter(df, inplace=True)
star.simplify_star_ucsf(df)
star.write_star(args.output, df, reindex=True)
star.write_star(args.output, df)

return 0

Expand Down
22 changes: 22 additions & 0 deletions pyem/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,28 @@ def parse_fx_par(fn):
return df


def write_f9_par(fn, df):
    """Write a data frame to ``fn`` as a fixed-precision .par text table.

    Each known column is rendered with its own printf-style format; columns
    without an entry use the pandas default formatting. The index is not
    written.

    :param fn: Path of the output file (overwritten if it exists).
    :param df: pandas DataFrame holding the parameter columns.
    """
    formats = {"C": "%d",
               "PSI": "%0.2f",
               "THETA": "%0.2f",
               "PHI": "%0.2f",
               "SHX": "%0.2f",
               "SHY": "%0.2f",
               "MAG": "%d",
               "INCLUDE": "%d",
               "DF1": "%0.1f",
               # Was "0.1f" (no leading '%'), which raised TypeError on use.
               "DF2": "%0.1f",
               "ANGAST": "%0.2f",
               "PSHIFT": "%0.2f",
               "OCC": "%0.2f",
               "LOGP": "%d",
               "SIGMA": "%0.4f",
               "SCORE": "%0.2f",
               "CHANGE": "%0.2f"}
    # Bind each format string as a default argument so every lambda keeps its
    # own format instead of the last one seen by the loop.
    formatters = {name: (lambda x, fmt=fmt: fmt % x) for name, fmt in formats.items()}
    with open(fn, 'w') as f:
        f.write(df.to_string(formatters=formatters, index=False))


def write_fx_par(fn, df):
formatters = {"C": lambda x: "%d" % x,
"PSI": lambda x: "%0.2f" % x,
Expand Down
66 changes: 52 additions & 14 deletions pyem/star.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python2.7
# Copyright (C) 2016 Daniel Asarnow
# University of California, San Francisco
#
Expand Down Expand Up @@ -26,10 +25,12 @@
from math import modf
from pyem.geom import e2r_vec
from pyem.geom import rot2euler
from pyem.util import natsort_values


class Relion:
MICROGRAPH_NAME = "rlnMicrographName"
MICROGRAPH_NAME_NODW = "rlnMicrographNameNoDW"
IMAGE_NAME = "rlnImageName"
IMAGE_ORIGINAL_NAME = "rlnImageOriginalName"
RECONSTRUCT_IMAGE_NAME = "rlnReconstructImageName"
Expand Down Expand Up @@ -66,14 +67,18 @@ class Relion:
ORIGINS = [ORIGINX, ORIGINY]
ORIGINS3D = [ORIGINX, ORIGINY, ORIGINZ]
ANGLES = [ANGLEROT, ANGLETILT, ANGLEPSI]
ALIGNMENTS = ANGLES + ORIGINS
ALIGNMENTS = ANGLES + ORIGINS3D
CTF_PARAMS = [DEFOCUSU, DEFOCUSV, DEFOCUSANGLE, CS, PHASESHIFT, AC,
BEAMTILTX, BEAMTILTY, BEAMTILTCLASS, CTFSCALEFACTOR, CTFBFACTOR,
CTFMAXRESOLUTION, CTFFIGUREOFMERIT]
MICROSCOPE_PARAMS = [VOLTAGE, MAGNIFICATION, DETECTORPIXELSIZE]
MICROGRAPH_COORDS = [MICROGRAPH_NAME] + COORDS
PICK_PARAMS = MICROGRAPH_COORDS + [ANGLEPSI, CLASS, AUTOPICKFIGUREOFMERIT]

# Preferred column order for output files. The trailing entries must be a
# list of three separate field names; joining them with '+' would concatenate
# the strings into a single, non-existent field name.
FIELD_ORDER = [IMAGE_NAME, IMAGE_ORIGINAL_NAME, MICROGRAPH_NAME, MICROGRAPH_NAME_NODW] + \
    COORDS + ALIGNMENTS + MICROSCOPE_PARAMS + CTF_PARAMS + \
    [CLASS, GROUPNUMBER, RANDOMSUBSET]


class UCSF:
IMAGE_PATH = "ucsfImagePath"
Expand All @@ -86,11 +91,13 @@ class UCSF:
PARTICLE_UID = "ucsfParticleUid"


def smart_merge(s1, s2, fields, key=None):
def smart_merge(s1, s2, fields, key=None, left_key=None):
if key is None:
key = merge_key(s1, s2)
if left_key is None:
left_key = key
s2 = s2.set_index(key, drop=False)
s1 = s1.merge(s2[s2.columns.intersection(fields)], left_on=key, right_index=True, suffixes=["_x", ""])
s1 = s1.merge(s2[s2.columns.intersection(fields)], left_on=left_key, right_index=True, suffixes=["_x", ""])
x = [c for c in s1.columns if "_x" in c]
if len(x) > 0:
y = [c.split("_")[0] for c in s1.columns if c in x]
Expand Down Expand Up @@ -156,6 +163,14 @@ def select_classes(df, classes):
return df.loc[ind]


def to_micrographs(df):
    """Collapse a data frame to one row per micrograph.

    Rows are grouped by micrograph name and numeric columns are averaged;
    only the CTF and microscope parameter columns present in the result are
    kept, and the micrograph name is restored as a regular column.

    :param df: pandas DataFrame containing a micrograph name column.
    :return: New DataFrame with one row per micrograph.
    """
    gb = df.groupby(Relion.MICROGRAPH_NAME)
    # Non-numeric columns (e.g. image names) cannot be averaged; newer pandas
    # versions raise instead of silently dropping them, so request numeric
    # columns explicitly.
    mu = gb.mean(numeric_only=True)
    wanted = Relion.CTF_PARAMS + Relion.MICROSCOPE_PARAMS + [Relion.MICROGRAPH_NAME]
    keep = [c for c in wanted if c in mu]
    return mu[keep].reset_index()


def split_micrographs(df):
gb = df.groupby(Relion.MICROGRAPH_NAME)
dfs = {}
Expand Down Expand Up @@ -262,21 +277,17 @@ def parse_star(starfile, keep_index=False, augment=False, nrows=None):
return df


def write_star(starfile, df, reindex=True, simplify=True):
def write_star(starfile, df, resort_fields=True, simplify=True):
if not starfile.endswith(".star"):
starfile += ".star"
if simplify and len([c for c in df.columns if "ucsf" in c or "eman" in c]) > 0:
df = simplify_star_ucsf(df)
indexed = re.search("#\d+$", df.columns[0]) is not None # Check first column for '#N' index.
if reindex and not indexed: # No index present, append consecutive indices to sorted headers.
order = np.argsort(df.columns)
names = [df.columns[idx] + " #%d" % (i + 1) for i, idx in enumerate(order)]
elif reindex and indexed: # Replace existing indices with consecutive indices after sorting headers.
names = [c.split("#")[0].rstrip()for c in df.columns]
order = np.argsort(names)
names = [df.columns[idx] + " #%d" % (i + 1) for i, idx in enumerate(order)]
if not indexed:
if resort_fields:
df = sort_fields(df, inplace=True)
names = [idx + " #%d" % (i + 1) for i, idx in enumerate(df.columns)]
else:
order = np.arange(df.shape[1])
names = df.columns
with open(starfile, 'w') as f:
f.write('\n')
Expand All @@ -287,7 +298,7 @@ def write_star(starfile, df, reindex=True, simplify=True):
line = name + " \n"
line = line if line.startswith('_') else '_' + line
f.write(line)
df[df.columns[order]].to_csv(starfile, mode='a', sep=' ', header=False, index=False)
df.to_csv(starfile, mode='a', sep=' ', header=False, index=False, float_format='%.6f')


def transform_star(df, r, t=None, inplace=False, rots=None, invert=False, rotate=True, adjust_defocus=False):
Expand Down Expand Up @@ -383,3 +394,30 @@ def simplify_star_ucsf(df, inplace=True):
df.set_index("index", inplace=True)
df.sort_index(inplace=True, kind="mergesort")
return df


def sort_fields(df, inplace=False):
    """Reorder columns so that known fields come first, in canonical order.

    Columns listed in ``Relion.FIELD_ORDER`` are placed first, following that
    list's order; all remaining columns follow in their existing order.

    :param df: pandas DataFrame to reorder.
    :param inplace: If False, work from a copy of ``df``.
    :return: The reordered DataFrame; callers should use the return value.
    """
    if not inplace:
        df = df.copy()
    known = [name for name in Relion.FIELD_ORDER if name in df]
    others = [name for name in df.columns if name not in Relion.FIELD_ORDER]
    return df.reindex(columns=known + others, copy=False)


def sort_records(df, inplace=False):
    """Sort rows in natural order.

    Particle tables are ordered by image path and image index when the image
    index column is present (and left unsorted otherwise); all other tables
    are ordered by micrograph name.

    :param df: pandas DataFrame to sort.
    :param inplace: If False, work from a copy of ``df``.
    :return: The sorted DataFrame.
    """
    if not inplace:
        df = df.copy()
    if not is_particle_star(df):
        return natsort_values(df, Relion.MICROGRAPH_NAME, inplace=True)
    if UCSF.IMAGE_INDEX in df:
        # Combine path and index into a single key for the natural sort.
        sort_key = df[UCSF.IMAGE_PATH] + "_" + df[UCSF.IMAGE_INDEX].astype(str)
        df = natsort_values(df, sort_key, inplace=True)
    return df


def original_field(field):
    """Insert ``Original`` after the first capitalized word of a field name.

    For example ``rlnImageName`` becomes ``rlnImageOriginalName``.

    :param field: Camel-case field name with a lower-case prefix.
    :return: The derived field name.
    """
    # Capitalized words of two or more letters, e.g. ["Image", "Name"].
    words = re.findall("[A-Z][a-z]+", field)
    head, rest = words[0], words[1:]
    renamed = "Original".join([head, "".join(rest)])
    # Everything before the first upper-case letter, e.g. "rln".
    prefix = re.match(r".*?[a-z].*?(?=[A-Z])", field).group()
    return prefix + renamed
13 changes: 13 additions & 0 deletions pyem/util/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
from __future__ import absolute_import
import bisect
import natsort
import numpy as np
import pandas as pd
import subprocess
Expand Down Expand Up @@ -123,3 +124,15 @@ def write_q_series(vol, qarr, basename, psz=1., order=1):
r = geom.quat2rot(q / np.linalg.norm(q))
decoy = vop.resample_volume(vol, r=r, order=order)
mrc.write(basename % i, decoy, psz=psz)


def natsort_values(df, col, inplace=False):
    """Sort the rows of a data frame in natural (human) order.

    :param df: pandas DataFrame to sort.
    :param col: Either the name of a column in ``df``, or a sequence of key
        values (one per row) to sort by.
    :param inplace: If False, a copy of ``df`` is sorted and returned.
    :return: The sorted DataFrame.
    """
    df = df if inplace else df.copy()
    # isinstance() also accepts str subclasses as column names; an exact type
    # check would treat such a name as a sequence of single-character keys.
    keys = df[col] if isinstance(col, str) else col
    order = np.array(natsort.index_natsorted(keys))
    # argsort of the sorted order gives each row's rank; sorting on a
    # temporary rank column keeps the operation in place.
    df["__natsort_key__"] = np.argsort(order)
    df.sort_values("__natsort_key__", inplace=True)
    df.drop("__natsort_key__", axis=1, inplace=True)
    return df
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,4 @@ pandas>=0.23.4
pathos>=0.2.1
pyfftw>=0.10
healpy>=1.11
natsort>=6.0
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
author='Daniel Asarnow',
author_email='[email protected]',
description='Python programs for electron microscopy',
install_requires=['future', 'numba', 'numpy', 'scipy', 'matplotlib', 'seaborn', 'pandas', 'pathos', 'pyfftw', 'healpy'],
install_requires=['future', 'numba', 'numpy', 'scipy', 'matplotlib',
'seaborn', 'pandas', 'pathos', 'pyfftw', 'healpy', 'natsort'],
zip_safe=False
)
2 changes: 1 addition & 1 deletion stack.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ def main(args):
df = pd.concat(dfs, join="inner")
# df = pd.concat(dfs)
# df = df.dropna(df, axis=1, how="any")
star.write_star(args.star, df, reindex=True)
star.write_star(args.star, df)

return 0

Expand Down
14 changes: 8 additions & 6 deletions star.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2.7
#!/usr/bin/env python
# Copyright (C) 2018 Daniel Asarnow
# University of California, San Francisco
#
Expand Down Expand Up @@ -184,10 +184,7 @@ def main(args):
df.iloc[ind])

if args.to_micrographs:
gb = df.groupby(star.Relion.MICROGRAPH_NAME)
mu = gb.mean()
df = mu[[c for c in star.Relion.CTF_PARAMS + star.Relion.MICROSCOPE_PARAMS + [star.Relion.MICROGRAPH_NAME] if
c in mu]].reset_index()
df = star.to_micrographs(df)

if args.micrograph_range:
df.set_index(star.Relion.MICROGRAPH_NAME, inplace=True)
Expand Down Expand Up @@ -221,8 +218,12 @@ def main(args):
if args.merge_key is not None:
if "," in args.merge_key:
args.merge_key = args.merge_key.split(",")
if args.by_original:
args.by_original = star.original_field(args.merge_key)
else:
args.by_original = args.merge_key
merge_star = star.parse_star(args.merge_source, augment=args.augment)
df = star.smart_merge(df, merge_star, fields=args.merge_fields, key=args.merge_key)
df = star.smart_merge(df, merge_star, fields=args.merge_fields, key=args.merge_key, left_key=args.by_original)

if args.split_micrographs:
dfs = star.split_micrographs(df)
Expand Down Expand Up @@ -265,6 +266,7 @@ def main(args):
parser.add_argument("--merge-fields", help="Field(s) to merge", metavar="f1,f2...fN", type=str)
parser.add_argument("--merge-key", help="Override merge key detection with explicit key field(s)",
metavar="f1,f2...fN", type=str)
parser.add_argument("--by-original", help="Merge using \"original\" field name in input .star", action="store_true")
parser.add_argument("--drop-angles", help="Drop tilt, psi and rot angles from output",
action="store_true")
parser.add_argument("--drop-containing",
Expand Down
8 changes: 4 additions & 4 deletions varmap.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@

def main(args):
    """Compute the per-voxel variance (and optionally mean) of a set of maps.

    The inputs are read one at a time and combined with Welford's online
    update, so only one input plus the running mean and sum of squared
    deviations are held at once.

    :param args: Parsed command line arguments providing ``input`` (list of
        map files), ``output`` (variance map path) and ``mean`` (optional
        mean map path).
    :return: 0 on success.
    """
    x = mrc.read(args.input[0])
    m2 = np.zeros(x.shape)
    mu = x.copy()
    # The first map initializes the running mean, so the next map is sample
    # number 2. Dividing by a count that starts at 1 would overwrite the mean
    # with the second map and discard the first.
    for n, f in enumerate(args.input[1:], start=2):
        x = mrc.read(f)
        olddif = x - mu
        mu += olddif / n
        m2 += olddif * (x - mu)
    # Population variance: divide by the number of maps.
    var = m2 / len(args.input)
    mrc.write(args.output, var)
    if args.mean is not None:
        mrc.write(args.mean, mu)
    return 0
Expand Down

0 comments on commit 138728b

Please sign in to comment.