topaz_input_prep.py - Appion - Electron Microscopy Group

Feature #6748 » topaz_input_prep.py

/home/mrapp/topaz_input_prep.py - Sargis Dallakyan, 02/15/2019 03:40 PM

    
    # -*- coding: utf-8 -*-

    """

    Created on Mon Oct 22 10:06:54 2018

    @author: micah_x

    """

    from __future__ import print_function,division

    import os

    import sys

    import pandas as pd

    import numpy as np

    import star as star

    import re

    import argparse

    ## command-line interface
    # Pass the summary as ``description``: the first positional parameter of
    # ArgumentParser is ``prog``, so passing the text positionally replaced the
    # program name in the usage line instead of describing the script.
    parser = argparse.ArgumentParser(
        description='Script for generating a coordinate list and corresponding image list')

    # One or more micrograph paths, followed by the star file as the last
    # positional argument.
    parser.add_argument('paths', nargs='+',help='path to preprocessed micrographs. should end with *tiff')
    parser.add_argument('file', help='path to input star file')

    # The image list is always written to this file, so the option is
    # mandatory: argparse now reports a clear usage error instead of the
    # script failing later inside open(None).
    parser.add_argument('-oi', '--outputimage', required=True, help='output image list file. make it a .txt')
    parser.add_argument('-oc', '--outputcoord', help='output coordinate file. make it a .txt')
    parser.add_argument('-of', '--outputfilter', help='output filtered image list file. make it a .txt')

    args = parser.parse_args()

    ## generate image list
    # Write a tab-separated table with one row per micrograph path. The image
    # name is the file name without its directory or extension.
    paths = args.paths

    # The context manager closes the file even if one of the writes raises.
    with open(args.outputimage, "w") as outi:
        outi.write('image_name\tpath' + '\n')
        for path in paths:
            name = os.path.splitext(os.path.basename(path))[0]
            outi.write(name + '\t' + path + '\n')

    ## star to coordinates

    def strip_ext(name):
        """Return ``name`` with its final file extension removed."""
        return os.path.splitext(name)[0]

    # Parse the star file into a table. ``star`` is a project module; its
    # result is assumed to be a pandas DataFrame, which is how it is used below.
    with open(args.file, 'r') as f:
        table = star.parse(f)

    ## columns of interest are 'MicrographName', 'CoordinateX', 'CoordinateY',
    ## plus 'ParticleScore' when the star file provides it
    wanted = ['MicrographName', 'CoordinateX', 'CoordinateY']
    renamed = ['image_name', 'x_coord', 'y_coord']
    if 'ParticleScore' in table.columns:
        wanted.append('ParticleScore')
        renamed.append('score')

    # Copy the selection so the column assignments below modify an independent
    # frame rather than a slice of the parsed table.
    table = table[wanted].copy()
    table.columns = renamed

    ## convert the coordinates to integers
    # Going through float first accepts values written with a decimal part;
    # the int cast then drops the fraction.
    table['x_coord'] = table['x_coord'].astype(float).astype(int)
    table['y_coord'] = table['y_coord'].astype(float).astype(int)

    ## reduce the image names to bare file names: no directory, no extension
    # os.path.basename replaces str.replace(r'(\S*/)', ''), which only stripped
    # directories while pandas treated the pattern as a regex by default
    # (pandas 2.0 switched str.replace to literal matching). Taking the
    # basename and then dropping the extension is also how the image list
    # names are derived, so the two files stay comparable.
    table['image_name'] = table['image_name'].apply(os.path.basename).apply(strip_ext)

    ## write the coordinate table, to stdout unless an output file was given
    outc = sys.stdout if args.outputcoord is None else args.outputcoord
    table.to_csv(outc, sep='\t', index=False)

    ## filter image list
    # Keep only the micrographs that have at least one coordinate. The names
    # are taken from the in-memory table, so this also works when the
    # coordinates were written to stdout and there is no file to read back.
    targets = set(table['image_name'].astype(str))

    # The image list is tab-separated with a header row. Splitting on tabs
    # keeps paths that contain spaces intact, header=0 stops the header line
    # from being read as a data row, and dtype=str keeps numeric-looking names
    # comparable with the names in the coordinate table.
    images = pd.read_csv(args.outputimage, sep='\t', header=0, dtype=str)
    images = images.loc[images['image_name'].isin(targets)]

    ## write the filtered images
    if args.outputfilter is None:
        images.to_csv(sys.stdout, sep='\t', header=True, index=False)
    else:
        # The context manager closes the file even if to_csv raises.
        with open(args.outputfilter, 'w') as out:
            images.to_csv(out, sep='\t', header=True, index=False)

(2-2/5)

Project

General

Profile

Appion

Feature #6748 » topaz_input_prep.py