Project

General

Profile

Feature #6748 » topaz_input_prep.py

/home/mrapp/topaz_input_prep.py - Sargis Dallakyan, 02/15/2019 03:40 PM

 
# -*- coding: utf-8 -*-
"""
Created on Mon Oct 22 10:06:54 2018

@author: micah_x
"""

from __future__ import print_function,division

import os
import sys
import pandas as pd
import numpy as np
import star as star
import re
import argparse

## command-line interface
## The summary text must be passed as ``description``: the first positional
## parameter of ArgumentParser is ``prog``, which would otherwise put the
## whole sentence into the usage line in place of the program name.
parser = argparse.ArgumentParser(
    description='Script for generating a coordinate list and corresponding image list')
## every positional value except the last is collected into ``paths``;
## the last one is the star file
parser.add_argument('paths', nargs='+', help='path to preprocessed micrographs. should end with *tiff')
parser.add_argument('file', help='path to input star file')
## the image list is always written to (and later re-read from) this file,
## so the option is mandatory; omitting it used to fail inside open()
parser.add_argument('-oi', '--outputimage', required=True, help='output image list file. make it a .txt')
parser.add_argument('-oc', '--outputcoord', help='output coordinate file. make it a .txt')
parser.add_argument('-of', '--outputfilter', help='output filtered image list file. make it a .txt')
args = parser.parse_args()

## generate image list
## Writes a tab-separated table with a header row and one row per micrograph
## path given on the command line: the file name without directory and
## extension, followed by the path exactly as it was passed in.
paths = args.paths
## the context manager closes the file even if a write fails part-way
with open(args.outputimage, "w") as outi:
    outi.write('image_name\tpath' + '\n')
    for path in paths:
        name = os.path.splitext(os.path.basename(path))[0]
        outi.write(name + '\t' + path + '\n')

## star to coordinates
def strip_ext(name):
    """Return *name* with its final file extension removed.

    Only the last extension is dropped ('a.b.c' becomes 'a.b'); any
    directory part of *name* is kept unchanged.
    """
    return os.path.splitext(name)[0]

with open(args.file, 'r') as f:
    table = star.parse(f)

## columns of interest are 'MicrographName', 'CoordinateX' and 'CoordinateY',
## plus 'ParticleScore' when the star file provides it
star_columns = ['MicrographName', 'CoordinateX', 'CoordinateY']
coord_columns = ['image_name', 'x_coord', 'y_coord']
if 'ParticleScore' in table.columns:
    star_columns.append('ParticleScore')
    coord_columns.append('score')

## take a copy so the renames and assignments below act on an independent
## frame instead of a slice of the parsed table
table = table[star_columns].copy()
table.columns = coord_columns

## convert the coordinates to integers (the fractional part is truncated);
## presumably they pass through float first because the star file may store
## them as decimal text
table['x_coord'] = table['x_coord'].astype(float).astype(int)
table['y_coord'] = table['y_coord'].astype(float).astype(int)

## reduce the image names to the bare file name: drop any leading directory
## and the file extension if present. os.path.basename is used rather than a
## regex str.replace, whose pattern is taken literally by recent pandas
## unless regex=True is given.
table['image_name'] = table['image_name'].apply(os.path.basename).apply(strip_ext)

## write the coordinate table to the requested file, or to stdout if none
outc = sys.stdout if args.outputcoord is None else args.outputcoord
table.to_csv(outc, sep='\t', index=False)

## filter image list
## Keeps only the images that have at least one entry in the coordinate table.
if args.outputcoord is None:
    ## the coordinates were written to stdout, so there is no file to read
    ## back; use the in-memory table instead
    targets = table
else:
    ## read names as text so purely numeric micrograph names are not
    ## converted to numbers and still compare equal to the image list
    targets = pd.read_csv(args.outputcoord, sep='\t', dtype={'image_name': str})

## the image list is tab-separated and starts with its own
## 'image_name<TAB>path' header row, so let pandas take the column names
## from the file; splitting on tabs only keeps paths with spaces intact
images = pd.read_csv(args.outputimage, sep='\t', dtype=str)

images = images.loc[images['image_name'].isin(targets['image_name'])]

## write the filtered images to the requested file, or to stdout if none;
## handing the path to to_csv lets pandas open and close the file itself
out = sys.stdout if args.outputfilter is None else args.outputfilter
images.to_csv(out, sep='\t', header=True, index=False)
(2-2/5)