|
# -*- coding: utf-8 -*-
|
|
"""
Created on Mon Oct 22 10:06:54 2018

@author: micah_x
"""
|
|
|
|
from __future__ import print_function,division
|
|
|
|
import os
|
|
import sys
|
|
import pandas as pd
|
|
import numpy as np
|
|
import star as star
|
|
import re
|
|
import argparse
|
|
|
|
# Command-line interface.
# The summary sentence is passed as ``description``: the first positional
# parameter of ArgumentParser is ``prog``, so passing the sentence positionally
# would print it as the program name in the usage line.
parser = argparse.ArgumentParser(
    description='Script for generating a coordinate list and corresponding image list')
parser.add_argument('paths', nargs='+', help='path to preprocessed micrographs. should end with *tiff')
parser.add_argument('file', help='path to input star file')
# The image list is always written to this file and read back when the list is
# filtered, so the option is mandatory; requiring it here yields a usage error
# instead of a traceback from open(None).
parser.add_argument('-oi', '--outputimage', required=True, help='output image list file. make it a .txt')
parser.add_argument('-oc', '--outputcoord', help='output coordinate file. make it a .txt')
parser.add_argument('-of', '--outputfilter', help='output filtered image list file. make it a .txt')
args = parser.parse_args()
|
|
|
|
## generate image list
# Writes a tab-separated table with one row per micrograph path given on the
# command line: the file name without directory or extension, then the path
# exactly as it was passed in.
paths = args.paths
# The context manager closes the file even if a write fails part-way through.
with open(args.outputimage, 'w') as outi:
    outi.write('image_name\tpath\n')
    for path in paths:
        name = os.path.splitext(os.path.basename(path))[0]
        outi.write(name + '\t' + path + '\n')
|
|
|
|
## star to coordinates
|
|
def strip_ext(name):
    """Return *name* with its final file extension removed.

    Only the last extension is dropped (``a.tar.gz`` -> ``a.tar``); any
    directory components are kept, and names without an extension are
    returned unchanged.
    """
    return os.path.splitext(name)[0]
|
|
|
|
# Parse the STAR file; the result is used below like a pandas DataFrame
# (NOTE(review): star.parse is project-local -- confirm its return type).
with open(args.file, 'r') as f:
    table = star.parse(f)

# Columns of interest and the names they get in the output.  'ParticleScore'
# is carried along as 'score' only when the STAR file provides it.
star_columns = ['MicrographName', 'CoordinateX', 'CoordinateY']
coord_columns = ['image_name', 'x_coord', 'y_coord']
if 'ParticleScore' in table.columns:
    star_columns.append('ParticleScore')
    coord_columns.append('score')

# .copy() makes the selection an independent frame, so the column assignments
# below do not write into a slice of the parsed table (SettingWithCopyWarning).
table = table[star_columns].copy()
table.columns = coord_columns

## convert the coordinates to integers
# Going through float first accepts values such as "123.000000"; the int cast
# then truncates any fractional part.
for coord in ('x_coord', 'y_coord'):
    table[coord] = table[coord].astype(float).astype(int)

## strip directories and file extensions off the image names if present
# os.path.basename is used instead of a regex passed to Series.str.replace:
# str.replace treats its pattern as a literal string by default on pandas 2.x,
# which would leave the directory prefix in place.
table['image_name'] = table['image_name'].apply(os.path.basename).apply(strip_ext)

# Write the coordinates to the requested file, or to stdout when no
# coordinate file was given.
outc = sys.stdout
if args.outputcoord is not None:
    outc = args.outputcoord
table.to_csv(outc, sep='\t', index=False)
|
|
|
|
## filter image list
# Keep only the images that have at least one coordinate row.
# The in-memory coordinate table is used rather than re-reading the coordinate
# file: that file does not exist when the coordinates were written to stdout,
# and a round trip through CSV would re-infer numeric-looking image names as
# integers so they would no longer match the names in the image list.
targets = table

# The image list is tab-separated with a header line.  header=0 skips that
# line instead of treating it as a data row, and dtype=str keeps names and
# paths exactly as written.
images = pd.read_csv(args.outputimage, sep='\t', header=0,
                     names=["image_name", "path"], dtype=str)

images = images.loc[images['image_name'].isin(targets['image_name'])]

## write the filtered images
# Passing the path (rather than an open handle) lets pandas open and close the
# file itself; stdout is used when no filter output file was given.
out = sys.stdout if args.outputfilter is None else args.outputfilter
images.to_csv(out, sep='\t', header=True, index=False)
|