Project

General

Profile

Feature #2839 » combine_generations.py

Neil Voss, 07/15/2014 09:18 AM

 
#!/usr/bin/env python

'''
Script used to combine generation files in Python when the list of stacks is NOT virtual
(i.e. was NOT made using the BDB file format). For example, the particles in
generation_4_accounted.txt, which are numbered 0-->n, need to be related back to the
original stack using the original stack's particle numbering.
'''

import os
import sys
import time
import numpy
import EMAN2

def listToString(myList):
    '''
    Format a sequence of integers as a comma-terminated string.

    Every item is rendered with "%d" and followed by a comma, so
    [1, 2, 3] becomes "1,2,3," (note the trailing comma) and an empty
    sequence becomes "".

    myList: any iterable of integer-like values.
    Returns the formatted string.
    '''
    ### join builds the result in one pass instead of repeated string +=
    return "".join("%d," % item for item in myList)

def readGenerationFile(genFile):
    '''
    Read a generation file containing one integer per line.

    genFile: path of the text file to read.

    Returns a 1-D numpy integer array with the values in file order.
    If the file does not exist, a "File not found" message is printed
    and an empty integer array is returned.

    Raises ValueError if a non-blank line cannot be parsed as an integer.
    '''
    if not os.path.isfile(genFile):
        ### single-argument form prints the same text under Python 2 and 3
        print("File not found %s" % genFile)
        return numpy.array([], dtype=int)
    ### the with-block closes the file even if int() raises
    with open(genFile, "r") as f:
        ### int() ignores surrounding whitespace; blank lines are skipped
        ### instead of aborting the whole read with a ValueError
        particleNums = [int(line) for line in f if line.strip()]
    ### explicit dtype keeps an empty result usable as an index array
    return numpy.array(particleNums, dtype=int)

def trackParticles(numberOfGenerations=19):
    '''
    Write generationClassMembers.csv, listing the members of every class
    average of every generation.

    For each generation g from 1 to numberOfGenerations that has a
    generation_<g>_accounted.txt file in the current directory:
      - the accounted and unaccounted lists are read and their sizes printed
      - the 'members' header attribute of every image in
        class_averages_generation_<g>.hdf is read and sorted
      - the member numbers are used as indices into the unaccounted lists
        of generations g-1, g-2, ..., 1 in turn
      - one tab-separated row is written per class: generation, class number
        (counted from 1 within the generation), number of members, and the
        comma-terminated member list

    Generations without an accounted file are skipped. When a later
    generation cannot be mapped through such a skipped generation, a
    warning is printed and that mapping step is left out.

    numberOfGenerations: highest generation number to look for
        (default 19, the value that used to be hard-coded).

    Returns None. Elapsed times (per generation and total) are printed.
    '''
    ### unaccounted particle lists, keyed by generation number
    unacctByGeneration = {}
    ### read_images is called through an EMData instance, as in the original
    emdata = EMAN2.EMData()
    headerOnly = True
    startTime = time.time()
    ### the with-block closes the output file even if a generation fails
    with open("generationClassMembers.csv", "w") as outFile:
        outFile.write("generation\tclassNum\tnumMembers\tmember\n")
        for generation in range(1, numberOfGenerations + 1):
            genStartTime = time.time()
            genfileAcct = "generation_%d_accounted.txt" % (generation)
            if not os.path.isfile(genfileAcct):
                continue
            acct = readGenerationFile(genfileAcct)
            genfileUnacct = "generation_%d_unaccounted.txt" % (generation)
            unacct = readGenerationFile(genfileUnacct)
            unacctByGeneration[generation] = unacct
            print("Gen %d: Acct: %d / Unacct: %d = Total %d"
                % (generation, len(acct), len(unacct), len(acct) + len(unacct)))

            ### unaccounted lists of the earlier generations, most recent
            ### first; built once per generation instead of once per class
            lookups = []
            for prevGeneration in range(generation - 1, 0, -1):
                if prevGeneration in unacctByGeneration:
                    lookups.append(unacctByGeneration[prevGeneration])
                else:
                    ### previously a silent "except KeyError: pass"
                    print("Warning: generation %d has no unaccounted list; "
                        "members of generation %d are not mapped through it"
                        % (prevGeneration, generation))

            ### read HDF file to get class members
            classFile = "class_averages_generation_%d.hdf" % (generation)
            classInfoList = emdata.read_images(classFile, [], headerOnly)
            for classNum, classInfo in enumerate(classInfoList, 1):
                ### assumes 'members' is a list of integer particle numbers
                ### -- TODO confirm against the EMAN2 header definition
                members = sorted(classInfo.get_attr('members'))
                ### each step replaces the numbers by the entries they index
                ### in the previous generation's unaccounted list
                for lookup in lookups:
                    members = lookup[members]
                outFile.write("%d\t%d\t%d\t%s\n"
                    % (generation, classNum, len(members), listToString(members)))
            print(time.time() - genStartTime)
    print(time.time() - startTime)

### run the particle tracking only when executed as a script
if __name__ == "__main__":
    trackParticles()
(1-1/2)