|
#!/usr/bin/env python
|
|
|
|
'''
Script used to combine generation files in Python when the list of stacks is
NOT virtual (i.e. was NOT made using a BDB file format). For example, all
particles in generation_4_accounted.txt, which are numbered 0-->n, need to be
related back to the original stack so that they carry the original particle
numbering.
'''
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import numpy
|
|
import EMAN2
|
|
|
|
def listToString(myList):
    """Format a sequence of integers as a comma-terminated string.

    Every item is rendered with ``%d`` and followed by a comma, so
    ``[1, 2]`` becomes ``"1,2,"`` and an empty sequence becomes ``""``.
    The trailing comma is kept so the text written to the output file
    does not change.

    Args:
        myList: iterable of integer-like values.

    Returns:
        The concatenated ``"<item>,"`` fragments as a single string.
    """
    # join() assembles the result in one pass instead of repeated ``+=``.
    return "".join("%d," % (item,) for item in myList)
|
|
|
|
def readGenerationFile(genFile):
    """Read a text file that holds one integer per line.

    Args:
        genFile: path of the file to read.

    Returns:
        A 1-D integer numpy array with the values in file order. When the
        file does not exist a message is printed and an empty integer array
        is returned.

    Raises:
        ValueError: if a non-blank line cannot be parsed as an integer.
    """
    if not os.path.isfile(genFile):
        # Single parenthesised argument: parses as a print statement on
        # Python 2 and as a function call on Python 3, with the same output.
        print("File not found %s" % (genFile,))
        return numpy.array([], dtype=int)

    # ``with`` closes the handle even if int() raises on a malformed line.
    with open(genFile, "r") as handle:
        # Blank lines (e.g. a trailing empty line) are skipped; int() itself
        # tolerates surrounding whitespace and the newline.
        values = [int(line) for line in handle if line.strip()]

    # Explicit dtype so an empty file yields an integer array like any other.
    return numpy.array(values, dtype=int)
|
|
|
|
def trackParticles(numberOfGenerations=19):
    """Relate the class members of each generation back to earlier numbering.

    For every generation ``g`` in ``1..numberOfGenerations`` for which
    ``generation_<g>_accounted.txt`` exists in the working directory:

    * the accounted and unaccounted lists are read and their sizes printed;
    * the headers of ``class_averages_generation_<g>.hdf`` are read and the
      ``members`` attribute of each class is taken;
    * the member numbers are translated through the unaccounted lists of
      generations ``g-1, g-2, ..., 1`` (each list is indexed by the current
      member numbers), so that they refer to the numbering of the previous
      stack at every step;
    * one tab-separated row per class is written to
      ``generationClassMembers.csv``.

    Args:
        numberOfGenerations: highest generation number to look for
            (default 19). Generations without an accounted file are skipped.

    Returns:
        dict mapping generation number to a list with one entry per class,
        each entry being the translated member numbers of that class.
    """
    # Unaccounted lists per generation; later generations index into them.
    genParamsUnacct = {}
    bigClassDict = {}
    eman2func = EMAN2.EMData()
    headerOnly = True
    t1 = time.time()

    # ``with`` guarantees the output file is flushed and closed even when
    # reading a generation fails part-way through.
    with open("generationClassMembers.csv", "w") as f:
        f.write("generation\tclassNum\tnumMembers\tmember\n")

        for generation in range(1, numberOfGenerations + 1):
            t0 = time.time()

            genfileAcct = "generation_%d_accounted.txt" % (generation)
            if not os.path.isfile(genfileAcct):
                continue
            accounted = readGenerationFile(genfileAcct)

            genfileUnacct = "generation_%d_unaccounted.txt" % (generation)
            unaccounted = readGenerationFile(genfileUnacct)
            genParamsUnacct[generation] = unaccounted

            print("Gen %d: Acct: %d / Unacct: %d = Total %d"
                  % (generation, len(accounted), len(unaccounted),
                     len(accounted) + len(unaccounted)))

            ### read HDF file to get class members
            classFile = "class_averages_generation_%d.hdf" % (generation)
            classInfoList = eman2func.read_images(classFile, [], headerOnly)
            bigClassDict[generation] = []

            # Class numbers in the output start at 1.
            for classNum, classInfo in enumerate(classInfoList, 1):
                members = classInfo.get_attr('members')
                members.sort()

                ### walk back through the earlier generations and, for each
                ### particle, find the corresponding entry in the previous
                ### unaccounted stack
                for prevGeneration in range(generation - 1, 0, -1):
                    lookup = genParamsUnacct.get(prevGeneration)
                    if lookup is None:
                        # Generation was never loaded (no accounted file);
                        # skip it rather than treat it as an error.
                        continue
                    members = lookup[members]

                bigClassDict[generation].append(members)
                f.write("%d\t%d\t%d\t%s\n"
                        % (generation, classNum, len(members),
                           listToString(members)))

            # Seconds spent on this generation.
            print(time.time() - t0)

    # Total seconds for the whole run.
    print(time.time() - t1)
    return bigClassDict
|
|
|
|
# Run the tracking only when executed as a script, not when imported.
if __name__ == "__main__":
    trackParticles()
|