#!/usr/local/bin/py
"""Interactively split a numbered collection of molecules into three sets.

Usage: <script> depv.txt

The script reads one floating point value per line from the file named on
the command line and labels the values "1", "2", ... in file order.  It
then asks for the tset / pset / cvset percentages and for the directory
holding the .hin files, derives the three set sizes from the number of
.hin files found there, and hands the shuffled ids and values to
``setbinmod`` to build the sets.  The resulting ids are sorted numerically
and written by ``setbinmod.print_set`` to 'tsets.in' and 'tsets.in.txt'.

The code is written so that it behaves the same under Python 2 and also
parses and runs under Python 3 (single-argument ``print(...)`` calls and
an input shim).
"""
import sys
import os
import string
import math
import random
import copy
import dan
import setbinmod

# raw_input() only exists on Python 2; on Python 3 the same function is
# called input().  (Python 2's own input() evaluates what is typed, so it
# must not be used there.)
try:
    _read_line = raw_input
except NameError:
    _read_line = input


def stripreturn(l):
    """Return `l` without its final newline character, if it has one.

    An empty string is returned unchanged (the previous indexing-based
    check raised IndexError for it).
    """
    if l.endswith('\n'):
        return l[:-1]
    return l


def scramblelist(list, iter = 10):
    """Shuffle `list` in place `iter` times and return the same list.

    The parameter names shadow builtins; they are kept as-is so keyword
    callers keep working.
    """
    for _ in range(0, iter):
        random.shuffle(list)
    return list


##############################################################
#
# Currently I assume all dans start from 1 and increment
# sequentially. So make sure it is like this!

if len(sys.argv) != 2:
    print("""
    Must supply the depv.txt file. Also note that
    the program assumes that your hin/mol files are
    numbered sequentially starting from 1
    """)
    sys.exit(-1)

print('')
tsetp = float(_read_line("Enter tset percentage: "))
psetp = float(_read_line("Enter pset percentage: "))
# NOTE(review): csetp is prompted for but never used below; the cvset
# simply receives whatever is left after the tset and pset are sized.
csetp = float(_read_line("Enter cvset percentage: "))

# Read in all dans and their associated values: line k of the file
# (1-based) becomes the pair (str(k), float(line)).
danlist = []
f = open(sys.argv[1], 'r')
try:
    for index, line in enumerate(f):
        danlist.append((str(index + 1), float(line)))
finally:
    f.close()

# Now make a translation table from the dans and
# from here on only use the translated forms (ie integers)
# NOTE(review): neither table is referenced again in this script; the
# call is kept in case dan.dan_translation validates its input.
old2new, new2old = dan.dan_translation([d for d, v in danlist])

# Need to do this to find out the number of hin/mol files present
dandir = _read_line('Enter the directory where all the hin files are: ')
try:
    danflist = os.listdir(dandir)
except OSError:
    print('Error: Directory '+dandir+' does not exist')
    # Without a listing nothing below can work (danflist would be
    # undefined), so stop here with the same status as the usage error.
    sys.exit(-1)

# Get rid of any non hin files.  Same substring test as before: any name
# containing ".hin" is kept.
# NOTE(review): the comments in this file talk about hin/mol files but
# only ".hin" is matched - confirm whether ".mol" should count as well.
danflist = [name for name in danflist if ".hin" in name]

# From here on, work with internal numberings
#
nummol = len(danflist)
numt = int(math.ceil(tsetp/100 * nummol))
nump = int(math.ceil(psetp/100 * nummol))
# NOTE(review): because both sizes above are rounded up, numc can come out
# negative for small collections or when tset + pset exceed 100%.
numc = nummol - numt - nump

print('')
print('Total Number of molecules = '+str(nummol))
print('TSET = '+str(numt)+' PSET = '+str(nump)+' CVSET = '+str(numc))

scramblelist(danlist)

# Build the id and value lists once, after shuffling, and reuse them for
# both setbinmod calls.
ids = [x for x, y in danlist]
values = [y for x, y in danlist]

# The literal 10 is passed straight through to makebin (presumably a bin
# count - confirm against setbinmod).
bins, probs = setbinmod.makebin(ids, values, 10)
tset, pset, cset = setbinmod.generate_sets(bins, probs, numt, nump, numc,
                                           ids, values)

# Since the dans are all str's (see above) we convert them to ints
# and sort - just sugar :)
tset = sorted([int(x) for x in tset])
pset = sorted([int(x) for x in pset])
cset = sorted([int(x) for x in cset])

# Now write them out!!  (The trailing 1 / 2 argument is passed to
# print_set unchanged; presumably it selects the output format.)
setbinmod.print_set(tset, pset, cset, 'tsets.in',1)
setbinmod.print_set(tset, pset, cset, 'tsets.in.txt',2)