#
# Rajarshi Guha <rajarshi@presidency.com>
# 04/03/2004
#

import sys, string, copy, getopt
import HinFile

class Sorter :

	# Initialize self.__attribute .  It will be abused later.
	def __init__ (self) :
		self.__attribute = None

	# Notice how __compare is dependant upon self.__attribute
	def __compare (self, x, y) :
		return cmp(x[self.__attribute], y[self.__attribute])

	# Pass the sort function the __compare function defined above.
	def __call__ (self, data, attribute = None) :
		if attribute == None :
			data.sort()

		else :
			self.__attribute = attribute
			data.sort(self.__compare)

			# This is useful if you want to inline things.
			# I don't know how much innefficiency it introduces.
			# Is this just a new reference?  (that would be cool.)
			return data
def usage():
    print """
    Usage: morgan.py OPTIONS

    -h,--help           This messsage
    -i,--input FILE     Input file (HIN format)
    -v,--verbose        Verbose output

    This script converts the serial numbers of the atoms in the
    supplied HIN file to the canonical form using the basic Morgan
    algorithm. Currently if two or more atoms have the same EC values,
    they are simply numbered serially (ie, ordering via atom type or
    bond type is ignored).  Furthermore, oscillating EC values and
    atoms with isospectral points are not considered. Furthermore it
    operates on the full molecule (ie includes hydrogens)

    The program simply outputs the new and old serial numbers. A new
    HIN file is not written out. Its mainly a proof of concept code.

    The algorithm was implemented from the description in
    Cheminformatics, A Textbook by Gasteiger & Engel (Eds.), page 60.
    
    """

def num_unique( v ):
    l = []
    for i in v:
        if i not in l: l.append(i)
    return(len(l))

if __name__ == '__main__':

    if len(sys.argv) < 3:
        usage()
        sys.exit(0)


    verbose = 0
    hinfilename = ''
    
    try:
        opt,args = getopt.getopt(sys.argv[1:], 'i:hv',\
        ['input=','help','verbose'])
    except getopt.GetoptError:
        usage()
        sys.exit(0)

    for o,a in opt:
        if o in ('-h','--help'):
            usage()
            sys.exit(0)
        if o in ('-v','--verbose'):
            verbose = 1
        if o in ('-i','--input'):
            hinfilename = a
            

    sorter = Sorter()
    hinfile = HinFile.HinFileReader( hinfilename )


    # Get the serial numbers that were read in
    serial = []
    for atom in hinfile.atomlist:
        serial.append( atom['serial'] )
        

    # get initial EC values
    oldec = []
    for ser in serial:
        oldec.append( hinfile.get_numbond(ser) )
    
    nclass = num_unique( oldec )

    # start expanding the sphere
    currec = []
    while 1:
        for ser in serial:

            # get the serial number of the atoms connected to the current atom
            con = hinfile.get_connected_serials(ser)
            
            s = 0

            # loop over each connected atom and find its EC value and sum them
            for cs in con:
                idx = serial.index(cs)
                s = s + oldec[idx]
            
            # s thus has the EC value of the current atom in this sphere
            currec.append( s )

        # see if the number of EC's is <= number of EC's in the previous iter
        if num_unique( currec ) <= nclass:
            break
        else:
            nclass = num_unique( currec )
            oldec = copy.deepcopy( currec )
            currec = []

    if verbose:
        print 'Obtained EC values'
        
    # OK, num unique EC's converged                
    # Since we use the iteration where the highest number of EC's showed up
    # means we use oldec rather than currec (since we exited when 
    # currec <= oldec)
    sec  = zip(serial, oldec)
    sorter(sec,1).reverse()
    
    newserial = {}
    currserial = 1
    for s,e in sec:

        #set the new serial for the serial corresponding to this ec
        if verbose: print 'Renumbering serial %d' % (s)
        if str(s) not in newserial.keys():
            newserial[ str(s) ] = currserial
            currserial += 1
        else:
            if verbose: print '  Ignoring serial %d' %(s)

        # lets number the neighbors of this serial number
        cons = hinfile.get_connected_serials( s )
        if verbose: print '  Numbering nbrs: ' + str(cons)

        # make a list of ec values for these connected serials
        consec = []
        for c in cons:
            for ss,ee in sec:
                if ss == c: 
                    consec.append(ee)
                    break
        consec = zip(cons, consec)
        sorter(consec,1).reverse()
        for ss, ee in consec:
            if str(ss) not in newserial.keys():
                newserial[ str(ss) ] = currserial
                currserial += 1
            else:
                if verbose: print '    Ignoring serial %d' % (ss)
    
    newkeys = [int(x) for x in newserial.keys()]
    newkeys.sort()
    print 'old : new'
    for key in newkeys: print '%d : %d' % (key, newserial[str(key)])