import string
import sys


class HinFile:
    """One molecule parsed from the text lines of a HIN file.

    Two main data structures are kept on each instance:

    atomlist -- a list with one dictionary per ``atom`` line.  The keys are:
        serial      - serial number of the atom in the original file (int)
        type        - the fourth whitespace-separated field of the line
                      (compared against 'H' to detect hydrogens)
        coord       - a 3 item list containing the x, y, z coords (floats)
        charge      - the charge on the atom (float)
        numbond     - the number of bonds this atom has (int)
        connections - the raw, unparsed neighbour tokens of the line, e.g.
                      ['2', 's', '3', 's']

    connections -- a list with one single-key dictionary per atom.  The key
        is the atom serial number and the value is a list of
        (neighbour serial, bond type) tuples, so it might look like:

            [{1: [(2, 's')]},
             {2: [(1, 's'), (3, 's'), (14, 's')]},
             {3: [(2, 's'), (4, 's')]},
             {4: [(3, 's'), (5, 'd'), (6, 's')]},
             {5: [(4, 'd')]}, ...]

    Other attributes: ``tag`` (the first line handed to the constructor),
    ``status`` (initialised to 0) and ``errmesg`` (initialised to None).

    Methods:
        get_type(), get_numbond(), get_coord() and get_connections() all
        take a serial number and return None when no atom has that serial.

        get_bond_list(include_hydrogens) needs no serial number; its single
        argument selects whether bonds involving hydrogen are listed.
    """

    def __init__(self, l):
        """Parse one molecule.

        l -- list of text lines.  The first one is stored verbatim as
             ``self.tag``; every following line is parsed as an atom record
             until a line containing 'endmol' (or the end of the list).

        Raises IndexError if ``l`` is empty or an atom line has too few
        fields, and ValueError if a numeric field cannot be converted or
        the neighbour tokens do not come in (serial, bond type) pairs.
        """
        self.status = 0
        self.errmesg = None
        self.atomlist = []
        self.connections = []

        self.tag = l[0]
        for line in l[1:]:
            if 'endmol' in line:
                break
            fields = line.split()
            self.atomlist.append({
                'serial': int(fields[1]),
                'type': fields[3],
                'coord': [float(fields[7]), float(fields[8]),
                          float(fields[9])],
                'charge': float(fields[6]),
                'numbond': int(fields[10]),
                'connections': fields[11:],
            })

        for atom in self.atomlist:
            tokens = atom['connections']
            # Neighbour tokens alternate "serial, bond type"; a dangling
            # token means the record is malformed.
            if len(tokens) % 2:
                raise ValueError(
                    'atom %d: neighbour list %r is not made of '
                    '(serial, bond type) pairs' % (atom['serial'], tokens))
            pairs = [(int(tokens[k]), tokens[k + 1])
                     for k in range(0, len(tokens), 2)]
            self.connections.append({atom['serial']: pairs})

    def _find_atom(self, serial):
        """Return the first atom dictionary whose serial matches, or None."""
        wanted = int(serial)
        for atom in self.atomlist:
            if atom['serial'] == wanted:
                return atom
        return None

    def _get_field(self, serial, key):
        """Return atom[key] for the atom with this serial, or None."""
        atom = self._find_atom(serial)
        if atom is None:
            return None
        return atom[key]

    def get_type(self, serial):
        """Return the 'type' entry of the atom with this serial, or None."""
        return self._get_field(serial, 'type')

    def get_numbond(self, serial):
        """Return the number of bonds of the atom with this serial, or None."""
        return self._get_field(serial, 'numbond')

    def get_coord(self, serial):
        """Return the [x, y, z] list of the atom with this serial, or None."""
        return self._get_field(serial, 'coord')

    def get_connections(self, serial):
        """Return the raw neighbour tokens of the atom with this serial.

        This is the unparsed list of strings taken from the atom line, not
        the tuples stored in ``self.connections``.  Returns None when no
        atom has the given serial.
        """
        return self._get_field(serial, 'connections')

    def get_bond_list(self, include_hydrogens):
        """Return every bond once as a (start serial, end serial) tuple.

        include_hydrogens -- if true, bonds involving an atom whose type is
            'H' are included; otherwise only bonds where neither end is 'H'
            are returned.

        Each bond is reported in the direction in which it is first met
        while walking ``self.connections``; the reverse direction (and any
        repeat) is skipped.  A bond that only one of its two atoms lists is
        still reported.
        """
        # Build the serial -> type table once instead of rescanning the
        # atom list for every bond end.  setdefault keeps the first atom
        # when a serial is repeated, matching get_type().
        atom_types = {}
        if not include_hydrogens:
            for atom in self.atomlist:
                atom_types.setdefault(atom['serial'], atom['type'])

        def is_excluded(serial):
            return (not include_hydrogens
                    and atom_types.get(serial) == 'H')

        bonds = []
        seen = set()
        for entry in self.connections:
            for serial, partners in entry.items():
                if is_excluded(serial):
                    continue
                for partner, _bond_type in partners:
                    if is_excluded(partner):
                        continue
                    # Direction-independent key so a-b and b-a count as
                    # the same bond.
                    if serial <= partner:
                        key = (serial, partner)
                    else:
                        key = (partner, serial)
                    if key in seen:
                        continue
                    seen.add(key)
                    bonds.append((serial, partner))
        return bonds


####################################################
def HinFileReader(filename):
    """Read a HIN file and build a HinFile for every molecule in it.

    Lines starting with 'mol' or 'atom' are collected; each line containing
    'endmol' closes the current molecule and turns the collected lines into
    a HinFile.  All other lines are ignored, and lines collected after the
    last 'endmol' are discarded.

    Returns the single HinFile when the file holds exactly one molecule,
    otherwise a list of HinFile objects (empty when no molecule was found).
    If the file cannot be opened, 'File I/O Error' is printed and None is
    returned.
    """
    try:
        f = open(filename, 'r')
    except IOError:
        print('File I/O Error')
        return None

    hinfilelist = []
    mol = []
    # The with-block guarantees the handle is closed even if parsing fails.
    with f:
        for line in f:
            if 'endmol' in line:
                hinfilelist.append(HinFile(mol))
                mol = []
            elif line.startswith(('atom', 'mol')):
                mol.append(line)

    if len(hinfilelist) == 1:
        return hinfilelist[0]
    return hinfilelist