# hydrosurf.py # # Combines data from exlogp.perl and a HIN file to # produce a PDB file with hydrophobicity values in # the B factor column which is then displayed and colored # by PyMOL # Rajarshi Guha December 2002 # # # # NOTE: if you use the hue option then blue = lowest B value # and red is highest B value. The same applies if you use the rainbow # option. I think that its easier to understand the hue option. # # Update: 23/04/04 - the hydrophobicity values are now scaled # to lie between -1 and 1, allowing multiple molecules to be # colored on the same scale from pymol import cmd import string, os, sys, StringIO import tempfile def load_cpsa_data(filename = None): cdata = [] if not filename: print """ Error loading the data file """ sys.exit(0) try: f = open(filename,'r') except IOError: print """ Error loading the data file """ sys.exit(0) for i in range(0,7): f.readline() while 1: line = f.readline() if not line: break if line[0] == '\n': continue line = string.split(line) if line[0] == '9999': break cdata.append( (int(line[0]), int(line[1]), float(line[2])) ) f.close() return cdata def load_hpsa_data(filename = None): # hdata[] is a list of tuples of the form # (atom num,atom type, value) # atom num corresponds to the values in the hin file hdata = [] if not filename: print """ Error loading the data file """ sys.exit(0) try: f = open(filename,'r') except IOError: print """ Error loading the data file """ sys.exit(0) # while(1): # line = f.readline() # if string.find(f,'HYDROPHOBICITY DATA') >= 0: break f.readline() f.readline() while 1: line = f.readline() if not line: break if line[0] == '\n': continue line = string.split(line) if line[0] == '9999': break print line hdata.append( (int(line[0]), int(line[1]), float(line[2])) ) f.close() return hdata def load_fukui_out(filename = None,ftype = 'FUKUIP'): if not filename: print """ Error loading the data file """ sys.exit(0) try: f = open(filename,'r') except IOError: print """ Error loading the data file """ sys.exit(0) f.readline() f.readline() line = string.split(f.readline()) natom = int(line[2]) hdata = [] for i in range(0,natom): line = string.split(f.readline()) if ftype == 'FUKUIP': fval = float(line[2]) elif ftype == 'FUKUIM': fval = float(line[3]) elif ftype == 'FUKUIZ': fval = float(line[4]) hdata.append( (int(line[0]), line[1], fval) ) return(hdata) def scale_vals( hdata ): hy = [ float(z) for x,y,z in hdata ] rng = max(hy) - min(hy) shft = -1 * rng / 2 - min(hy) scl = 2 / rng hy = map( lambda x: scl*(x+shft), hy) nums = [ float(x) for x,y,z in hdata ] types = [ float(y) for x,y,z in hdata ] hdata = zip(nums, types, hy) return hdata def hydrosurf(data = '', hin = '', rbow = 0, nbins = 2, datatype='HPSA'): """ ATHOR Rajarshi Guha USAGE hydro_surf(datafile, hinfile, colorstyle, nbins, datatype='HPSA') This function uses the color_b.py script to color a molecule using hydrophobicity values calculated using the exlogp.perl script by Brian Mattioni. datafile - the output file from exlogp.perl hinfile - the HIN file that exlogp.perl used Both these values should be strings. colorstyle - 0 for continuous hues from blue to magenta, or else 1 for colors of the rainbow. nbins - the number of colors to use when coloring the molecule datatype - Can be HPSA, CPSA or FUKUI and is used to specify whether the data came from HPSA, CPSA or FUKUI descriptors. (The default is HPSA and if you just want to veiw hydrophobicity surfaces this is OK) Using it in PyMOL: run hydrosurf.py - loads the function definition hydrosurf('datafile.dat','mol.hin',0,2) - runs the function (Replace the two filenames with whatever you have) It will need the color_b.py script to do the coloring """ if data == '' or hin == '': print 'You must supply the hydrophobicity values and the actual hin file' sys.exit(0) if nbins < 2: nbins == 2 try: cmd.do('delete '+string.split(hin,'.')[0]+'.pdb') except: pass if datatype == 'HPSA': hdata = load_hpsa_data(data) elif string.find(datatype,'FUKUI') >= 0: hdata = load_fukui_out(data,datatype) else: hdata = load_cpsa_data(data) # scale the hy values to be between -1 nd 1 hdata = scale_vals(hdata) # Convert the hin to a pdb pdbfile = tempfile.mktemp()+'.pdb' os.system('babel -ihin '+hin+' -opdb '+pdbfile) fin = open(pdbfile,'r') fout = StringIO.StringIO() c = 0 while 1: line = fin.readline() if not line: break if string.find(line,'ATOM') == 0: anum,atype,aval = hdata[c] line = line[:62] + str(round(aval,6)) + '\n' c += 1 fout.write(line) else: fout.write(line) fin.close() os.system('rm '+pdbfile) cmd.read_pdbstr(fout.getvalue(),string.split(hin,'.')[0]+'.pdb') fout.close() cmd.do('run color_bh.py') cmd.do('color_bh(\'(all)\', ramp=0,rainbow=%d,nbins=%d)' % (rbow,nbins)) #cmd.do('color_b(selection=\'all\', ramp=1,rainbow=%d,nbins=%d)' % (rbow,nbins))