#!/usr/local/bin/py
# Rajarshi Guha
#
# 5/11/2002
"""Convert the mails in a Unix mbox file into one HTML page (mail.html).

Every non-multipart mail is rendered as an HTML fragment: the headers and
body are HTML-escaped, and e-mail addresses and http:// references are
turned into links.  Multipart mails are skipped.

The code runs unchanged on Python 2.6+ and Python 3.
"""

import string
import sys
import mailbox
import email
import email.errors
import re

try:
    import StringIO  # Python 2 module
except ImportError:
    import io as StringIO  # Python 3: io provides the StringIO class

# Characters that must not reach the HTML output verbatim.  A newline is
# kept but preceded by a line break so the body layout survives rendering.
_HTML_ESCAPES = {
    '>': '&gt;',
    '<': '&lt;',
    '&': '&amp;',
    '\n': '<br>\n',
}

_HEADER_ESCAPE_RE = re.compile(r'[<>&]')
_BODY_ESCAPE_RE = re.compile(r'[<>&\n]')
# "+" rather than "*" so that a lone "@" or a bare "http://" is not linked.
_EMAIL_RE = re.compile(r'[A-Za-z0-9._-]+@[A-Za-z0-9._-]+')
_URL_RE = re.compile(r'http://[A-Za-z0-9=?/_.-]+')

# Placeholders, in order: subject, from, to, date, body.
_MAIL_TEMPLATE = """
<h3>%s</h3>
<p>
<b>From:</b> %s<br>
<b>To:</b> %s<br>
<b>Date:</b> %s
</p>
<p>
%s
</p>
"""

_PAGE_HEADER = '<html>\n<body>\n'
_PAGE_FOOTER = '</body>\n</html>\n'
_MAIL_SEPARATOR = '\n\n<hr>\n\n'

USAGE = """
mail.py MBOX_FILE

Will convert all the mails in the user supplied file (Unix mbox format,
the kind that KMail or mutt uses) into HTML emails (contained in the
file called mail.html). Depends on the email module successfully
parsing the mails. It will skip mails that the module cant handle.

All email address'es & http:// references are converted to links, and
the text is HTML'ized. Currently just dumps an HTML mail as HTML source
rather than displaying the mail as an HTML page (anyway, people
should'nt be sending HTML mail!)

In addition multipart emails are not handled yet, skips them as well

Basically a proof of concept for me :)
"""


def fixup_href(matchobj):
    """Return an HTML anchor for the URL matched by *matchobj*.

    Meant to be used as the replacement callback of ``re.sub``.  An empty
    match yields an empty string.
    """
    url = matchobj.group(0)
    if not url:
        return ''
    return '<a href="%s">%s</a>' % (url, url)


def fixup_email(m):
    """Return a ``mailto:`` anchor for the e-mail address matched by *m*.

    Meant to be used as the replacement callback of ``re.sub``.  An empty
    match yields an empty string.
    """
    address = m.group(0)
    if not address:
        return ''
    return _mailto(address, address)


def fixup_char(m):
    """Return the HTML-safe replacement for the single character in *m*.

    ``<``, ``>`` and ``&`` become entities and a newline gains a preceding
    ``<br>``.  Any other character is returned unchanged.
    """
    char = m.group(0)
    return _HTML_ESCAPES.get(char, char)


def _mailto(address, text):
    """Return an anchor that links *text* (already escaped) to *address*."""
    return '<a href="mailto:%s">%s</a>' % (address, text)


def _header_text(msg, name):
    """Return header *name* of *msg* as a string, or '' when it is absent."""
    value = msg.get(name)
    if value is None:
        return ''
    # The email package may hand back a Header object instead of a string
    # for headers holding raw non-ASCII bytes; the regexes need a string.
    return str(value)


def html_mail(msg):
    """Render a non-multipart message as an HTML fragment.

    *msg* is an ``email`` message object.  The Subject, From, To and Date
    headers and the payload are HTML-escaped; addresses in From/To and in
    the body become ``mailto:`` links and http:// references in the body
    become links.  Missing headers are rendered as empty strings.

    Returns the fragment as a string, or ``None`` for a multipart message
    (those are not handled).
    """
    if msg.is_multipart():
        return None

    body = msg.get_payload()
    subject = _header_text(msg, 'Subject')
    date = _header_text(msg, 'Date')
    frm = _header_text(msg, 'From')
    to = _header_text(msg, 'To')

    # Pull the addresses out before escaping, while the text is still raw.
    from_addresses = _EMAIL_RE.findall(frm)
    to_addresses = _EMAIL_RE.findall(to)

    # Mail content is untrusted: escape every header that is displayed.
    subject = _HEADER_ESCAPE_RE.sub(fixup_char, subject)
    date = _HEADER_ESCAPE_RE.sub(fixup_char, date)
    frm = _HEADER_ESCAPE_RE.sub(fixup_char, frm)
    to = _HEADER_ESCAPE_RE.sub(fixup_char, to)

    if from_addresses:
        frm = _mailto(from_addresses[0], frm)

    # Recipients are paired with addresses by position after a plain comma
    # split.  When the two counts disagree (e.g. a quoted display name that
    # contains a comma) the pairing would be wrong, so the escaped header
    # is shown unlinked instead of dropping or mislabelling recipients.
    recipients = [chunk.strip() for chunk in to.split(',')]
    if len(recipients) == len(to_addresses):
        to = ' '.join(
            [_mailto(address, text)
             for text, address in zip(recipients, to_addresses)])

    # Escape first, then link: the generated anchors must not be escaped.
    body = _BODY_ESCAPE_RE.sub(fixup_char, body)
    body = _URL_RE.sub(fixup_href, body)
    body = _EMAIL_RE.sub(fixup_email, body)

    return _MAIL_TEMPLATE % (subject, frm, to, date, body)


def main(argv=None):
    """Convert the mbox file named in *argv* into ``mail.html``.

    *argv* defaults to ``sys.argv``.  With no mbox argument the usage text
    is printed.  Returns the process exit status.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) == 1:
        print(USAGE)
        return 0

    # create=False: a missing input file is an error, not an empty mailbox.
    mbox = mailbox.mbox(argv[1], create=False)
    try:
        with open('mail.html', 'w') as out:
            out.write(_PAGE_HEADER)
            # Iterating over the keys (rather than testing the truth value
            # of each message) keeps a header-less mail from ending the run.
            for num, key in enumerate(mbox.iterkeys(), 1):
                try:
                    msg = mbox[key]
                except email.errors.HeaderParseError:
                    print('Current mail (num = ' + str(num) +
                          ') seems to have a parse error. Skipping')
                    continue
                fragment = html_mail(msg)
                if fragment is None:
                    print('Skipping a multipart email (num ' + str(num) + ')')
                    continue
                out.write(fragment + _MAIL_SEPARATOR)
            out.write(_PAGE_FOOTER)
    finally:
        mbox.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())