#
# Rajarshi Guha
# 14/04/2005
#
"""Extract a selection of pages from a PDF file using pdflatex + pdfpages."""

import getopt
import os
import shutil
import string
import subprocess
import sys
import tempfile


def usage():
    """Print the command-line help text to standard output."""
    print("""
Usage: extractpdf.py [OPTIONS] file.pdf

Extracts sequences of pages from a PDF file and dumps them
to a PDF file. The default name of the output PDF is extract.pdf
and if no pages are specified the resultant file is empty. It expects
that pdflatex can be found in the users path.

Possible options are

-h,--help     This message
-o,--output   The name of the output file
-p,--pages    The pages to extract

The specification for pages is that described in the manual for the
pdfpages LaTeX package. Briefly, pages can be listed as a comma seperated
list such as 3,4,7,10 or as range such as 3-7. Combinations of these
two forms may also be specified.

A wrong page specification makes pdflatex fail; the program then
reports the error and exits with a non-zero status.
""")


def get_tex_string(inputfile, pagespec):
    """Return the LaTeX source that includes the requested pages.

    inputfile -- path of the PDF to read; inserted verbatim as the
                 file argument of the includepdf command.
    pagespec  -- page selection in pdfpages syntax (e.g. "3,4,7-10");
                 inserted verbatim into the pages={...} option.

    Neither value is escaped for TeX.
    NOTE(review): names containing TeX-special characters probably need
    quoting before they can be used here -- confirm with pdfpages.
    """
    # Raw string: the backslashes are TeX control sequences, not Python
    # escapes (a plain "\u..." literal is a syntax error on Python 3).
    s = r"""
\documentclass{article}
\usepackage{pdfpages}
\begin{document}
\includepdf[pages={%s}]{%s}
\end{document}
""" % (pagespec, inputfile)
    return s


def _fail(message, status=1):
    """Write an error message to stderr and return the given exit status."""
    sys.stderr.write('extractpdf.py: %s\n' % message)
    return status


def extract_pages(inputfile, pagespec, outputfile):
    """Run pdflatex to copy the selected pages of inputfile to outputfile.

    inputfile  -- absolute path of the source PDF.
    pagespec   -- non-empty page selection in pdfpages syntax.
    outputfile -- destination path, interpreted relative to the current
                  working directory.

    Returns 0 on success and 1 when pdflatex cannot be started, fails,
    or the result cannot be copied to outputfile.
    """
    # A private scratch directory keeps pdflatex's by-products (.aux,
    # .log, ...) together so they can be removed without a shell glob.
    workdir = tempfile.mkdtemp()
    try:
        with open(os.path.join(workdir, 'extract.tex'), 'w') as handle:
            handle.write(get_tex_string(inputfile, pagespec))

        # Argument list instead of a shell string: no quoting problems and
        # no shell injection. stdin is /dev/null and the run is
        # non-interactive so a bad page spec ends pdflatex instead of
        # leaving it waiting at an invisible prompt.
        command = ['pdflatex', '-interaction=nonstopmode',
                   '-halt-on-error', 'extract.tex']
        with open(os.devnull, 'r') as null_in:
            with open(os.devnull, 'w') as null_out:
                try:
                    status = subprocess.call(command, cwd=workdir,
                                             stdin=null_in, stdout=null_out)
                except OSError as err:
                    return _fail('could not run pdflatex: %s' % err)

        produced = os.path.join(workdir, 'extract.pdf')
        if status != 0 or not os.path.isfile(produced):
            return _fail('pdflatex failed (exit status %s); check the page '
                         'specification %r' % (status, pagespec))

        try:
            shutil.copy(produced, outputfile)
        except (IOError, OSError) as err:
            return _fail('could not write %s: %s' % (outputfile, err))
        return 0
    finally:
        shutil.rmtree(workdir, ignore_errors=True)


def main(argv=None):
    """Command-line entry point; returns the process exit status.

    argv -- argument list without the program name; defaults to
            sys.argv[1:].

    Exit status: 0 on success or when help is shown, 2 for a usage
    error, 1 when the extraction itself fails.
    """
    if argv is None:
        argv = sys.argv[1:]
    if not argv:
        usage()
        return 0

    try:
        opts, args = getopt.getopt(argv, 'o:p:h',
                                   ['output=', 'pages=', 'help'])
    except getopt.GetoptError as err:
        _fail(str(err))
        usage()
        return 2

    pagespec = ''
    outputfile = 'extract.pdf'
    for opt, value in opts:
        if opt in ('-h', '--help'):
            usage()
            return 0
        if opt in ('-o', '--output'):
            outputfile = value
        elif opt in ('-p', '--pages'):
            pagespec = value

    if pagespec == '':
        # No pages requested: produce an empty file, under the name the
        # user asked for rather than always 'extract.pdf'.
        with open(outputfile, 'w'):
            pass
        return 0

    if not args:
        _fail('no input PDF file given')
        usage()
        return 2

    # abspath() alone is enough; joining it onto dirname() again would
    # duplicate the directory part for inputs such as "dir/file.pdf".
    inputfile = os.path.abspath(args[0])
    if not os.path.isfile(inputfile):
        return _fail('input file not found: %s' % inputfile)

    return extract_pages(inputfile, pagespec, outputfile)


if __name__ == '__main__':
    sys.exit(main())