#
# Rajarshi Guha <rajarshi@presidency.com>
# 14/04/2005
#

import os, sys, string, getopt, tempfile

def usage():
    """Print the command-line help text to standard output."""
    # A single parenthesised argument is accepted by both the Python 2
    # print statement and the Python 3 print function, and produces the
    # same output in each.
    print("""
    Usage: extractpdf.py [OPTIONS] file.pdf

    Extracts sequences of pages from a PDF file and dumps them to 
    a PDF file. The default name of the output PDF is extract.pdf
    and if no pages are specified the resultant file is empty. 
    It expects that pdflatex can be found in the users path.

    Possible options are

    -h,--help       This message
    -o,--output     The name of the output file
    -p,--pages      The pages to extract

    The specification for pages is that described in the manual
    for the pdfpages LaTeX package. Briefly, pages can be listed
    as a comma seperated list such as 3,4,7,10 or as range such as
    3-7. Combinations of these two forms may also be specified.

    Currently a wrong page specification will lead to this program 
    hanging as pdflatex will not exit.
    """)

def get_tex_string(inputfile, pagespec):
    """Return the LaTeX source that extracts pages from a PDF.

    The generated document loads the pdfpages package and includes the
    requested pages of the input file.

    inputfile -- path of the PDF to read, inserted verbatim into the
                 includepdf command
    pagespec  -- page selection in pdfpages syntax (e.g. "3,4,7-10"),
                 inserted verbatim into the pages option

    The returned string starts and ends with a newline.
    """
    # Raw string: the LaTeX backslashes must reach the output untouched.
    # In a normal literal "\usepackage" is a syntax error on Python 3
    # (truncated \uXXXX escape) and "\begin" would start with a backspace.
    template = r"""
\documentclass{article}
\usepackage{pdfpages}
\begin{document}
\includepdf[pages={%s}]{%s}
\end{document}
"""
    return template % (pagespec, inputfile)

if __name__ == '__main__':
    # Script entry point: parse the options, generate a small LaTeX file
    # that pulls the requested pages out of the input PDF, run pdflatex on
    # it in a scratch directory and copy the resulting PDF to the output
    # location.

    # Only the script entry point needs these, so they are imported here.
    import shutil
    import subprocess

    if len(sys.argv) == 1:
        usage()
        sys.exit(0)

    pagespec = ''
    outputfile = 'extract.pdf'

    try:
        opt, args = getopt.getopt(sys.argv[1:], 'o:p:h',
                                  ['output=', 'pages=', 'help'])
    except getopt.GetoptError:
        # Unknown option or missing option argument: report failure.
        usage()
        sys.exit(2)
    for o, a in opt:
        if o in ('-h', '--help'):
            usage()
            sys.exit(0)
        elif o in ('-o', '--output'):
            outputfile = a
        elif o in ('-p', '--pages'):
            pagespec = a

    if pagespec == '':
        # No pages requested: the result is an empty file. Create it under
        # the requested output name rather than a hard-coded extract.pdf.
        open(outputfile, 'w').close()
        sys.exit(0)

    if not args:
        # Pages were requested but there is no input PDF to take them from.
        usage()
        sys.exit(2)

    # pdflatex runs inside the scratch directory, so the input must be
    # absolute. abspath() also handles relative paths that contain a
    # directory component (e.g. sub/file.pdf) without duplicating it.
    inputfile = os.path.abspath(args[0])

    # A private scratch directory keeps pdflatex's auxiliary files
    # (.aux, .log, ...) together so they can be removed in one step, and
    # avoids changing the working directory of this process.
    workdir = tempfile.mkdtemp()
    try:
        texname = os.path.join(workdir, 'extract.tex')
        with open(texname, 'w') as texfile:
            texfile.write(get_tex_string(inputfile, pagespec))

        # Argument list instead of a shell string, so file names with
        # spaces or shell metacharacters are passed through intact.
        # NOTE: pdflatex still runs interactively; as the usage text says,
        # a bad page specification leaves it waiting for input.
        try:
            with open(os.devnull, 'w') as devnull:
                subprocess.call(['pdflatex', texname],
                                stdout=devnull, cwd=workdir)
        except OSError:
            sys.stderr.write('extractpdf: could not run pdflatex\n')
            sys.exit(1)

        # splitext() only strips the final extension, so dots elsewhere in
        # the temporary path do not matter.
        pdfname = os.path.splitext(texname)[0] + '.pdf'
        if not os.path.exists(pdfname):
            sys.stderr.write('extractpdf: pdflatex did not produce a PDF\n')
            sys.exit(1)

        # The working directory was never changed, so a relative output
        # name still resolves against the directory the user started in.
        shutil.copy(pdfname, outputfile)
    finally:
        shutil.rmtree(workdir, ignore_errors=True)