Skip to content

Commit 6d94e5b

Browse files
committed
pdfcat and pagerange.py: error handling, stdout, misc.
modified: LICENSE
    Added Steve Witham <switham_github@mac-guyver.com>.
modified: PyPDF2/pagerange.py
    Fixed some error handling. Made parse_filename_page_ranges return a list instead of being a generator. Tweaked a variable name for readability.
renamed: Sample_Code/makepages.py -> Sample_Code/makesimple.py (also modified)
    Renamed the code to be similar to makesimple.sh. Changed the output file names to be the same as the ones makesimple.sh makes.
modified: Sample_Code/pdfcat
    Fixed to allow sending output to a file by redirection, as with cat:
        pdfcat input*.pdf >output.pdf
    using output = os.fdopen(stdout.fileno(), "wb"). I have not tested this in Windows, but the Python docs say the calls are supported in Windows. The worst problem this could cause would be to silence PyPDF2's existing warning about the file being in text mode, while still actually writing in Windows text mode.
    Added examples to the --help output to show output-redirect style.
    Took out the caching of open input files. Needless complication.
    Added a try...except so that if there's a problem while processing an input file, the filename is printed.
    Made quiet the default and --verbose an option.
1 parent 201ad9f commit 6d94e5b

File tree

4 files changed

+47
-35
lines changed

4 files changed

+47
-35
lines changed

LICENSE

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
Copyright (c) 2006-2008, Mathieu Fenniak
22
Some contributions copyright (c) 2007, Ashish Kulkarni <kulkarni.ashish@gmail.com>
3+
Some contributions copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>
34

45
All rights reserved.
56

PyPDF2/pagerange.py

+14-12
Original file line numberDiff line numberDiff line change
@@ -122,27 +122,29 @@ def indices(self, n):
122122

123123

124124
def parse_filename_page_ranges(args):
125-
"""
126-
Generate a sequence of (filename, page_range) pairs from a list
127-
of filenames and page ranges.
125+
"""
126+
Given a list of filenames and page ranges, return a list of
127+
(filename, page_range) pairs.
128128
First arg must be a filename; other args are filenames, page-range
129129
expressions, slice objects, or PageRange objects.
130-
A filename not followed by page range indicates all pages of the file.
131-
Yields tuples like (pdf_filename, page_range).
130+
A filename not followed by a page range indicates all pages of the file.
132131
"""
132+
pairs = []
133133
pdf_filename = None
134-
did_some = False
134+
did_page_range = False
135135
for arg in args + [None]:
136136
if PageRange.valid(arg):
137137
if not pdf_filename:
138-
raise Error("First argument should be a filename.")
138+
raise ValueError("The first argument must be a filename, " \
139+
"not a page range.")
139140

140-
yield (pdf_filename, PageRange(arg))
141-
did_some = True
141+
pairs.append( (pdf_filename, PageRange(arg)) )
142+
did_page_range = True
142143
else:
143144
# New filename or end of list--do all of the previous file?
144-
if pdf_filename and not did_some:
145-
yield (pdf_filename, PAGE_RANGE_ALL)
145+
if pdf_filename and not did_page_range:
146+
pairs.append( (pdf_filename, PAGE_RANGE_ALL) )
146147

147148
pdf_filename = arg
148-
did_some = False
149+
did_page_range = False
150+
return pairs

Sample_Code/makepages.py Sample_Code/makesimple.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
TEXT = """%s page %d of %d
1111
1212
a wonderful file
13-
created with Sample_Code/makepages.py"""
13+
created with Sample_Code/makesimple.py"""
1414

1515
def make_pdf_file(output_filename, np):
1616
title = output_filename
@@ -30,6 +30,6 @@ def make_pdf_file(output_filename, np):
3030
nps = [None, 5, 11, 17]
3131
for i, np in enumerate(nps):
3232
if np:
33-
filename = "simplest_%d.pdf" % i
33+
filename = "simple%d.pdf" % i
3434
make_pdf_file(filename, np)
3535
print "Wrote", filename

Sample_Code/pdfcat

+30-21
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,16 @@ A file not followed by a page range means all the pages of the file.
77
88
PAGE RANGES are like Python slices.
99
{page_range_help}
10-
EXAMPLE
10+
EXAMPLES
1111
pdfcat -o output.pdf head.pdf content.pdf :6 7: tail.pdf -1
12-
Concatenate all of head.pdf, all but page seven of content.pdf,
13-
and the last page of tail.pdf, producing output.pdf.
12+
Concatenate all of head.pdf, all but page seven of content.pdf,
13+
and the last page of tail.pdf, producing output.pdf.
14+
15+
pdfcat chapter*.pdf >book.pdf
16+
You can specify the output file by redirection.
17+
18+
pdfcat chapter?.pdf chapter10.pdf >book.pdf
19+
In case you don't want chapter 10 before chapter 2.
1420
"""
1521
# Copyright (c) 2014, Steve Witham <switham_github@mac-guyver.com>.
1622
# All rights reserved. This software is available under a BSD license;
@@ -25,33 +31,36 @@ def parse_args():
2531
formatter_class=argparse.RawDescriptionHelpFormatter)
2632
parser.add_argument("args", nargs="+",
2733
metavar="filename or page range expression")
28-
parser.add_argument("-o", "--output", required=True,
34+
parser.add_argument("-o", "--output",
2935
metavar="output_file")
30-
parser.add_argument("-q", "--quiet", action="store_true")
36+
parser.add_argument("-v", "--verbose", action="store_true",
37+
help="show page ranges as they are being read")
3138
return parser.parse_args()
3239

33-
from sys import argv, stderr, stdout, exit
34-
import re
40+
from sys import stderr, stdout, exit
41+
import os
3542
import traceback
3643
from PyPDF2 import PdfFileMerger, parse_filename_page_ranges
3744

3845

3946
if __name__ == "__main__":
4047
args = parse_args()
41-
verbose = not(args.quiet)
42-
4348
filename_page_ranges = parse_filename_page_ranges(args.args)
49+
4450
merger = PdfFileMerger()
45-
input_files = {}
46-
for (filename, page_range) in filename_page_ranges:
47-
if verbose:
48-
print >>stderr, filename, page_range
49-
if filename not in input_files:
50-
input_files[filename] = open(filename, "rb")
51-
merger.append(input_files[filename], pages=page_range)
52-
for f in input_files.values():
53-
f.close()
54-
output = open(args.output, "wb")
51+
try:
52+
for (filename, page_range) in filename_page_ranges:
53+
if args.verbose:
54+
print >>stderr, filename, page_range
55+
with open(filename, "rb") as f:
56+
merger.append(f, pages=page_range)
57+
except:
58+
print >>stderr, traceback.format_exc()
59+
print >>stderr, "Error while reading " + filename
60+
exit(1)
61+
if args.output:
62+
output = open(args.output, "wb")
63+
else:
64+
stdout.flush()
65+
output = os.fdopen(stdout.fileno(), "wb")
5566
merger.write(output)
56-
57-

0 commit comments

Comments
 (0)