-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathpdf2text.py
29 lines (23 loc) · 1.04 KB
/
pdf2text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# importing PyPDF2 module for extracting text from PDF files.
from PyPDF2 import PdfFileReader
# Print the metadata and the extracted text of every page of a PDF file.
#
# The PDF is looked up relative to the current working directory; replace the
# file name below with a full path to read a file located elsewhere.
#
# NOTE(review): PdfFileReader / getDocumentInfo / getNumPages / getPage /
# extractText are the legacy camelCase PyPDF2 names; newer PyPDF2 releases
# rename them -- confirm the installed version before upgrading the package.

# Visual divider printed between the sections of the output.
SEPARATOR = "- - - - - - - - - - - - - - - - - - - -"

# The with-statement guarantees the file is closed even when parsing or text
# extraction raises part-way through.
with open('pdf-file-name.pdf', 'rb') as pdfFile:
    # create PdfFileReader object to read the file
    pdfReader = PdfFileReader(pdfFile)

    # Fetch the document information once. A PDF without an information
    # dictionary yields None here, so guard the attribute access instead of
    # failing with AttributeError; missing values are printed as "None".
    docInfo = pdfReader.getDocumentInfo()
    title = docInfo.title if docInfo is not None else None
    creator = docInfo.creator if docInfo is not None else None

    # printing the title and name of the creator
    print("PDF File name: " + str(title))
    print("PDF File created by: " + str(creator))
    print(SEPARATOR)

    # calculating the number of pages the pdf consists of
    numOfPages = pdfReader.getNumPages()

    # convert every page to text; `i` is the zero-based page index and is
    # printed as-is, so the first page is reported as "Page Number: 0"
    for i in range(numOfPages):
        print("Page Number: " + str(i))
        print(SEPARATOR)
        pageObj = pdfReader.getPage(i)
        print(pageObj.extractText())
        print(SEPARATOR)