# Report the number of pages in a PDF using PyPDF2.
#
# Install the dependency from a shell first (this is not Python code):
#     pip install PyPDF2
import PyPDF2

# Open in binary mode; the context manager closes the file even if
# parsing the PDF raises.
with open(r"F:\fileName.pdf", 'rb') as pdfFileObject:
    # Create the reader object over the open file handle.
    pdfReader = PyPDF2.PdfFileReader(pdfFileObject)
    # NOTE(review): PdfFileReader / numPages are the legacy PyPDF2 names;
    # newer releases appear to rename them (PdfReader, len(reader.pages)).
    # Confirm against the installed version before upgrading.
    print(" No. Of Pages :", pdfReader.numPages)  # number of pages in the PDF
# Extract tables from pages 1 and 2 of a PDF using tabula-py.
#
# Install the dependency first (notebook shell escape, kept as a comment):
#     !pip install tabula-py
import tabula

# Read all table data found on pages 1 and 2 of the PDF.
df = tabula.read_pdf("sample.pdf", pages=[1, 2])

# Select the second extracted table. As a bare expression this is only
# displayed in an interactive session / notebook cell.
# NOTE(review): indexing with [1] presumes read_pdf returned a sequence with
# at least two tables — confirm for the installed tabula-py version and input.
df[1]

# Optional alternative: write the extracted tables straight to a CSV file.
# tabula.convert_into("sample.pdf", "sample.csv", output_format="csv")
# Print the page count and the extracted text of the first page of a PDF.
import PyPDF2

# The context manager replaces the manual close() call so the file handle is
# released even if reading or text extraction raises.
with open(r"F:\pdf.pdf", 'rb') as pdfFileObject:
    pdfReader = PyPDF2.PdfFileReader(pdfFileObject)
    print(" No. Of Pages :", pdfReader.numPages)

    # Page indices are zero-based: 0 is the first page.
    pageObject = pdfReader.getPage(0)
    # NOTE(review): PdfFileReader / numPages / getPage / extractText are the
    # legacy PyPDF2 names; newer releases appear to rename them (PdfReader,
    # len(reader.pages), reader.pages[0], extract_text()). Confirm against the
    # installed version before upgrading.
    print(pageObject.extractText())