Install PyMuPDF (the package is imported as `fitz`):
python -m pip install --upgrade pymupdf
Here is the source code:
import fitz # PyMuPDF
def _highlight_rects(annot):
    """Return the rectangles covered by a highlight annotation's quads."""
    vertices = annot.vertices
    rects = []
    if vertices:
        # A highlight stores four corner points per marked quad (typically
        # one quad per highlighted line); ignore an incomplete trailing group.
        rects = [
            fitz.Quad(vertices[i:i + 4]).rect
            for i in range(0, len(vertices) - 3, 4)
        ]
    # Without usable quad points, fall back to the annotation's overall
    # rectangle.
    return rects or [annot.rect]


def _text_under_highlight(page, annot):
    """Return the page text located under a single highlight annotation."""
    pieces = (
        page.get_text("text", clip=rect).strip()
        for rect in _highlight_rects(annot)
    )
    # NOTE(review): clipping by rectangle may pick up characters of
    # neighbouring lines that overlap the highlight area -- verify on
    # tightly spaced documents.
    return " ".join(piece for piece in pieces if piece)


def extract_highlighted_text(pdf_path):
    """Return the text under every highlight annotation in a PDF.

    The previous implementation returned ``annot.info["subject"]``, which is
    annotation metadata rather than the highlighted text. This version reads
    the page text covered by each highlight's quad points instead.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.

    Returns:
        A list of strings, one per highlight annotation, in page order and
        then in the order the annotations are yielded by ``page.annots()``.
        An entry is an empty string when no text lies under the highlight.
    """
    highlighted_text = []
    # The context manager closes the document even if extraction raises.
    with fitz.open(pdf_path) as pdf_document:
        for page in pdf_document:
            for annot in page.annots():
                # annot.type is a (number, name) pair; compare the number.
                if annot.type[0] != fitz.PDF_ANNOT_HIGHLIGHT:
                    continue
                highlighted_text.append(_text_under_highlight(page, annot))
    return highlighted_text
# Usage example
def main(pdf_path='INPUT_FILE.pdf'):
    """Print the text of every highlight found in *pdf_path*, one per line."""
    for text in extract_highlighted_text(pdf_path):
        print(text)


# Run the example only when executed as a script, so importing this module
# does not try to open a PDF as a side effect.
if __name__ == "__main__":
    main()
Read more about: