BAEL-5866 Reading PDF File using Java (#13403)

This commit is contained in:
Michael Olayemi 2023-02-04 07:50:37 +01:00 committed by GitHub
parent 58f8225215
commit a70afe6a9f
2 changed files with 52 additions and 0 deletions

BIN
pdf/sample.pdf Normal file

Binary file not shown.

View File

@ -0,0 +1,52 @@
package com.baeldung.pdfreadertest;
import static org.junit.jupiter.api.Assertions.*;
import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.junit.jupiter.api.Test;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.parser.PdfTextExtractor;
class ReadPdfFileUnitTest {
@Test
public void givenSamplePdf_whenUsingApachePdfBox_thenCompareOutput() throws IOException {
String expectedText = "Hello World!\n";
File file = new File("sample.pdf");
PDDocument document = PDDocument.load(file);
PDFTextStripper stripper = new PDFTextStripper();
String text = stripper.getText(document);
document.close();
assertEquals(expectedText, text);
}
@Test
public void givenSamplePdf_whenUsingiTextPdf_thenCompareOutput() throws IOException {
String expectedText = "Hello World!";
PdfReader reader = new PdfReader("sample.pdf");
int pages = reader.getNumberOfPages();
StringBuilder text = new StringBuilder();
for (int i = 1; i <= pages; i++) {
text.append(PdfTextExtractor.getTextFromPage(reader, i));
}
reader.close();
assertEquals(expectedText, text.toString());
}
}