Fixes to PDF module (#807)
* PDF to X * PDF to X * Remove created doc * Code fixes and cleanup for PDF module * Fix web.xml in spring-mvc-web-vs-initializer project * Rollback web.xml * Fixes to PDF article
This commit is contained in:
parent
c29601db41
commit
04d33298ac
18
pdf/pom.xml
18
pdf/pom.xml
|
@ -24,49 +24,31 @@
|
|||
<version>3.8.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox -->
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox</artifactId>
|
||||
<version>2.0.3</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox-tools -->
|
||||
<dependency>
|
||||
<groupId>org.apache.pdfbox</groupId>
|
||||
<artifactId>pdfbox-tools</artifactId>
|
||||
<version>2.0.3</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/net.sf.cssbox/pdf2dom -->
|
||||
<dependency>
|
||||
<groupId>net.sf.cssbox</groupId>
|
||||
<artifactId>pdf2dom</artifactId>
|
||||
<version>1.6</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
|
||||
<dependency>
|
||||
<groupId>com.itextpdf</groupId>
|
||||
<artifactId>itextpdf</artifactId>
|
||||
<version>5.5.10</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi</artifactId>
|
||||
<version>3.15</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-ooxml -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-ooxml</artifactId>
|
||||
<version>3.15</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.poi/poi-scratchpad -->
|
||||
<dependency>
|
||||
<groupId>org.apache.poi</groupId>
|
||||
<artifactId>poi-scratchpad</artifactId>
|
||||
<version>3.15</version>
|
||||
</dependency>
|
||||
<!-- https://mvnrepository.com/artifact/org.apache.xmlgraphics/batik-transcoder -->
|
||||
<dependency>
|
||||
<groupId>org.apache.xmlgraphics</groupId>
|
||||
<artifactId>batik-transcoder</artifactId>
|
||||
|
|
|
@ -23,17 +23,13 @@ public class PDF2HTMLExample {
|
|||
}
|
||||
|
||||
private static void generateHTMLFromPDF(String filename) throws ParserConfigurationException, IOException {
|
||||
try {
|
||||
PDDocument pdf = PDDocument.load(new File(filename));
|
||||
PDFDomTree parser = new PDFDomTree();
|
||||
Writer output = new PrintWriter("src/output/pdf.html", "utf-8");
|
||||
parser.writeText(pdf, output);
|
||||
output.close();
|
||||
if (pdf != null) {
|
||||
pdf.close();
|
||||
}
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
PDDocument pdf = PDDocument.load(new File(filename));
|
||||
PDFDomTree parser = new PDFDomTree();
|
||||
Writer output = new PrintWriter("src/output/pdf.html", "utf-8");
|
||||
parser.writeText(pdf, output);
|
||||
output.close();
|
||||
if (pdf != null) {
|
||||
pdf.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,30 +23,26 @@ public class PDF2TextExample {
|
|||
}
|
||||
|
||||
private static void generateTxtFromPDF(String filename) throws IOException {
|
||||
try {
|
||||
File f = new File(filename);
|
||||
String parsedText;
|
||||
PDFParser parser = new PDFParser(new RandomAccessFile(f, "r"));
|
||||
parser.parse();
|
||||
File f = new File(filename);
|
||||
String parsedText;
|
||||
PDFParser parser = new PDFParser(new RandomAccessFile(f, "r"));
|
||||
parser.parse();
|
||||
|
||||
COSDocument cosDoc = parser.getDocument();
|
||||
COSDocument cosDoc = parser.getDocument();
|
||||
|
||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||
PDDocument pdDoc = new PDDocument(cosDoc);
|
||||
PDFTextStripper pdfStripper = new PDFTextStripper();
|
||||
PDDocument pdDoc = new PDDocument(cosDoc);
|
||||
|
||||
parsedText = pdfStripper.getText(pdDoc);
|
||||
parsedText = pdfStripper.getText(pdDoc);
|
||||
|
||||
if (cosDoc != null)
|
||||
cosDoc.close();
|
||||
if (pdDoc != null)
|
||||
pdDoc.close();
|
||||
|
||||
PrintWriter pw = new PrintWriter("src/output/pdf.txt");
|
||||
pw.print(parsedText);
|
||||
pw.close();
|
||||
} catch (Exception e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
if (cosDoc != null)
|
||||
cosDoc.close();
|
||||
if (pdDoc != null)
|
||||
pdDoc.close();
|
||||
|
||||
PrintWriter pw = new PrintWriter("src/output/pdf.txt");
|
||||
pw.print(parsedText);
|
||||
pw.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue