JAVA-29281 Create new module Text Processing Libraries Modules (#15479)

This commit is contained in:
anuragkumawat 2023-12-31 08:37:40 -08:00 committed by GitHub
parent 1768eee09f
commit 76bde3ff46
78 changed files with 208 additions and 189 deletions

12
pom.xml
View File

@ -686,7 +686,6 @@
<modules> <modules>
<module>akka-modules</module> <module>akka-modules</module>
<module>algorithms-modules</module> <module>algorithms-modules</module>
<module>antlr</module>
<module>apache-cxf-modules</module> <module>apache-cxf-modules</module>
<module>apache-httpclient-2</module> <module>apache-httpclient-2</module>
<module>apache-httpclient4</module> <module>apache-httpclient4</module>
@ -700,9 +699,7 @@
<module>apache-poi-3</module> <module>apache-poi-3</module>
<module>apache-poi</module> <module>apache-poi</module>
<module>apache-thrift</module> <module>apache-thrift</module>
<module>apache-tika</module>
<module>apache-velocity</module> <module>apache-velocity</module>
<module>asciidoctor</module>
<module>atomix</module> <module>atomix</module>
<module>aws-modules</module> <module>aws-modules</module>
<module>azure</module> <module>azure</module>
@ -813,8 +810,6 @@
<module>osgi</module> <module>osgi</module>
<module>parent-boot-3</module> <module>parent-boot-3</module>
<module>patterns-modules</module> <module>patterns-modules</module>
<module>pdf-2</module>
<module>pdf</module>
<module>performance-tests</module> <module>performance-tests</module>
<module>persistence-modules</module> <module>persistence-modules</module>
<!--<module>persistence-modules/java-harperdb</module>--> <!-- This module requires a library to download manually --> <!--<module>persistence-modules/java-harperdb</module>--> <!-- This module requires a library to download manually -->
@ -896,6 +891,7 @@
<module>tensorflow-java</module> <module>tensorflow-java</module>
<module>testing-modules</module> <module>testing-modules</module>
<module>testing-modules/mockito-simple</module> <module>testing-modules/mockito-simple</module>
<module>text-processing-libraries-modules</module>
<module>timefold-solver</module> <module>timefold-solver</module>
<module>vaadin</module> <module>vaadin</module>
<module>vavr-modules</module> <module>vavr-modules</module>
@ -940,7 +936,6 @@
<modules> <modules>
<module>akka-modules</module> <module>akka-modules</module>
<module>algorithms-modules</module> <module>algorithms-modules</module>
<module>antlr</module>
<module>apache-cxf-modules</module> <module>apache-cxf-modules</module>
<module>apache-httpclient-2</module> <module>apache-httpclient-2</module>
<module>apache-httpclient4</module> <module>apache-httpclient4</module>
@ -954,9 +949,7 @@
<module>apache-poi-3</module> <module>apache-poi-3</module>
<module>apache-poi</module> <module>apache-poi</module>
<module>apache-thrift</module> <module>apache-thrift</module>
<module>apache-tika</module>
<module>apache-velocity</module> <module>apache-velocity</module>
<module>asciidoctor</module>
<module>atomix</module> <module>atomix</module>
<module>aws-modules</module> <module>aws-modules</module>
<module>azure</module> <module>azure</module>
@ -1067,8 +1060,6 @@
<module>osgi</module> <module>osgi</module>
<module>parent-boot-3</module> <module>parent-boot-3</module>
<module>patterns-modules</module> <module>patterns-modules</module>
<module>pdf-2</module>
<module>pdf</module>
<module>performance-tests</module> <module>performance-tests</module>
<module>persistence-modules</module> <module>persistence-modules</module>
<module>persistence-modules/spring-data-neo4j</module> <module>persistence-modules/spring-data-neo4j</module>
@ -1148,6 +1139,7 @@
<module>tensorflow-java</module> <module>tensorflow-java</module>
<module>testing-modules</module> <module>testing-modules</module>
<module>testing-modules/mockito-simple</module> <module>testing-modules/mockito-simple</module>
<module>text-processing-libraries-modules</module>
<module>timefold-solver</module> <module>timefold-solver</module>
<module>vaadin</module> <module>vaadin</module>
<module>vavr-modules</module> <module>vavr-modules</module>

View File

@ -0,0 +1,3 @@
## Text Processing Libraries
This module aggregates the sub-modules that cover text processing libraries: `antlr`, `apache-tika`, `asciidoctor`, `pdf`, and `pdf-2`.

View File

@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.baeldung</groupId> <groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId> <artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version> <version>1.0.0-SNAPSHOT</version>
</parent> </parent>

View File

@ -9,7 +9,7 @@
<parent> <parent>
<groupId>com.baeldung</groupId> <groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId> <artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version> <version>1.0.0-SNAPSHOT</version>
</parent> </parent>

View File

@ -8,7 +8,7 @@
<parent> <parent>
<groupId>com.baeldung</groupId> <groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId> <artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version> <version>1.0.0-SNAPSHOT</version>
</parent> </parent>

View File

@ -9,7 +9,7 @@
<parent> <parent>
<groupId>com.baeldung</groupId> <groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId> <artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version> <version>1.0.0-SNAPSHOT</version>
</parent> </parent>

View File

@ -1,43 +1,43 @@
package com.baeldung.pdfedition; package com.baeldung.pdfedition;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import com.itextpdf.kernel.geom.Rectangle; import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument; import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader; import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter; import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.pdfcleanup.CleanUpProperties; import com.itextpdf.pdfcleanup.CleanUpProperties;
import com.itextpdf.pdfcleanup.PdfCleanUpLocation; import com.itextpdf.pdfcleanup.PdfCleanUpLocation;
import com.itextpdf.pdfcleanup.PdfCleanUpTool; import com.itextpdf.pdfcleanup.PdfCleanUpTool;
import com.itextpdf.pdfcleanup.PdfCleaner; import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy; import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy; import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
/**
 * Command-line example that removes content from a PDF using the iText
 * {@code pdfcleanup} API: it reads {@code SOURCE}, sweeps every match of a
 * regular expression, blanks two fixed rectangles, and writes the result to
 * {@code DESTINATION}.
 */
public class PdfContentRemover { public class PdfContentRemover {
// Input document to clean, and the path the cleaned copy is written to.
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf"; private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-cleaned.pdf"; private static final String DESTINATION = "src/main/resources/baeldung-cleaned.pdf";
/**
 * Opens a reader on {@code SOURCE} and a writer on {@code DESTINATION}, wraps
 * both in one {@code PdfDocument}, runs the removal steps and closes the document.
 *
 * @param args unused
 * @throws IOException declared by this method and by {@code removeContentFromDocument}
 */
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE); PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION); PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer); PdfDocument pdfDocument = new PdfDocument(reader, writer);
removeContentFromDocument(pdfDocument); removeContentFromDocument(pdfDocument);
// NOTE(review): close() is not in a finally / try-with-resources, so it is skipped if the call above throws.
pdfDocument.close(); pdfDocument.close();
} }
/**
 * Applies two clean-up passes to the given document: a regex-driven sweep
 * followed by a location-based clean-up of fixed rectangles.
 *
 * @param pdfDocument the open document to modify in place
 * @throws IOException declared here; which call raises it is not visible in this file
 */
private static void removeContentFromDocument(PdfDocument pdfDocument) throws IOException { private static void removeContentFromDocument(PdfDocument pdfDocument) throws IOException {
// 5.1. remove text // 5.1. remove text
// The composite strategy holds a single regex strategy matching the text "Baeldung".
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy(); CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
strategy.add(new RegexBasedCleanupStrategy("Baeldung")); strategy.add(new RegexBasedCleanupStrategy("Baeldung"));
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy); PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
// 5.2. remove other areas // 5.2. remove other areas
// One rectangle on page 1 and one on page 2.
// NOTE(review): Rectangle arguments are presumably (x, y, width, height) - confirm against the iText API.
List<PdfCleanUpLocation> cleanUpLocations = Arrays.asList(new PdfCleanUpLocation(1, new Rectangle(10, 50, 90, 70)), new PdfCleanUpLocation(2, new Rectangle(35, 400, 100, 35))); List<PdfCleanUpLocation> cleanUpLocations = Arrays.asList(new PdfCleanUpLocation(1, new Rectangle(10, 50, 90, 70)), new PdfCleanUpLocation(2, new Rectangle(35, 400, 100, 35)));
PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDocument, cleanUpLocations, new CleanUpProperties()); PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDocument, cleanUpLocations, new CleanUpProperties());
cleaner.cleanUp(); cleaner.cleanUp();
} }
} }

View File

@ -1,86 +1,86 @@
package com.baeldung.pdfedition; package com.baeldung.pdfedition;
import java.io.IOException; import java.io.IOException;
import java.net.MalformedURLException; import java.net.MalformedURLException;
import com.itextpdf.forms.PdfAcroForm; import com.itextpdf.forms.PdfAcroForm;
import com.itextpdf.forms.fields.PdfFormField; import com.itextpdf.forms.fields.PdfFormField;
import com.itextpdf.forms.fields.PdfTextFormField; import com.itextpdf.forms.fields.PdfTextFormField;
import com.itextpdf.io.image.ImageData; import com.itextpdf.io.image.ImageData;
import com.itextpdf.io.image.ImageDataFactory; import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.geom.Rectangle; import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument; import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader; import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfString; import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfWriter; import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.annot.PdfAnnotation; import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation; import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation;
import com.itextpdf.layout.Document; import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image; import com.itextpdf.layout.element.Image;
import com.itextpdf.layout.element.Paragraph; import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.element.Table; import com.itextpdf.layout.element.Table;
import com.itextpdf.layout.element.Text; import com.itextpdf.layout.element.Text;
import com.itextpdf.layout.properties.UnitValue; import com.itextpdf.layout.properties.UnitValue;
/**
 * Command-line example that adds content to an existing PDF with iText:
 * a form field, a new page, a text annotation, an image, a paragraph and a
 * table. It reads {@code SOURCE} and writes the result to {@code DESTINATION}.
 */
public class PdfEditor { public class PdfEditor {
// Input document, and the path the modified copy is written to.
private static final String SOURCE = "src/main/resources/baeldung.pdf"; private static final String SOURCE = "src/main/resources/baeldung.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-modified.pdf"; private static final String DESTINATION = "src/main/resources/baeldung-modified.pdf";
/**
 * Opens a reader on {@code SOURCE} and a writer on {@code DESTINATION}, wraps
 * both in one {@code PdfDocument} and hands it to {@code addContentToDocument}.
 * Nothing is closed here; closing happens at the end of {@code addContentToDocument}.
 *
 * @param args unused
 * @throws IOException declared by this method
 */
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE); PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION); PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer); PdfDocument pdfDocument = new PdfDocument(reader, writer);
// NOTE(review): if this call throws before reaching document.close(), nothing closes the reader/writer.
addContentToDocument(pdfDocument); addContentToDocument(pdfDocument);
} }
/**
 * Adds the example content to the document in the numbered steps below and
 * then closes the layout {@code Document} that wraps it.
 *
 * @param pdfDocument the open document to modify; it is closed via the wrapping {@code Document}
 * @throws MalformedURLException declared here; presumably raised when loading the image - confirm
 */
private static void addContentToDocument(PdfDocument pdfDocument) throws MalformedURLException { private static void addContentToDocument(PdfDocument pdfDocument) throws MalformedURLException {
// 4.1. add form // 4.1. add form
// Parent field "information" with one text child "name"; registered on what is the first page at this point.
PdfFormField personal = PdfFormField.createEmptyField(pdfDocument); PdfFormField personal = PdfFormField.createEmptyField(pdfDocument);
personal.setFieldName("information"); personal.setFieldName("information");
PdfTextFormField name = PdfFormField.createText(pdfDocument, new Rectangle(35, 400, 100, 30), "name", ""); PdfTextFormField name = PdfFormField.createText(pdfDocument, new Rectangle(35, 400, 100, 30), "name", "");
personal.addKid(name); personal.addKid(name);
PdfAcroForm.getAcroForm(pdfDocument, true) PdfAcroForm.getAcroForm(pdfDocument, true)
.addField(personal, pdfDocument.getFirstPage()); .addField(personal, pdfDocument.getFirstPage());
// 4.2. add new page // 4.2. add new page
// NOTE(review): presumably inserts the page at position 1, moving the original first page to position 2 - confirm.
pdfDocument.addNewPage(1); pdfDocument.addNewPage(1);
// 4.3. add annotation // 4.3. add annotation
// The annotation is attached to page 2.
PdfAnnotation ann = new PdfTextAnnotation(new Rectangle(40, 435, 0, 0)).setTitle(new PdfString("name")) PdfAnnotation ann = new PdfTextAnnotation(new Rectangle(40, 435, 0, 0)).setTitle(new PdfString("name"))
.setContents("Your name"); .setContents("Your name");
pdfDocument.getPage(2) pdfDocument.getPage(2)
.addAnnotation(ann); .addAnnotation(ann);
// create document from pdf document // create document from pdf document
Document document = new Document(pdfDocument); Document document = new Document(pdfDocument);
// 4.4. add an image // 4.4. add an image
// Scaled to 550 x 100 and given a fixed position with arguments (1, 10, 50).
ImageData imageData = ImageDataFactory.create("src/main/resources/baeldung.png"); ImageData imageData = ImageDataFactory.create("src/main/resources/baeldung.png");
Image image = new Image(imageData).scaleAbsolute(550, 100) Image image = new Image(imageData).scaleAbsolute(550, 100)
.setFixedPosition(1, 10, 50); .setFixedPosition(1, 10, 50);
document.add(image); document.add(image);
// 4.5. add a paragraph // 4.5. add a paragraph
// The title text overrides the paragraph's font size (16 vs. 8).
Text title = new Text("This is a demo").setFontSize(16); Text title = new Text("This is a demo").setFontSize(16);
Text author = new Text("Baeldung tutorials."); Text author = new Text("Baeldung tutorials.");
Paragraph p = new Paragraph().setFontSize(8) Paragraph p = new Paragraph().setFontSize(8)
.add(title) .add(title)
.add(" from ") .add(" from ")
.add(author); .add(author);
document.add(p); document.add(p);
// 4.6. add a table // 4.6. add a table
// Two columns: header cells "#" and "company", then one row "name" / "baeldung".
Table table = new Table(UnitValue.createPercentArray(2)); Table table = new Table(UnitValue.createPercentArray(2));
table.addHeaderCell("#"); table.addHeaderCell("#");
table.addHeaderCell("company"); table.addHeaderCell("company");
table.addCell("name"); table.addCell("name");
table.addCell("baeldung"); table.addCell("baeldung");
document.add(table); document.add(table);
// close the document // close the document
// this automatically closes the pdfDocument, which in turn automatically closes the pdfReader and pdfWriter // this automatically closes the pdfDocument, which in turn automatically closes the pdfReader and pdfWriter
document.close(); document.close();
} }
} }

View File

@ -1,45 +1,45 @@
package com.baeldung.pdfedition; package com.baeldung.pdfedition;
import java.io.IOException; import java.io.IOException;
import com.itextpdf.kernel.colors.ColorConstants; import com.itextpdf.kernel.colors.ColorConstants;
import com.itextpdf.kernel.pdf.PdfDocument; import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage; import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader; import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter; import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas; import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation; import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
import com.itextpdf.layout.Canvas; import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph; import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.pdfcleanup.PdfCleaner; import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy; import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy; import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
/**
 * Command-line example that replaces text in a PDF with iText: every match of
 * "Baeldung tutorials" is swept with a white redaction color, and the word
 * "HIDDEN" is then drawn in each swept rectangle. It reads {@code SOURCE} and
 * writes the result to {@code DESTINATION}.
 */
public class PdfTextReplacement { public class PdfTextReplacement {
// Input document, and the path the fixed copy is written to.
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf"; private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-fixed.pdf"; private static final String DESTINATION = "src/main/resources/baeldung-fixed.pdf";
/**
 * Opens a reader on {@code SOURCE} and a writer on {@code DESTINATION}, wraps
 * both in one {@code PdfDocument}, runs the replacement and closes the document.
 *
 * @param args unused
 * @throws IOException declared by this method and by {@code replaceTextContentFromDocument}
 */
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE); PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION); PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer); PdfDocument pdfDocument = new PdfDocument(reader, writer);
replaceTextContentFromDocument(pdfDocument); replaceTextContentFromDocument(pdfDocument);
// NOTE(review): close() is not in a finally / try-with-resources, so it is skipped if the call above throws.
pdfDocument.close(); pdfDocument.close();
} }
/**
 * Sweeps every match of "Baeldung tutorials" and writes "HIDDEN" into the
 * rectangle of each location the strategy reports afterwards.
 *
 * @param pdfDocument the open document to modify in place
 * @throws IOException declared here; which call raises it is not visible in this file
 */
private static void replaceTextContentFromDocument(PdfDocument pdfDocument) throws IOException { private static void replaceTextContentFromDocument(PdfDocument pdfDocument) throws IOException {
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy(); CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
strategy.add(new RegexBasedCleanupStrategy("Baeldung tutorials").setRedactionColor(ColorConstants.WHITE)); strategy.add(new RegexBasedCleanupStrategy("Baeldung tutorials").setRedactionColor(ColorConstants.WHITE));
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy); PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
// The strategy is queried for its locations only after the sweep above has run.
for (IPdfTextLocation location : strategy.getResultantLocations()) { for (IPdfTextLocation location : strategy.getResultantLocations()) {
// NOTE(review): the "+ 1" presumably converts a zero-based page index to the one-based number getPage expects - confirm.
PdfPage page = pdfDocument.getPage(location.getPageNumber() + 1); PdfPage page = pdfDocument.getPage(location.getPageNumber() + 1);
// Draw into a new content stream added after the page's existing content.
PdfCanvas pdfCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), page.getDocument()); PdfCanvas pdfCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), page.getDocument());
// NOTE(review): this Canvas is never closed inside the loop - check whether iText needs canvas.close() here.
Canvas canvas = new Canvas(pdfCanvas, location.getRectangle()); Canvas canvas = new Canvas(pdfCanvas, location.getRectangle());
canvas.add(new Paragraph("HIDDEN").setFontSize(8) canvas.add(new Paragraph("HIDDEN").setFontSize(8)
.setMarginTop(0f)); .setMarginTop(0f));
} }
} }
} }

View File

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

View File

@ -9,7 +9,7 @@
<parent> <parent>
<groupId>com.baeldung</groupId> <groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId> <artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version> <version>1.0.0-SNAPSHOT</version>
</parent> </parent>

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Aggregator POM: groups the text processing library sub-modules under one parent. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <artifactId>text-processing-libraries-modules</artifactId>
    <name>text-processing-libraries-modules</name>
    <!-- "pom" packaging: this project produces no artifact of its own. -->
    <packaging>pom</packaging>

    <!-- groupId and version are not declared above, so they come from this parent. -->
    <parent>
        <groupId>com.baeldung</groupId>
        <artifactId>parent-modules</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>

    <!-- Sub-modules built as part of this aggregator. -->
    <modules>
        <module>antlr</module>
        <module>apache-tika</module>
        <module>asciidoctor</module>
        <module>pdf</module>
        <module>pdf-2</module>
    </modules>

</project>