JAVA-29281 Create new module Text Processing Libraries Modules (#15479)

This commit is contained in:
anuragkumawat 2023-12-31 08:37:40 -08:00 committed by GitHub
parent 1768eee09f
commit 76bde3ff46
78 changed files with 208 additions and 189 deletions

12
pom.xml
View File

@ -686,7 +686,6 @@
<modules>
<module>akka-modules</module>
<module>algorithms-modules</module>
<module>antlr</module>
<module>apache-cxf-modules</module>
<module>apache-httpclient-2</module>
<module>apache-httpclient4</module>
@ -700,9 +699,7 @@
<module>apache-poi-3</module>
<module>apache-poi</module>
<module>apache-thrift</module>
<module>apache-tika</module>
<module>apache-velocity</module>
<module>asciidoctor</module>
<module>atomix</module>
<module>aws-modules</module>
<module>azure</module>
@ -813,8 +810,6 @@
<module>osgi</module>
<module>parent-boot-3</module>
<module>patterns-modules</module>
<module>pdf-2</module>
<module>pdf</module>
<module>performance-tests</module>
<module>persistence-modules</module>
<!--<module>persistence-modules/java-harperdb</module>--> <!-- This module requires a library to download manually -->
@ -896,6 +891,7 @@
<module>tensorflow-java</module>
<module>testing-modules</module>
<module>testing-modules/mockito-simple</module>
<module>text-processing-libraries-modules</module>
<module>timefold-solver</module>
<module>vaadin</module>
<module>vavr-modules</module>
@ -940,7 +936,6 @@
<modules>
<module>akka-modules</module>
<module>algorithms-modules</module>
<module>antlr</module>
<module>apache-cxf-modules</module>
<module>apache-httpclient-2</module>
<module>apache-httpclient4</module>
@ -954,9 +949,7 @@
<module>apache-poi-3</module>
<module>apache-poi</module>
<module>apache-thrift</module>
<module>apache-tika</module>
<module>apache-velocity</module>
<module>asciidoctor</module>
<module>atomix</module>
<module>aws-modules</module>
<module>azure</module>
@ -1067,8 +1060,6 @@
<module>osgi</module>
<module>parent-boot-3</module>
<module>patterns-modules</module>
<module>pdf-2</module>
<module>pdf</module>
<module>performance-tests</module>
<module>persistence-modules</module>
<module>persistence-modules/spring-data-neo4j</module>
@ -1148,6 +1139,7 @@
<module>tensorflow-java</module>
<module>testing-modules</module>
<module>testing-modules/mockito-simple</module>
<module>text-processing-libraries-modules</module>
<module>timefold-solver</module>
<module>vaadin</module>
<module>vavr-modules</module>

View File

@ -0,0 +1,3 @@
## Text Processing Libraries
This module contains modules about Text Processing Libraries.

View File

@ -8,7 +8,7 @@
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

View File

@ -9,7 +9,7 @@
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

View File

@ -8,7 +8,7 @@
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

View File

@ -9,7 +9,7 @@
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

View File

@ -1,43 +1,43 @@
package com.baeldung.pdfedition;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.pdfcleanup.CleanUpProperties;
import com.itextpdf.pdfcleanup.PdfCleanUpLocation;
import com.itextpdf.pdfcleanup.PdfCleanUpTool;
import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
public class PdfContentRemover {
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-cleaned.pdf";
public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer);
removeContentFromDocument(pdfDocument);
pdfDocument.close();
}
private static void removeContentFromDocument(PdfDocument pdfDocument) throws IOException {
// 5.1. remove text
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
strategy.add(new RegexBasedCleanupStrategy("Baeldung"));
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
// 5.2. remove other areas
List<PdfCleanUpLocation> cleanUpLocations = Arrays.asList(new PdfCleanUpLocation(1, new Rectangle(10, 50, 90, 70)), new PdfCleanUpLocation(2, new Rectangle(35, 400, 100, 35)));
PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDocument, cleanUpLocations, new CleanUpProperties());
cleaner.cleanUp();
}
}
package com.baeldung.pdfedition;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.pdfcleanup.CleanUpProperties;
import com.itextpdf.pdfcleanup.PdfCleanUpLocation;
import com.itextpdf.pdfcleanup.PdfCleanUpTool;
import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
public class PdfContentRemover {
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-cleaned.pdf";
public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer);
removeContentFromDocument(pdfDocument);
pdfDocument.close();
}
private static void removeContentFromDocument(PdfDocument pdfDocument) throws IOException {
// 5.1. remove text
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
strategy.add(new RegexBasedCleanupStrategy("Baeldung"));
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
// 5.2. remove other areas
List<PdfCleanUpLocation> cleanUpLocations = Arrays.asList(new PdfCleanUpLocation(1, new Rectangle(10, 50, 90, 70)), new PdfCleanUpLocation(2, new Rectangle(35, 400, 100, 35)));
PdfCleanUpTool cleaner = new PdfCleanUpTool(pdfDocument, cleanUpLocations, new CleanUpProperties());
cleaner.cleanUp();
}
}

View File

@ -1,86 +1,86 @@
package com.baeldung.pdfedition;
import java.io.IOException;
import java.net.MalformedURLException;
import com.itextpdf.forms.PdfAcroForm;
import com.itextpdf.forms.fields.PdfFormField;
import com.itextpdf.forms.fields.PdfTextFormField;
import com.itextpdf.io.image.ImageData;
import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.element.Table;
import com.itextpdf.layout.element.Text;
import com.itextpdf.layout.properties.UnitValue;
public class PdfEditor {
private static final String SOURCE = "src/main/resources/baeldung.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-modified.pdf";
public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer);
addContentToDocument(pdfDocument);
}
private static void addContentToDocument(PdfDocument pdfDocument) throws MalformedURLException {
// 4.1. add form
PdfFormField personal = PdfFormField.createEmptyField(pdfDocument);
personal.setFieldName("information");
PdfTextFormField name = PdfFormField.createText(pdfDocument, new Rectangle(35, 400, 100, 30), "name", "");
personal.addKid(name);
PdfAcroForm.getAcroForm(pdfDocument, true)
.addField(personal, pdfDocument.getFirstPage());
// 4.2. add new page
pdfDocument.addNewPage(1);
// 4.3. add annotation
PdfAnnotation ann = new PdfTextAnnotation(new Rectangle(40, 435, 0, 0)).setTitle(new PdfString("name"))
.setContents("Your name");
pdfDocument.getPage(2)
.addAnnotation(ann);
// create document form pdf document
Document document = new Document(pdfDocument);
// 4.4. add an image
ImageData imageData = ImageDataFactory.create("src/main/resources/baeldung.png");
Image image = new Image(imageData).scaleAbsolute(550, 100)
.setFixedPosition(1, 10, 50);
document.add(image);
// 4.5. add a paragraph
Text title = new Text("This is a demo").setFontSize(16);
Text author = new Text("Baeldung tutorials.");
Paragraph p = new Paragraph().setFontSize(8)
.add(title)
.add(" from ")
.add(author);
document.add(p);
// 4.6. add a table
Table table = new Table(UnitValue.createPercentArray(2));
table.addHeaderCell("#");
table.addHeaderCell("company");
table.addCell("name");
table.addCell("baeldung");
document.add(table);
// close the document
// this automatically closes the pdfDocument, which then closes automatically the pdfReader and pdfWriter
document.close();
}
}
package com.baeldung.pdfedition;
import java.io.IOException;
import java.net.MalformedURLException;
import com.itextpdf.forms.PdfAcroForm;
import com.itextpdf.forms.fields.PdfFormField;
import com.itextpdf.forms.fields.PdfTextFormField;
import com.itextpdf.io.image.ImageData;
import com.itextpdf.io.image.ImageDataFactory;
import com.itextpdf.kernel.geom.Rectangle;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfString;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.annot.PdfAnnotation;
import com.itextpdf.kernel.pdf.annot.PdfTextAnnotation;
import com.itextpdf.layout.Document;
import com.itextpdf.layout.element.Image;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.layout.element.Table;
import com.itextpdf.layout.element.Text;
import com.itextpdf.layout.properties.UnitValue;
public class PdfEditor {
private static final String SOURCE = "src/main/resources/baeldung.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-modified.pdf";
public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer);
addContentToDocument(pdfDocument);
}
private static void addContentToDocument(PdfDocument pdfDocument) throws MalformedURLException {
// 4.1. add form
PdfFormField personal = PdfFormField.createEmptyField(pdfDocument);
personal.setFieldName("information");
PdfTextFormField name = PdfFormField.createText(pdfDocument, new Rectangle(35, 400, 100, 30), "name", "");
personal.addKid(name);
PdfAcroForm.getAcroForm(pdfDocument, true)
.addField(personal, pdfDocument.getFirstPage());
// 4.2. add new page
pdfDocument.addNewPage(1);
// 4.3. add annotation
PdfAnnotation ann = new PdfTextAnnotation(new Rectangle(40, 435, 0, 0)).setTitle(new PdfString("name"))
.setContents("Your name");
pdfDocument.getPage(2)
.addAnnotation(ann);
// create document form pdf document
Document document = new Document(pdfDocument);
// 4.4. add an image
ImageData imageData = ImageDataFactory.create("src/main/resources/baeldung.png");
Image image = new Image(imageData).scaleAbsolute(550, 100)
.setFixedPosition(1, 10, 50);
document.add(image);
// 4.5. add a paragraph
Text title = new Text("This is a demo").setFontSize(16);
Text author = new Text("Baeldung tutorials.");
Paragraph p = new Paragraph().setFontSize(8)
.add(title)
.add(" from ")
.add(author);
document.add(p);
// 4.6. add a table
Table table = new Table(UnitValue.createPercentArray(2));
table.addHeaderCell("#");
table.addHeaderCell("company");
table.addCell("name");
table.addCell("baeldung");
document.add(table);
// close the document
// this automatically closes the pdfDocument, which then closes automatically the pdfReader and pdfWriter
document.close();
}
}

View File

@ -1,45 +1,45 @@
package com.baeldung.pdfedition;
import java.io.IOException;
import com.itextpdf.kernel.colors.ColorConstants;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
public class PdfTextReplacement {
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-fixed.pdf";
public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer);
replaceTextContentFromDocument(pdfDocument);
pdfDocument.close();
}
private static void replaceTextContentFromDocument(PdfDocument pdfDocument) throws IOException {
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
strategy.add(new RegexBasedCleanupStrategy("Baeldung tutorials").setRedactionColor(ColorConstants.WHITE));
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
for (IPdfTextLocation location : strategy.getResultantLocations()) {
PdfPage page = pdfDocument.getPage(location.getPageNumber() + 1);
PdfCanvas pdfCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), page.getDocument());
Canvas canvas = new Canvas(pdfCanvas, location.getRectangle());
canvas.add(new Paragraph("HIDDEN").setFontSize(8)
.setMarginTop(0f));
}
}
}
package com.baeldung.pdfedition;
import java.io.IOException;
import com.itextpdf.kernel.colors.ColorConstants;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfPage;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.pdf.canvas.PdfCanvas;
import com.itextpdf.kernel.pdf.canvas.parser.listener.IPdfTextLocation;
import com.itextpdf.layout.Canvas;
import com.itextpdf.layout.element.Paragraph;
import com.itextpdf.pdfcleanup.PdfCleaner;
import com.itextpdf.pdfcleanup.autosweep.CompositeCleanupStrategy;
import com.itextpdf.pdfcleanup.autosweep.RegexBasedCleanupStrategy;
public class PdfTextReplacement {
private static final String SOURCE = "src/main/resources/baeldung-modified.pdf";
private static final String DESTINATION = "src/main/resources/baeldung-fixed.pdf";
public static void main(String[] args) throws IOException {
PdfReader reader = new PdfReader(SOURCE);
PdfWriter writer = new PdfWriter(DESTINATION);
PdfDocument pdfDocument = new PdfDocument(reader, writer);
replaceTextContentFromDocument(pdfDocument);
pdfDocument.close();
}
private static void replaceTextContentFromDocument(PdfDocument pdfDocument) throws IOException {
CompositeCleanupStrategy strategy = new CompositeCleanupStrategy();
strategy.add(new RegexBasedCleanupStrategy("Baeldung tutorials").setRedactionColor(ColorConstants.WHITE));
PdfCleaner.autoSweepCleanUp(pdfDocument, strategy);
for (IPdfTextLocation location : strategy.getResultantLocations()) {
PdfPage page = pdfDocument.getPage(location.getPageNumber() + 1);
PdfCanvas pdfCanvas = new PdfCanvas(page.newContentStreamAfter(), page.getResources(), page.getDocument());
Canvas canvas = new Canvas(pdfCanvas, location.getRectangle());
canvas.add(new Paragraph("HIDDEN").setFontSize(8)
.setMarginTop(0f));
}
}
}

View File

Before

Width:  |  Height:  |  Size: 4.5 KiB

After

Width:  |  Height:  |  Size: 4.5 KiB

View File

@ -9,7 +9,7 @@
<parent>
<groupId>com.baeldung</groupId>
<artifactId>parent-modules</artifactId>
<artifactId>text-processing-libraries-modules</artifactId>
<version>1.0.0-SNAPSHOT</version>
</parent>

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>text-processing-libraries-modules</artifactId>
<name>text-processing-libraries-modules</name>
<packaging>pom</packaging>
<parent>
<artifactId>parent-modules</artifactId>
<groupId>com.baeldung</groupId>
<version>1.0.0-SNAPSHOT</version>
</parent>
<modules>
<module>antlr</module>
<module>apache-tika</module>
<module>asciidoctor</module>
<module>pdf</module>
<module>pdf-2</module>
</modules>
</project>