diff --git a/libraries/OpenNLP/PartOfSpeechTag.txt b/libraries/OpenNLP/PartOfSpeechTag.txt
deleted file mode 100644
index fdd8238ec4..0000000000
--- a/libraries/OpenNLP/PartOfSpeechTag.txt
+++ /dev/null
@@ -1 +0,0 @@
-Out of the night that covers me
\ No newline at end of file
diff --git a/libraries/OpenNLP/doc-cat.train b/libraries/OpenNLP/doc-cat.train
deleted file mode 100644
index c457221ec6..0000000000
--- a/libraries/OpenNLP/doc-cat.train
+++ /dev/null
@@ -1,10 +0,0 @@
-GOOD good morning /
-GOOD good evening /
-GOOD have a good day /
-GOOD nice party! /
-GOOD fine pants /
-BAD nightmare volcano in the sea /
-BAD darkest sky /
-BAD greed and waste /
-BAD army attacks /
-BAD bomb explodes /
\ No newline at end of file
diff --git a/libraries/OpenNLP/en-chunker.bin b/libraries/OpenNLP/en-chunker.bin
deleted file mode 100644
index 65d9356888..0000000000
Binary files a/libraries/OpenNLP/en-chunker.bin and /dev/null differ
diff --git a/libraries/OpenNLP/en-ner-location.bin b/libraries/OpenNLP/en-ner-location.bin
deleted file mode 100644
index f3788bc1f6..0000000000
Binary files a/libraries/OpenNLP/en-ner-location.bin and /dev/null differ
diff --git a/libraries/OpenNLP/en-ner-person.bin b/libraries/OpenNLP/en-ner-person.bin
deleted file mode 100644
index 2f68318203..0000000000
Binary files a/libraries/OpenNLP/en-ner-person.bin and /dev/null differ
diff --git a/libraries/OpenNLP/en-pos-maxent.bin b/libraries/OpenNLP/en-pos-maxent.bin
deleted file mode 100644
index c8cae23c5f..0000000000
Binary files a/libraries/OpenNLP/en-pos-maxent.bin and /dev/null differ
diff --git a/libraries/OpenNLP/en-sent.bin b/libraries/OpenNLP/en-sent.bin
deleted file mode 100644
index e89076be5a..0000000000
Binary files a/libraries/OpenNLP/en-sent.bin and /dev/null differ
diff --git a/libraries/OpenNLP/en-token.bin b/libraries/OpenNLP/en-token.bin
deleted file mode 100644
index c417277ca7..0000000000
Binary files a/libraries/OpenNLP/en-token.bin and /dev/null differ
diff --git a/libraries/pom.xml b/libraries/pom.xml
index ff9c72399d..a3b78f1695 100644
--- a/libraries/pom.xml
+++ b/libraries/pom.xml
@@ -338,12 +338,6 @@
             <artifactId>netty-all</artifactId>
             <version>${netty.version}</version>
         </dependency>
-
-        <dependency>
-            <groupId>org.apache.opennlp</groupId>
-            <artifactId>opennlp-tools</artifactId>
-            <version>1.8.0</version>
-        </dependency>
         <dependency>
             <groupId>junit</groupId>
             <artifactId>junit</artifactId>
diff --git a/libraries/src/main/java/com/baeldung/opennlp/OpenNLP.java b/libraries/src/main/java/com/baeldung/opennlp/OpenNLP.java
deleted file mode 100644
index dd44e96a3a..0000000000
--- a/libraries/src/main/java/com/baeldung/opennlp/OpenNLP.java
+++ /dev/null
@@ -1,188 +0,0 @@
-package com.baeldung.opennlp;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.Arrays;
-import java.util.logging.Logger;
-
-import opennlp.tools.chunker.ChunkerME;
-import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.cmdline.postag.POSModelLoader;
-import opennlp.tools.doccat.DoccatFactory;
-import opennlp.tools.doccat.DoccatModel;
-import opennlp.tools.doccat.DocumentCategorizerME;
-import opennlp.tools.doccat.DocumentSample;
-import opennlp.tools.doccat.DocumentSampleStream;
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSSample;
-import opennlp.tools.postag.POSTaggerME;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.Tokenizer;
-import opennlp.tools.tokenize.TokenizerME;
-import opennlp.tools.tokenize.TokenizerModel;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-import opennlp.tools.util.InputStreamFactory;
-import opennlp.tools.util.InvalidFormatException;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
-import opennlp.tools.util.Span;
-import opennlp.tools.util.TrainingParameters;
-
-public class OpenNLP {
-
- private final static Logger LOGGER = Logger.getLogger(OpenNLP.class.getName());
- private final static String text = buildString();
- private final static String sentence[] = new String[] { "James", "Jordan", "live", "in", "Oklahoma", "city", "." };
-
- private DoccatModel docCatModel;
-
- public static void main(String[] args) {
- new OpenNLP();
- }
-
- public static String buildString(){
- StringBuilder sb = new StringBuilder();
- sb.append("To get to the south:");
- sb.append(" Go to the store.");
- sb.append(" Buy a compass.");
- sb.append(" Use the compass.");
- sb.append(" Then walk to the south.");
- return sb.toString();
- }
-
- public OpenNLP() {
- try {
- sentenceDetector();
- tokenizer();
- nameFinder();
- locationFinder();
- trainDocumentCategorizer();
- documentCategorizer();
- partOfSpeechTagger();
- chunker();
- } catch (InvalidFormatException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- public void sentenceDetector() throws InvalidFormatException, IOException {
-
- InputStream is = new FileInputStream("OpenNLP/en-sent.bin");
- SentenceModel model = new SentenceModel(is);
- SentenceDetectorME sdetector = new SentenceDetectorME(model);
- String sentences[] = sdetector.sentDetect(text);
- for (String sentence : sentences) {
- LOGGER.info(sentence);
- }
- is.close();
- }
-
- public void tokenizer() throws InvalidFormatException, IOException {
- InputStream is = new FileInputStream("OpenNLP/en-token.bin");
- TokenizerModel model = new TokenizerModel(is);
- Tokenizer tokenizer = new TokenizerME(model);
- String tokens[] = tokenizer.tokenize(text);
- for (String token : tokens) {
- LOGGER.info(token);
- }
- is.close();
- }
-
- public static void nameFinder() throws IOException {
- InputStream is = new FileInputStream("OpenNLP/en-ner-person.bin");
- TokenNameFinderModel model = new TokenNameFinderModel(is);
- is.close();
- NameFinderME nameFinder = new NameFinderME(model);
- Span nameSpans[] = nameFinder.find(sentence);
- String[] names = Span.spansToStrings(nameSpans, sentence);
- Arrays.stream(names).forEach(LOGGER::info);
- for (String name : names) {
- LOGGER.info(name);
- }
- }
-
- public static void locationFinder() throws IOException {
- InputStream is = new FileInputStream("OpenNLP/en-ner-location.bin");
- TokenNameFinderModel model = new TokenNameFinderModel(is);
- is.close();
- NameFinderME nameFinder = new NameFinderME(model);
- Span locationSpans[] = nameFinder.find(sentence);
- String[] locations = Span.spansToStrings(locationSpans, sentence);
- Arrays.stream(locations).forEach(LOGGER::info);
- for (String location : locations) {
- LOGGER.info(location);
- }
- }
-
- public void trainDocumentCategorizer() {
-
- try {
- InputStreamFactory isf = new InputStreamFactory() {
- public InputStream createInputStream() throws IOException {
- return new FileInputStream("OpenNLP/doc-cat.train");
- }
- };
- ObjectStream lineStream = new PlainTextByLineStream(isf, "UTF-8");
- ObjectStream sampleStream = new DocumentSampleStream(lineStream);
- DoccatFactory docCatFactory = new DoccatFactory();
- docCatModel = DocumentCategorizerME.train("en", sampleStream, TrainingParameters.defaultParams(), docCatFactory);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- public void documentCategorizer() {
- DocumentCategorizerME myCategorizer = new DocumentCategorizerME(docCatModel);
- double[] outcomes = myCategorizer.categorize(sentence);
- String category = myCategorizer.getBestCategory(outcomes);
-
- if (category.equalsIgnoreCase("GOOD")) {
- LOGGER.info("Document is positive :) ");
- } else {
- LOGGER.info("Document is negative :( ");
- }
- }
-
- public static void partOfSpeechTagger() throws IOException {
- try {
- POSModel posModel = new POSModelLoader().load(new File("OpenNLP/en-pos-maxent.bin"));
- POSTaggerME posTaggerME = new POSTaggerME(posModel);
- InputStreamFactory isf = new InputStreamFactory() {
- public InputStream createInputStream() throws IOException {
- return new FileInputStream("OpenNLP/PartOfSpeechTag.txt");
- }
- };
- ObjectStream lineStream = new PlainTextByLineStream(isf, "UTF-8");
- String line;
- while ((line = lineStream.read()) != null) {
- String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
- String[] tags = posTaggerME.tag(whitespaceTokenizerLine);
- POSSample posSample = new POSSample(whitespaceTokenizerLine, tags);
- LOGGER.info(posSample.toString());
- }
- lineStream.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- public static void chunker() throws IOException {
- InputStream is = new FileInputStream("OpenNLP/en-chunker.bin");
- ChunkerModel cModel = new ChunkerModel(is);
- ChunkerME chunkerME = new ChunkerME(cModel);
- String[] taggedSentence = new String[] {"Out", "of", "the", "night", "that", "covers", "me"};
- String pos[] = new String[] { "IN", "IN", "DT", "NN", "WDT", "VBZ", "PRP"};
- String chunks[] = chunkerME.chunk(taggedSentence, pos);
- for (String chunk : chunks) {
- LOGGER.info(chunk);
- }
- }
-
-}
diff --git a/libraries/src/test/java/com/baeldung/opennlp/OpenNLPTests.java b/libraries/src/test/java/com/baeldung/opennlp/OpenNLPTests.java
deleted file mode 100644
index 38bc8e002b..0000000000
--- a/libraries/src/test/java/com/baeldung/opennlp/OpenNLPTests.java
+++ /dev/null
@@ -1,151 +0,0 @@
-package com.baeldung.opennlp;
-
-import opennlp.tools.chunker.ChunkerME;
-import opennlp.tools.chunker.ChunkerModel;
-import opennlp.tools.cmdline.postag.POSModelLoader;
-import opennlp.tools.doccat.DoccatFactory;
-import opennlp.tools.doccat.DoccatModel;
-import opennlp.tools.doccat.DocumentCategorizerME;
-import opennlp.tools.doccat.DocumentSample;
-import opennlp.tools.doccat.DocumentSampleStream;
-import opennlp.tools.namefind.NameFinderME;
-import opennlp.tools.namefind.TokenNameFinderModel;
-import opennlp.tools.postag.POSModel;
-import opennlp.tools.postag.POSSample;
-import opennlp.tools.postag.POSTaggerME;
-import opennlp.tools.sentdetect.SentenceDetectorME;
-import opennlp.tools.sentdetect.SentenceModel;
-import opennlp.tools.tokenize.WhitespaceTokenizer;
-import opennlp.tools.util.InputStreamFactory;
-import opennlp.tools.util.ObjectStream;
-import opennlp.tools.util.PlainTextByLineStream;
-import opennlp.tools.util.Span;
-import opennlp.tools.util.TrainingParameters;
-import org.junit.Test;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.FileNotFoundException;
-import java.io.IOException;
-import java.io.InputStream;
-
-import static org.junit.Assert.assertEquals;
-
-public class OpenNLPTests {
-
- private final static String text = "To get to the south: Go to the store. Buy a compass. Use the compass. Then walk to the south.";
- private final static String sentence[] = new String[]{"James", "Jordan", "live", "in", "Oklahoma", "city", "."};
-
- @Test
- public void givenText_WhenDetectSentences_ThenCountSentences() {
- InputStream is;
- SentenceModel model;
- try {
- is = new FileInputStream("OpenNLP/en-sent.bin");
- model = new SentenceModel(is);
- SentenceDetectorME sdetector = new SentenceDetectorME(model);
- String sentences[] = sdetector.sentDetect(text);
- assertEquals(4, sentences.length);
- is.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- @Test
- public void givenText_WhenDetectTokens_ThenVerifyNames() {
- InputStream is;
- TokenNameFinderModel model;
- try {
- is = new FileInputStream("OpenNLP/en-ner-person.bin");
- model = new TokenNameFinderModel(is);
- is.close();
- NameFinderME nameFinder = new NameFinderME(model);
- Span nameSpans[] = nameFinder.find(sentence);
- String[] names = Span.spansToStrings(nameSpans, sentence);
- assertEquals(1, names.length);
- assertEquals("James Jordan", names[0]);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- @Test
- public void givenText_WhenDetectTokens_ThenVerifyLocations() {
- InputStream is;
- TokenNameFinderModel model;
- try {
- is = new FileInputStream("OpenNLP/en-ner-location.bin");
- model = new TokenNameFinderModel(is);
- is.close();
- NameFinderME nameFinder = new NameFinderME(model);
- Span locationSpans[] = nameFinder.find(sentence);
- String[] locations = Span.spansToStrings(locationSpans, sentence);
- assertEquals(1, locations.length);
- assertEquals("Oklahoma", locations[0]);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- @Test
- public void givenText_WhenCategorizeDocument_ThenVerifyDocumentContent() {
- DoccatModel docCatModel;
- try {
- InputStreamFactory isf = new InputStreamFactory() {
- public InputStream createInputStream() throws IOException {
- return new FileInputStream("OpenNLP/doc-cat.train");
- }
- };
- ObjectStream lineStream = new PlainTextByLineStream(isf, "UTF-8");
- ObjectStream sampleStream = new DocumentSampleStream(lineStream);
- DoccatFactory docCatFactory = new DoccatFactory();
- docCatModel = DocumentCategorizerME.train("en", sampleStream, TrainingParameters.defaultParams(), docCatFactory);
- DocumentCategorizerME myCategorizer = new DocumentCategorizerME(docCatModel);
- double[] outcomes = myCategorizer.categorize(sentence);
- String category = myCategorizer.getBestCategory(outcomes);
- assertEquals("GOOD", category);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- @Test
- public void givenText_WhenTagDocument_ThenVerifyTaggedString() {
- try {
- POSModel posModel = new POSModelLoader().load(new File("OpenNLP/en-pos-maxent.bin"));
- POSTaggerME posTaggerME = new POSTaggerME(posModel);
- InputStreamFactory isf = new InputStreamFactory() {
- public InputStream createInputStream() throws IOException {
- return new FileInputStream("OpenNLP/PartOfSpeechTag.txt");
- }
- };
- ObjectStream lineStream = new PlainTextByLineStream(isf, "UTF-8");
- String line;
- while ((line = lineStream.read()) != null) {
- String whitespaceTokenizerLine[] = WhitespaceTokenizer.INSTANCE.tokenize(line);
- String[] tags = posTaggerME.tag(whitespaceTokenizerLine);
- POSSample posSample = new POSSample(whitespaceTokenizerLine, tags);
- assertEquals("Out_IN of_IN the_DT night_NN that_WDT covers_VBZ me_PRP", posSample.toString());
- }
- lineStream.close();
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- @Test
- public void givenText_WhenChunked_ThenCountChunks() {
- try {
- InputStream is = new FileInputStream("OpenNLP/en-chunker.bin");
- ChunkerModel cModel = new ChunkerModel(is);
- ChunkerME chunkerME = new ChunkerME(cModel);
- String pos[] = new String[]{"NNP", "NNP", "NNP", "POS", "NNP", "NN", "VBD"};
- String chunks[] = chunkerME.chunk(sentence, pos);
- assertEquals(7, chunks.length);
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
-}