BAEL-5766 Overview of NLP Libraries in Java (#13798)
* BAEL-5766 Overview of NLP Libraries in Java * BAEL-5766 Overview of NLP Libraries in Java * Overview of NLP Libraries in Java * Overview of NLP Libraries in Java
This commit is contained in:
parent
2852feab1c
commit
b0116c225e
1
libraries-ai/README.md
Normal file
1
libraries-ai/README.md
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
33
libraries-ai/pom.xml
Normal file
33
libraries-ai/pom.xml
Normal file
@ -0,0 +1,33 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <!-- Parent POM; no groupId/version is declared for this module itself. -->
    <parent>
        <groupId>com.baeldung</groupId>
        <artifactId>parent-modules</artifactId>
        <version>1.0.0-SNAPSHOT</version>
    </parent>

    <artifactId>libraries-ai</artifactId>
    <name>libraries-ai</name>

    <!-- Library versions are kept here and referenced from the dependencies below. -->
    <properties>
        <stanford-corenlp.version>4.5.3</stanford-corenlp.version>
        <opennlp-tools.version>2.1.1</opennlp-tools.version>
    </properties>

    <dependencies>
        <!-- Stanford CoreNLP -->
        <dependency>
            <groupId>edu.stanford.nlp</groupId>
            <artifactId>stanford-corenlp</artifactId>
            <version>${stanford-corenlp.version}</version>
        </dependency>
        <!-- Apache OpenNLP -->
        <dependency>
            <groupId>org.apache.opennlp</groupId>
            <artifactId>opennlp-tools</artifactId>
            <version>${opennlp-tools.version}</version>
        </dependency>
    </dependencies>

</project>
|
@ -0,0 +1,41 @@
|
|||||||
|
package com.baeldung.nlp;
|
||||||
|
|
||||||
|
import edu.stanford.nlp.ling.CoreAnnotations;
|
||||||
|
import edu.stanford.nlp.ling.CoreLabel;
|
||||||
|
import edu.stanford.nlp.pipeline.Annotation;
|
||||||
|
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
|
||||||
|
import edu.stanford.nlp.util.CoreMap;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Properties;
|
||||||
|
|
||||||
|
import static org.junit.Assert.assertEquals;
|
||||||
|
|
||||||
|
public class CoreNLPTokenizerUnitTest {
|
||||||
|
@Test
|
||||||
|
public void givenSampleText_whenTokenize_thenExpectedTokensReturned() {
|
||||||
|
|
||||||
|
Properties props = new Properties();
|
||||||
|
props.setProperty("annotators", "tokenize");
|
||||||
|
|
||||||
|
StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
|
||||||
|
String text = "The german shepard display an act of kindness";
|
||||||
|
|
||||||
|
Annotation document = new Annotation(text);
|
||||||
|
pipeline.annotate(document);
|
||||||
|
|
||||||
|
List<CoreMap> sentences = document.get(CoreAnnotations.SentencesAnnotation.class);
|
||||||
|
StringBuilder tokens = new StringBuilder();
|
||||||
|
|
||||||
|
for (CoreMap sentence : sentences) {
|
||||||
|
for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
|
||||||
|
String word = token.get(CoreAnnotations.TextAnnotation.class);
|
||||||
|
tokens.append(word)
|
||||||
|
.append(" ");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assertEquals("The german shepard display an act of kindness", tokens.toString()
|
||||||
|
.trim());
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,33 @@
|
|||||||
|
package com.baeldung.nlp;
|
||||||
|
|
||||||
|
import opennlp.tools.langdetect.Language;
|
||||||
|
import opennlp.tools.langdetect.LanguageDetectorME;
|
||||||
|
import opennlp.tools.langdetect.LanguageDetectorModel;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||||
|
|
||||||
|
class OpenNLPLanguageDetector {
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void givenTextInEnglish_whenDetectLanguage_thenReturnsEnglishLanguageCode() {
|
||||||
|
|
||||||
|
String text = "the dream my father told me";
|
||||||
|
LanguageDetectorModel model;
|
||||||
|
|
||||||
|
try (InputStream modelIn = new FileInputStream("langdetect-183.bin")) {
|
||||||
|
model = new LanguageDetectorModel(modelIn);
|
||||||
|
} catch (IOException e) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LanguageDetectorME detector = new LanguageDetectorME(model);
|
||||||
|
Language language = detector.predictLanguage(text);
|
||||||
|
|
||||||
|
assertEquals("eng", language.getLang());
|
||||||
|
}
|
||||||
|
}
|
1
pom.xml
1
pom.xml
@ -1148,6 +1148,7 @@
|
|||||||
<module>libraries-http</module>
|
<module>libraries-http</module>
|
||||||
<module>libraries-http-2</module>
|
<module>libraries-http-2</module>
|
||||||
<module>libraries-io</module>
|
<module>libraries-io</module>
|
||||||
|
<module>libraries-ai</module>
|
||||||
<module>libraries-primitive</module>
|
<module>libraries-primitive</module>
|
||||||
<module>libraries-rpc</module>
|
<module>libraries-rpc</module>
|
||||||
<module>libraries-server</module>
|
<module>libraries-server</module>
|
||||||
|
Loading…
x
Reference in New Issue
Block a user