BAEL-1557 Refactor

mansi2392 2018-04-04 00:36:59 +05:30
parent 3cc84f5446
commit 8572fa3213
7 changed files with 19 additions and 24 deletions
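
All seven files get the same treatment: test methods are renamed to the given/when/then convention, and the OpenNLP model files are loaded from the test classpath instead of a hard-coded src/main/resources path. A minimal sketch of the loading change, assuming the models sit under /models on the classpath (the try-with-resources wrapper is an optional extra, not something this commit adds):

import java.io.InputStream;
import opennlp.tools.postag.POSModel;

public class ModelLoadingSketch {

    POSModel loadPosModel() throws Exception {
        // Resolved against the classpath root, so the test no longer depends on the
        // working directory the JVM was started from.
        try (InputStream in = getClass().getResourceAsStream("/models/en-pos-maxent.bin")) {
            return new POSModel(in);
        }
    }
}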

View File

@@ -13,12 +13,12 @@ import org.junit.Test;
 public class ChunkerTest {
     @Test
-    public void givenSentence_whenChunk_thenGetChunks() throws Exception {
+    public void givenChunkerModel_whenChunk_thenChunksAreDetected() throws Exception {
         SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
         String[] tokens = tokenizer.tokenize("He reckons the current account deficit will narrow to only 8 billion.");
-        InputStream inputStreamPOSTagger = new FileInputStream("src/main/resources/models/en-pos-maxent.bin");
+        InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
         POSModel posModel = new POSModel(inputStreamPOSTagger);
         POSTaggerME posTagger = new POSTaggerME(posModel);
         String tags[] = posTagger.tag(tokens);
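
The hunk ends at the POS tagging step; the chunking itself is not shown in this diff. A hedged sketch of how it would continue with the OpenNLP chunker API, assuming a chunker model stored alongside the others as /models/en-chunker.bin:

// Continuation sketch (not part of this diff): derive chunk labels from tokens and POS tags.
// Needs opennlp.tools.chunker.ChunkerModel and opennlp.tools.chunker.ChunkerME.
InputStream inputStreamChunker = getClass().getResourceAsStream("/models/en-chunker.bin");
ChunkerModel chunkerModel = new ChunkerModel(inputStreamChunker);
ChunkerME chunker = new ChunkerME(chunkerModel);
String[] chunks = chunker.chunk(tokens, tags); // e.g. "B-NP", "I-NP", "B-VP", ...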

View File

@@ -18,10 +18,10 @@ import opennlp.tools.util.TrainingParameters;
 import static org.assertj.core.api.Assertions.assertThat;
 import org.junit.Test;
-public class LanguageDetectorAndTrainingData {
+public class LanguageDetectorAndTrainingDataTest {
     @Test
-    public void test() throws FileNotFoundException, IOException {
+    public void givenLanguageDictionary_whenLanguageDetect_thenLanguageIsDetected() throws FileNotFoundException, IOException {
         InputStreamFactory dataIn = new MarkableFileInputStreamFactory(new File("src/main/resources/models/DoccatSample.txt"));
         ObjectStream lineStream = new PlainTextByLineStream(dataIn, "UTF-8");
         LanguageDetectorSampleStream sampleStream = new LanguageDetectorSampleStream(lineStream);
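
The visible hunk only builds the sample stream; the training and prediction that presumably follow are not part of this diff. A hedged sketch using the OpenNLP language-detector API (parameter values and the assertion are illustrative only and depend on the contents of DoccatSample.txt):

// Training sketch (not part of this diff): fit a model on the sample stream, then predict.
TrainingParameters params = new TrainingParameters();
params.put(TrainingParameters.ITERATIONS_PARAM, "100");
params.put(TrainingParameters.CUTOFF_PARAM, "5");

LanguageDetectorModel model = LanguageDetectorME.train(sampleStream, params, new LanguageDetectorFactory());
LanguageDetector detector = new LanguageDetectorME(model);
Language[] languages = detector.predictLanguages("this is sample text to classify");
// The best-scoring language depends entirely on the training data in DoccatSample.txt.
assertThat(languages).isNotEmpty();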

View File

@@ -1,7 +1,5 @@
 package com.baeldung.apache.opennlp;
-import java.io.File;
-import java.io.FileInputStream;
 import java.io.InputStream;
 import opennlp.tools.lemmatizer.DictionaryLemmatizer;
 import opennlp.tools.postag.POSModel;
@@ -13,16 +11,16 @@ import org.junit.Test;
 public class LemmetizerTest {
     @Test
-    public void givenSentence_whenLemmetize_thenGetLemmas() throws Exception {
+    public void givenEnglishDictionary_whenLemmatize_thenLemmasAreDetected() throws Exception {
         SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
         String[] tokens = tokenizer.tokenize("John has a sister named Penny.");
-        InputStream inputStreamPOSTagger = new FileInputStream("src/main/resources/models/en-pos-maxent.bin");
+        InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
         POSModel posModel = new POSModel(inputStreamPOSTagger);
         POSTaggerME posTagger = new POSTaggerME(posModel);
         String tags[] = posTagger.tag(tokens);
-        InputStream dictLemmatizer = new FileInputStream("src/main/resources/models/en-lemmatizer.dict");
+        InputStream dictLemmatizer = getClass().getResourceAsStream("/models/en-lemmatizer.dict");
         DictionaryLemmatizer lemmatizer = new DictionaryLemmatizer(dictLemmatizer);
         String[] lemmas = lemmatizer.lemmatize(tokens, tags);

View File

@@ -1,6 +1,5 @@
 package com.baeldung.apache.opennlp;
-import java.io.FileInputStream;
 import java.io.InputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -15,12 +14,12 @@ import org.junit.Test;
 public class NamedEntityRecognitionTest {
     @Test
-    public void givenTextWithPersonNames_whenNER_thenGetPersonNamesList() throws Exception {
+    public void givenEnglishPersonModel_whenNER_thenPersonsAreDetected() throws Exception {
         SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
         String[] tokens = tokenizer.tokenize("John is 26 years old. His best friend's name is Leonard. He has a sister named Penny.");
-        InputStream inputStreamNameFinder = new FileInputStream("src/main/resources/models/en-ner-person.bin");
+        InputStream inputStreamNameFinder = getClass().getResourceAsStream("/models/en-ner-person.bin");
         TokenNameFinderModel model = new TokenNameFinderModel(inputStreamNameFinder);
         NameFinderME nameFinderME = new NameFinderME(model);
         List<Span> spans = Arrays.asList(nameFinderME.find(tokens));
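
The hunk stops at the raw Span list. A hedged sketch of how the detected persons would typically be read back out of it (the concrete assertion in the test file is not shown in this diff; the expected names simply mirror the sentence above):

// Continuation sketch (not part of this diff): map each span back onto the token array.
String[] names = Span.spansToStrings(spans.toArray(new Span[spans.size()]), tokens);
assertThat(names).contains("John", "Leonard", "Penny");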

View File

@@ -1,6 +1,5 @@
 package com.baeldung.apache.opennlp;
-import java.io.FileInputStream;
 import java.io.InputStream;
 import opennlp.tools.postag.POSModel;
 import opennlp.tools.postag.POSTaggerME;
@@ -11,12 +10,12 @@ import org.junit.Test;
 public class POSTaggerTest {
     @Test
-    public void givenSentence_whenPOSTagging_thenGetTags() throws Exception {
+    public void givenPOSModel_whenPOSTagging_thenPOSAreDetected() throws Exception {
         SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
         String[] tokens = tokenizer.tokenize("John has a sister named Penny.");
-        InputStream inputStreamPOSTagger = new FileInputStream("src/main/resources/models/en-pos-maxent.bin");
+        InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
         POSModel posModel = new POSModel(inputStreamPOSTagger);
         POSTaggerME posTagger = new POSTaggerME(posModel);
         String tags[] = posTagger.tag(tokens);
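
One behavioral difference introduced by this kind of refactor: new FileInputStream(...) fails immediately with a FileNotFoundException, while getResourceAsStream(...) returns null when the model is missing from the classpath and only blows up later inside the model constructor. A purely illustrative guard, not part of the commit:

InputStream inputStreamPOSTagger = getClass().getResourceAsStream("/models/en-pos-maxent.bin");
// getResourceAsStream returns null rather than throwing when the resource is absent,
// so fail with a clear message before handing the stream to POSModel.
if (inputStreamPOSTagger == null) {
    throw new IllegalStateException("en-pos-maxent.bin not found on the test classpath");
}
POSModel posModel = new POSModel(inputStreamPOSTagger);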

View File

@@ -1,6 +1,5 @@
 package com.baeldung.apache.opennlp;
-import java.io.FileInputStream;
 import java.io.InputStream;
 import opennlp.tools.sentdetect.SentenceDetectorME;
 import opennlp.tools.sentdetect.SentenceModel;
@@ -10,12 +9,12 @@ import org.junit.Test;
 public class SentenceDetectionTest {
     @Test
-    public void givenText_whenDetectSent_thenGetSentences() throws Exception {
+    public void givenEnglishModel_whenDetect_thenSentencesAreDetected() throws Exception {
         String paragraph = "This is a statement. This is another statement. Now is an abstract word for time, "
             + "that is always flying. And my email address is google@gmail.com.";
-        InputStream is = new FileInputStream("src/main/resources/models/en-sent.bin");
+        InputStream is = getClass().getResourceAsStream("/models/en-sent.bin");
         SentenceModel model = new SentenceModel(is);
         SentenceDetectorME sdetector = new SentenceDetectorME(model);
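
The hunk ends right after the detector is built; the actual detection call is not shown. A hedged sketch of the likely continuation (the exact sentences asserted in the test depend on the en-sent.bin model):

// Continuation sketch (not part of this diff): split the paragraph into sentences.
String[] sentences = sdetector.sentDetect(paragraph);
assertThat(sentences).contains("This is a statement.", "This is another statement.");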

View File

@@ -1,6 +1,6 @@
 package com.baeldung.apache.opennlp;
-import java.io.FileInputStream;
+import java.io.InputStream;
 import opennlp.tools.tokenize.SimpleTokenizer;
 import opennlp.tools.tokenize.TokenizerME;
 import opennlp.tools.tokenize.TokenizerModel;
@@ -11,23 +11,23 @@ import org.junit.Test;
 public class TokenizerTest {
     @Test
-    public void givenString_whenTokenize_thenGetTokens() throws Exception {
-        FileInputStream fileInputStream = new FileInputStream("src/main/resources/models/en-token.bin");
-        TokenizerModel model = new TokenizerModel(fileInputStream);
+    public void givenEnglishModel_whenTokenize_thenTokensAreDetected() throws Exception {
+        InputStream inputStream = getClass().getResourceAsStream("/models/en-token.bin");
+        TokenizerModel model = new TokenizerModel(inputStream);
         TokenizerME tokenizer = new TokenizerME(model);
         String[] tokens = tokenizer.tokenize("Baeldung is a Spring Resource.");
         assertThat(tokens).contains("Baeldung", "is", "a", "Spring", "Resource", ".");
     }
     @Test
-    public void givenString_whenWhitespaceTokenizer_thenGetTokens() throws Exception {
+    public void givenWhitespaceTokenizer_whenTokenize_thenTokensAreDetected() throws Exception {
         WhitespaceTokenizer tokenizer = WhitespaceTokenizer.INSTANCE;
         String[] tokens = tokenizer.tokenize("Baeldung is a Spring Resource.");
         assertThat(tokens).contains("Baeldung", "is", "a", "Spring", "Resource.");
     }
     @Test
-    public void givenString_whenSimpleTokenizer_thenGetTokens() throws Exception {
+    public void givenSimpleTokenizer_whenTokenize_thenTokensAreDetected() throws Exception {
         SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
         String[] tokens = tokenizer.tokenize("Baeldung is a Spring Resource.");
         assertThat(tokens).contains("Baeldung", "is", "a", "Spring", "Resource", ".");