From 68eae8ebf414194419123d2e9a28e5f18517fd13 Mon Sep 17 00:00:00 2001 From: "dhrubajyoti.bhattacharjee" Date: Mon, 27 Nov 2017 21:14:31 +0530 Subject: [PATCH] BAEL-1160 Introduction to Apache Lucene-Added new sections --- lucene/pom.xml | 2 - .../baeldung/lucene/InMemoryLuceneIndex.java | 54 +++++++- .../lucene/LuceneInMemorySearchTest.java | 131 +++++++++++++++++- 3 files changed, 182 insertions(+), 5 deletions(-) diff --git a/lucene/pom.xml b/lucene/pom.xml index 42b81a7d4a..6659d9ac32 100644 --- a/lucene/pom.xml +++ b/lucene/pom.xml @@ -31,7 +31,5 @@ 4.12 test - - \ No newline at end of file diff --git a/lucene/src/main/java/com/baeldung/lucene/InMemoryLuceneIndex.java b/lucene/src/main/java/com/baeldung/lucene/InMemoryLuceneIndex.java index 40a35fad86..97b1ec7b5d 100644 --- a/lucene/src/main/java/com/baeldung/lucene/InMemoryLuceneIndex.java +++ b/lucene/src/main/java/com/baeldung/lucene/InMemoryLuceneIndex.java @@ -7,18 +7,22 @@ import java.util.List; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; import org.apache.lucene.queryparser.classic.ParseException; import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Sort; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; public class InMemoryLuceneIndex { @@ -45,6 +49,7 @@ public class InMemoryLuceneIndex { document.add(new TextField("title", title, Field.Store.YES)); document.add(new TextField("body", body, Field.Store.YES)); + document.add(new SortedDocValuesField("title", new BytesRef(title))); writter.addDocument(document); writter.close(); @@ -73,6 +78,51 @@ public class InMemoryLuceneIndex { } + public void deleteDocument(Term term) { + try { + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); + IndexWriter writter = new IndexWriter(memoryIndex, indexWriterConfig); + writter.deleteDocuments(term); + writter.close(); + } catch (IOException e) { + e.printStackTrace(); + } + } + + public List searchIndex(Query query) { + try { + IndexReader indexReader = DirectoryReader.open(memoryIndex); + IndexSearcher searcher = new IndexSearcher(indexReader); + TopDocs topDocs = searcher.search(query, 10); + List documents = new ArrayList<>(); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + documents.add(searcher.doc(scoreDoc.doc)); + } + + return documents; + } catch (IOException e) { + e.printStackTrace(); + } + return null; + + } + + public List searchIndex(Query query, Sort sort) { + try { + IndexReader indexReader = DirectoryReader.open(memoryIndex); + IndexSearcher searcher = new IndexSearcher(indexReader); + TopDocs topDocs = searcher.search(query, 10, sort); + List documents = new ArrayList<>(); + for (ScoreDoc scoreDoc : topDocs.scoreDocs) { + documents.add(searcher.doc(scoreDoc.doc)); + } + + return documents; + } catch (IOException e) { + e.printStackTrace(); + } + return null; + + } + } - - diff --git a/lucene/src/test/java/com/baeldung/lucene/LuceneInMemorySearchTest.java b/lucene/src/test/java/com/baeldung/lucene/LuceneInMemorySearchTest.java index c3a498a4b8..acf688cb99 100644 --- a/lucene/src/test/java/com/baeldung/lucene/LuceneInMemorySearchTest.java +++ b/lucene/src/test/java/com/baeldung/lucene/LuceneInMemorySearchTest.java @@ -4,7 +4,19 @@ import java.util.List; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.WildcardQuery; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import org.junit.Assert; import org.junit.Test; @@ -20,4 +32,121 @@ public class LuceneInMemorySearchTest { Assert.assertEquals("Hello world", documents.get(0).get("title")); } -} + @Test + public void givenTermQueryWhenFetchedDocumentThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("activity", "running in track"); + inMemoryLuceneIndex.indexDocument("activity", "Cars are running on road"); + + Term term = new Term("body", "running"); + Query query = new TermQuery(term); + + List documents = inMemoryLuceneIndex.searchIndex(query); + Assert.assertEquals(2, documents.size()); + } + + @Test + public void givenPrefixQueryWhenFetchedDocumentThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("article", "Lucene introduction"); + inMemoryLuceneIndex.indexDocument("article", "Introduction to Lucene"); + + Term term = new Term("body", "intro"); + Query query = new PrefixQuery(term); + + List documents = inMemoryLuceneIndex.searchIndex(query); + Assert.assertEquals(2, documents.size()); + } + + @Test + public void givenBooleanQueryWhenFetchedDocumentThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("Destination", "Las Vegas singapore car"); + inMemoryLuceneIndex.indexDocument("Commutes in singapore", "Bus Car Bikes"); + + Term term1 = new Term("body", "singapore"); + Term term2 = new Term("body", "car"); + + TermQuery query1 = new TermQuery(term1); + TermQuery query2 = new TermQuery(term2); + + BooleanQuery booleanQuery = new BooleanQuery.Builder().add(query1, BooleanClause.Occur.MUST) + .add(query2, BooleanClause.Occur.MUST).build(); + + List documents = inMemoryLuceneIndex.searchIndex(booleanQuery); + Assert.assertEquals(1, documents.size()); + } + + @Test + public void givenPhraseQueryWhenFetchedDocumentThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("quotes", "A rose by any other name would smell as sweet."); + + Query query = new PhraseQuery(1, "body", new BytesRef("smell"), new BytesRef("sweet")); + List documents = inMemoryLuceneIndex.searchIndex(query); + + Assert.assertEquals(1, documents.size()); + } + + @Test + public void givenFuzzyQueryWhenFetchedDocumentThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("article", "Halloween Festival"); + inMemoryLuceneIndex.indexDocument("decoration", "Decorations for Halloween"); + + Term term = new Term("body", "hallowen"); + Query query = new FuzzyQuery(term); + + List documents = inMemoryLuceneIndex.searchIndex(query); + Assert.assertEquals(2, documents.size()); + } + + @Test + public void givenWildCardQueryWhenFetchedDocumentThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("article", "Lucene introduction"); + inMemoryLuceneIndex.indexDocument("article", "Introducing Lucene with Spring"); + + Term term = new Term("body", "intro*"); + Query query = new WildcardQuery(term); + + List documents = inMemoryLuceneIndex.searchIndex(query); + Assert.assertEquals(2, documents.size()); + } + + @Test + public void givenSortFieldWhenSortedThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("Ganges", "River in India"); + inMemoryLuceneIndex.indexDocument("Mekong", "This river flows in south Asia"); + inMemoryLuceneIndex.indexDocument("Amazon", "Rain forest river"); + inMemoryLuceneIndex.indexDocument("Rhine", "Belongs to Europe"); + inMemoryLuceneIndex.indexDocument("Nile", "Longest River"); + + Term term = new Term("body", "river"); + Query query = new WildcardQuery(term); + + SortField sortField = new SortField("title", SortField.Type.STRING_VAL, false); + Sort sortByTitle = new Sort(sortField); + + List documents = inMemoryLuceneIndex.searchIndex(query, sortByTitle); + Assert.assertEquals(4, documents.size()); + Assert.assertEquals("Amazon", documents.get(0).getField("title").stringValue()); + } + + @Test + public void whenDocumentDeletedThenCorrect() { + InMemoryLuceneIndex inMemoryLuceneIndex = new InMemoryLuceneIndex(new RAMDirectory(), new StandardAnalyzer()); + inMemoryLuceneIndex.indexDocument("Ganges", "River in India"); + inMemoryLuceneIndex.indexDocument("Mekong", "This river flows in south Asia"); + + Term term = new Term("title", "ganges"); + inMemoryLuceneIndex.deleteDocument(term); + + Query query = new TermQuery(term); + + List documents = inMemoryLuceneIndex.searchIndex(query); + Assert.assertEquals(0, documents.size()); + } + +} \ No newline at end of file