Merge pull request #3225 from dhruba619/master

BAEL-1207 File search using Lucene
This commit is contained in:
Carsten Gräf 2017-12-18 06:48:14 +01:00 committed by GitHub
commit 89b2cdf238
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 116 additions and 0 deletions

1
lucene/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/index/

View File

@ -0,0 +1,80 @@
package com.baeldung.lucene;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
public class LuceneFileSearch {
private Directory indexDirectory;
private StandardAnalyzer analyzer;
public LuceneFileSearch(Directory fsDirectory, StandardAnalyzer analyzer) {
super();
this.indexDirectory = fsDirectory;
this.analyzer = analyzer;
}
public void addFileToIndex(String filepath) throws IOException, URISyntaxException {
Path path = Paths.get(getClass().getClassLoader().getResource(filepath).toURI());
File file = path.toFile();
IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
Document document = new Document();
FileReader fileReader = new FileReader(file);
document.add(new TextField("contents", fileReader));
document.add(new StringField("path", file.getPath(), Field.Store.YES));
document.add(new StringField("filename", file.getName(), Field.Store.YES));
indexWriter.addDocument(document);
indexWriter.close();
}
public List<Document> searchFiles(String inField, String queryString) {
try {
Query query = new QueryParser(inField, analyzer).parse(queryString);
IndexReader indexReader = DirectoryReader.open(indexDirectory);
IndexSearcher searcher = new IndexSearcher(indexReader);
TopDocs topDocs = searcher.search(query, 10);
List<Document> documents = new ArrayList<>();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
documents.add(searcher.doc(scoreDoc.doc));
}
return documents;
} catch (IOException | ParseException e) {
e.printStackTrace();
}
return null;
}
}

View File

@ -0,0 +1,32 @@
package com.baeldung.lucene;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.List;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Assert;
import org.junit.Test;
public class LuceneFileSearchTest {
@Test
public void givenSearchQueryWhenFetchedFileNamehenCorrect() throws IOException, URISyntaxException {
String indexPath = "index";
String dataPath = "data/file1.txt";
Directory directory = FSDirectory.open(Paths.get(indexPath));
LuceneFileSearch luceneFileSearch = new LuceneFileSearch(directory, new StandardAnalyzer());
luceneFileSearch.addFileToIndex(dataPath);
List<Document> docs = luceneFileSearch.searchFiles("contents", "consectetur");
Assert.assertEquals("file1.txt", docs.get(0).get("filename"));
}
}

View File

@ -0,0 +1,3 @@
Cras auctor viverra arcu, id consequat diam posuere id. Pellentesque hendrerit felis tortor, et ornare nibh ullamcorper sed. Aenean sed mauris vitae purus auctor gravida. Nam aliquam egestas orci, sit amet imperdiet leo porttitor quis. Integer commodo sodales orci, ultrices vulputate arcu vestibulum non. Nunc at tellus id urna tristique ultrices in in massa. Vestibulum laoreet ullamcorper nulla vel porttitor. Duis blandit commodo elit at consequat. Vestibulum faucibus lectus eget mi tincidunt, quis molestie lacus mollis. Duis elementum urna eros, non iaculis est facilisis in. Praesent et neque vel ipsum viverra euismod ac ac metus. Ut vitae libero ex.
Proin consectetur, neque nec feugiat facilisis, metus libero mollis arcu, id pharetra nibh sapien in elit. Orci varius natoque penatibus et magnis dis parturient montes, nascetur ridiculus mus. Nam pulvinar fringilla orci in posuere. Duis ut turpis dignissim nisl eleifend posuere nec a massa. Cras fringilla iaculis ipsum a aliquet. Nunc ultrices nisl ipsum, vitae consectetur tellus vehicula in. Aliquam lacinia elit nec scelerisque dapibus. Duis pharetra mauris vitae quam tincidunt, viverra iaculis orci iaculis. Nunc gravida sem arcu, et mollis leo porttitor nec. Ut dictum tempor est, at fringilla ex feugiat sed. Nullam purus mi, aliquet eu libero ut, finibus efficitur metus.