mirror of https://github.com/apache/lucene.git
LUCENE-6992: Allow adding IndexableField to MemoryIndex
This commit is contained in:
parent
a3b2ad334c
commit
5e935d7106
|
@@ -257,6 +257,9 @@ Other
|
|||
Locale#forLanguageTag() and Locale#toString() were placed on list
|
||||
of forbidden signatures. (Uwe Schindler, Robert Muir)
|
||||
|
||||
* LUCENE-6988: You can now add IndexableFields directly to a MemoryIndex,
|
||||
and create a MemoryIndex from a lucene Document. (Alan Woodward)
|
||||
|
||||
======================= Lucene 5.4.1 =======================
|
||||
|
||||
Bug Fixes
|
||||
|
|
|
@@ -33,46 +33,18 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
|
|||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.PointValues;
|
||||
import org.apache.lucene.index.DocValuesType;
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.FieldInfos;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.Fields;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.OrdTermState;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SortedDocValues;
|
||||
import org.apache.lucene.index.SortedNumericDocValues;
|
||||
import org.apache.lucene.index.SortedSetDocValues;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.SimpleCollector;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefArray;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.apache.lucene.util.IntBlockPool.SliceReader;
|
||||
import org.apache.lucene.util.IntBlockPool.SliceWriter;
|
||||
import org.apache.lucene.util.IntBlockPool;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.RecyclingByteBlockAllocator;
|
||||
import org.apache.lucene.util.RecyclingIntBlockAllocator;
|
||||
|
||||
/**
|
||||
* High-performance single-document main memory Apache Lucene fulltext search index.
|
||||
|
@@ -288,6 +260,46 @@ public class MemoryIndex {
|
|||
addField(fieldName, stream, 1.0f, analyzer.getPositionIncrementGap(fieldName), analyzer.getOffsetGap(fieldName));
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a MemoryIndex from a lucene {@link Document} using an analyzer
|
||||
*
|
||||
* @param document the document to index
|
||||
* @param analyzer the analyzer to use
|
||||
* @return a MemoryIndex
|
||||
*/
|
||||
public static MemoryIndex fromDocument(Document document, Analyzer analyzer) {
|
||||
return fromDocument(document, analyzer, false, false, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a MemoryIndex from a lucene {@link Document} using an analyzer
|
||||
* @param document the document to index
|
||||
* @param analyzer the analyzer to use
|
||||
* @param storeOffsets <code>true</code> if offsets should be stored
|
||||
* @param storePayloads <code>true</code> if payloads should be stored
|
||||
* @return a MemoryIndex
|
||||
*/
|
||||
public static MemoryIndex fromDocument(Document document, Analyzer analyzer, boolean storeOffsets, boolean storePayloads) {
|
||||
return fromDocument(document, analyzer, storeOffsets, storePayloads, 0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a MemoryIndex from a lucene {@link Document} using an analyzer
|
||||
* @param document the document to index
|
||||
* @param analyzer the analyzer to use
|
||||
* @param storeOffsets <code>true</code> if offsets should be stored
|
||||
* @param storePayloads <code>true</code> if payloads should be stored
|
||||
* @param maxReusedBytes the number of bytes that should remain in the internal memory pools after {@link #reset()} is called
|
||||
* @return a MemoryIndex
|
||||
*/
|
||||
public static MemoryIndex fromDocument(Document document, Analyzer analyzer, boolean storeOffsets, boolean storePayloads, long maxReusedBytes) {
|
||||
MemoryIndex mi = new MemoryIndex(storeOffsets, storePayloads, maxReusedBytes);
|
||||
for (IndexableField field : document) {
|
||||
mi.addField(field, analyzer);
|
||||
}
|
||||
return mi;
|
||||
}
|
||||
|
||||
/**
|
||||
* Convenience method; Creates and returns a token stream that generates a
|
||||
* token for each keyword in the given collection, "as is", without any
|
||||
|
@@ -339,6 +351,39 @@ public class MemoryIndex {
|
|||
public void addField(String fieldName, TokenStream stream) {
|
||||
addField(fieldName, stream, 1.0f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer
|
||||
* @param field the field to add
|
||||
* @param analyzer the analyzer to use for term analysis
|
||||
* @throws IllegalArgumentException if the field is a DocValues or Point field, as these
|
||||
* structures are not supported by MemoryIndex
|
||||
*/
|
||||
public void addField(IndexableField field, Analyzer analyzer) {
|
||||
addField(field, analyzer, 1.0f);
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds a lucene {@link IndexableField} to the MemoryIndex using the provided analyzer
|
||||
* @param field the field to add
|
||||
* @param analyzer the analyzer to use for term analysis
|
||||
* @param boost a field boost
|
||||
* @throws IllegalArgumentException if the field is a DocValues or Point field, as these
|
||||
* structures are not supported by MemoryIndex
|
||||
*/
|
||||
public void addField(IndexableField field, Analyzer analyzer, float boost) {
|
||||
if (field.fieldType().docValuesType() != DocValuesType.NONE)
|
||||
throw new IllegalArgumentException("MemoryIndex does not support DocValues fields");
|
||||
if (field.fieldType().pointDimensionCount() != 0)
|
||||
throw new IllegalArgumentException("MemoryIndex does not support Points");
|
||||
if (analyzer == null) {
|
||||
addField(field.name(), field.tokenStream(null, null), boost);
|
||||
}
|
||||
else {
|
||||
addField(field.name(), field.tokenStream(analyzer, null), boost,
|
||||
analyzer.getPositionIncrementGap(field.name()), analyzer.getOffsetGap(field.name()));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Iterates over the given token stream and adds the resulting terms to the index;
|
||||
|
|
|
@@ -22,12 +22,17 @@ import java.io.IOException;
|
|||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockPayloadAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.LeafReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.PhraseQuery;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
|
@@ -156,5 +161,27 @@ public class TestMemoryIndex extends LuceneTestCase {
|
|||
TestUtil.checkReader(reader);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testBuildFromDocument() {
|
||||
|
||||
Document doc = new Document();
|
||||
doc.add(new TextField("field1", "some text", Field.Store.NO));
|
||||
doc.add(new TextField("field1", "some more text", Field.Store.NO));
|
||||
doc.add(new StringField("field2", "untokenized text", Field.Store.NO));
|
||||
|
||||
analyzer.setPositionIncrementGap(100);
|
||||
|
||||
MemoryIndex mi = MemoryIndex.fromDocument(doc, analyzer);
|
||||
|
||||
assertThat(mi.search(new TermQuery(new Term("field1", "text"))), not(0.0f));
|
||||
assertThat(mi.search(new TermQuery(new Term("field2", "text"))), is(0.0f));
|
||||
assertThat(mi.search(new TermQuery(new Term("field2", "untokenized text"))), not(0.0f));
|
||||
|
||||
assertThat(mi.search(new PhraseQuery("field1", "some", "more", "text")), not(0.0f));
|
||||
assertThat(mi.search(new PhraseQuery("field1", "some", "text")), not(0.0f));
|
||||
assertThat(mi.search(new PhraseQuery("field1", "text", "some")), is(0.0f));
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue