mirror of https://github.com/apache/lucene.git
LUCENE-2308: cutover to FieldType
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1162347 13f79535-47bb-0310-9956-ffa450edef68

parent 100ce41c79
commit 4dad0ba89f

@@ -256,6 +256,11 @@ Changes in Runtime Behavior
 * LUCENE-3146: IndexReader.setNorm throws IllegalStateException if the field
   does not store norms. (Shai Erera, Mike McCandless)
 
+* LUCENE-3309: Stored fields no longer record whether they were
+  tokenized or not. In general you should not rely on stored fields
+  to record any "metadata" from indexing (tokenized, omitNorms,
+  IndexOptions, boost, etc.) (Mike McCandless)
+
 API Changes
 
 * LUCENE-2302, LUCENE-1458, LUCENE-2111, LUCENE-2514: Terms are no longer

@@ -5,6 +5,14 @@ http://s.apache.org/luceneversions
 
 ======================= Trunk (not yet released) =======================
 
+Changes in Runtime Behavior
+
+* LUCENE-3309: Fast vector highlighter now inserts the
+  MultiValuedSeparator for NOT_ANALYZED fields (in addition to
+  ANALYZED fields). To ensure your offsets are correct you should
+  provide an analyzer that returns 1 from the offsetGap method.
+  (Mike McCandless)
+
 Build
 
 * LUCENE-2845: Moved contrib/benchmark to modules.

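A note on the contrib entry above: with LUCENE-3309 the fast vector highlighter applies the MultiValuedSeparator to NOT_ANALYZED fields too, so multi-valued offsets only line up when the analyzer reports an offset gap of 1 between values. A minimal, illustrative sketch follows; the exact override name and parameter type varied on trunk (3.x exposed getOffsetGap(Fieldable)), so treat the signature as an assumption:

    // Illustrative only: report an offset gap of 1 between values of a
    // multi-valued field, per the CHANGES entry above. The override
    // signature is an assumption for this trunk snapshot.
    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT) {
      @Override
      public int getOffsetGap(Fieldable field) {
        return 1;
      }
    };
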
@@ -22,7 +22,8 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -173,8 +174,7 @@ public class IndexFiles {
       // field that is indexed (i.e. searchable), but don't tokenize
       // the field into separate words and don't index term frequency
       // or positional information:
-      Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
-      pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
+      Field pathField = new Field("path", StringField.TYPE_STORED, file.getPath());
       doc.add(pathField);
 
       // Add the last modified date of the file a field named "modified".
@@ -192,7 +192,7 @@ public class IndexFiles {
       // so that the text of the file is tokenized and indexed, but not stored.
       // Note that FileReader expects the file to be in UTF-8 encoding.
       // If that's not the case searching for special characters will fail.
-      doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
+      doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
 
       if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
         // New index, so we just add the document (no old document can be there):

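The two IndexFiles hunks above show the shape of the LUCENE-2308 cutover: the positional Store/Index constructor enums (plus the follow-up setIndexOptions call) collapse into a FieldType constant passed alongside the value. A before/after sketch, with an illustrative path value:

    Document doc = new Document();

    // Before: options packed into the constructor, tuned afterwards.
    Field oldPath = new Field("path", "/tmp/a.txt",
        Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);

    // After: the same options travel in a shared type constant.
    Field newPath = new Field("path", StringField.TYPE_STORED, "/tmp/a.txt");
    doc.add(newPath);
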
@@ -17,10 +17,26 @@
 
 package org.apache.lucene.demo.xmlparser;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.StringTokenizer;
+import javax.servlet.RequestDispatcher;
+import javax.servlet.ServletConfig;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -33,20 +49,6 @@ import org.apache.lucene.util.Version;
 import org.apache.lucene.xmlparser.CorePlusExtensionsParser;
 import org.apache.lucene.xmlparser.QueryTemplateManager;
 
-import javax.servlet.RequestDispatcher;
-import javax.servlet.ServletConfig;
-import javax.servlet.ServletException;
-import javax.servlet.http.HttpServlet;
-import javax.servlet.http.HttpServletRequest;
-import javax.servlet.http.HttpServletResponse;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.Enumeration;
-import java.util.Properties;
-import java.util.StringTokenizer;
-
 public class FormBasedXmlQueryDemo extends HttpServlet {
 
   private QueryTemplateManager queryTemplateManager;
@@ -124,20 +126,18 @@ public class FormBasedXmlQueryDemo extends HttpServlet {
     InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
     BufferedReader br = new BufferedReader(new InputStreamReader(dataIn));
     String line = br.readLine();
+    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
+    textNoNorms.setOmitNorms(true);
     while (line != null) {
       line = line.trim();
       if (line.length() > 0) {
         //parse row and create a document
         StringTokenizer st = new StringTokenizer(line, "\t");
         Document doc = new Document();
-        doc.add(new Field("location", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED_NO_NORMS));
-        doc.add(new Field("salary", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED_NO_NORMS));
-        doc.add(new Field("type", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED_NO_NORMS));
-        doc.add(new Field("description", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED));
+        doc.add(new Field("location", textNoNorms, st.nextToken()));
+        doc.add(new Field("salary", textNoNorms, st.nextToken()));
+        doc.add(new Field("type", textNoNorms, st.nextToken()));
+        doc.add(new Field("description", textNoNorms, st.nextToken()));
         writer.addDocument(doc);
       }
       line = br.readLine();

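Worth noting in the demo rewrite above: a single FieldType instance (textNoNorms) is configured once and shared by all four fields, replacing four repetitions of Store/Index arguments. The same pattern in isolation, with hypothetical field values:

    FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
    textNoNorms.setOmitNorms(true);

    Document doc = new Document();
    // One type instance configures every field sharing these options.
    doc.add(new Field("location", textNoNorms, "Sydney"));
    doc.add(new Field("salary", textNoNorms, "4000"));
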
@@ -60,10 +60,11 @@ public class TokenSources {
    * @param analyzer The analyzer to use for creating the TokenStream if the
    *        vector doesn't exist
    * @return The {@link org.apache.lucene.analysis.TokenStream} for the
-   *         {@link org.apache.lucene.document.Fieldable} on the
+   *         {@link org.apache.lucene.index.IndexableField} on the
    *         {@link org.apache.lucene.document.Document}
    * @throws IOException if there was an error loading
    */
+
   public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
       String field, Document doc, Analyzer analyzer) throws IOException {
     TokenStream ts = null;

@@ -21,15 +21,18 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.MapFieldSelector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.Encoder;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
-import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
 import org.apache.lucene.store.IndexInput;
 
 public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
@@ -107,10 +110,27 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
     return fragments.toArray( new String[fragments.size()] );
   }
 
-  protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
+  protected Field[] getFields( IndexReader reader, int docId, final String fieldName) throws IOException {
     // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
-    Document doc = reader.document( docId, new MapFieldSelector(fieldName) );
-    return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
+    final List<Field> fields = new ArrayList<Field>();
+    reader.document(docId, new StoredFieldVisitor() {
+      @Override
+      public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
+        if (fieldInfo.name.equals(fieldName)) {
+          final byte[] b = new byte[numUTF8Bytes];
+          in.readBytes(b, 0, b.length);
+          FieldType ft = new FieldType(TextField.TYPE_STORED);
+          ft.setStoreTermVectors(fieldInfo.storeTermVector);
+          ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector);
+          ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector);
+          fields.add(new Field(fieldInfo.name, ft, new String(b, "UTF-8")));
+        } else {
+          in.seek(in.getFilePointer() + numUTF8Bytes);
+        }
+        return false;
+      }
+    });
+    return fields.toArray(new Field[fields.size()]);
   }
 
   protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
@@ -142,7 +162,6 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
       int startOffset, int endOffset ){
     while( buffer.length() < endOffset && index[0] < values.length ){
       buffer.append( values[index[0]].stringValue() );
-      if( values[index[0]].isTokenized() )
       buffer.append( multiValuedSeparator );
       index[0]++;
     }

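The getFields() rewrite above is the pattern this commit applies wherever FieldSelector used to appear: a StoredFieldVisitor is pushed through IndexReader.document(docId, visitor), each callback either consumes the field's bytes or seeks past them, and the boolean return value tells the reader to stop visiting further fields (compare the LOAD_AND_BREAK handling in FieldSelectorVisitor at the end of this commit). A trimmed sketch assuming the same trunk-era callback signature, with "title" as a hypothetical field name:

    reader.document(docId, new StoredFieldVisitor() {
      @Override
      public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
        if ("title".equals(fieldInfo.name)) {
          byte[] bytes = new byte[numUTF8Bytes];
          in.readBytes(bytes, 0, bytes.length);        // consume the field's bytes
          System.out.println(new String(bytes, "UTF-8"));
        } else {
          in.seek(in.getFilePointer() + numUTF8Bytes); // skip unread bytes
        }
        return false; // true would abort the visit (the *_AND_BREAK cases)
      }
    });
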
@@ -46,8 +46,12 @@ public class FieldTermStack {
   //  Directory dir = new RAMDirectory();
   //  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
   //  Document doc = new Document();
-  //  doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-  //  doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+  //  FieldType ft = new FieldType(TextField.TYPE_STORED);
+  //  ft.setStoreTermVectors(true);
+  //  ft.setStoreTermVectorOffsets(true);
+  //  ft.setStoreTermVectorPositions(true);
+  //  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
+  //  doc.add( new Field( "f", ft, "b a b a f" ) );
   //  writer.addDocument( doc );
   //  writer.close();
 

@@ -25,7 +25,7 @@ This is an another highlighter implementation.
 <li>support N-gram fields</li>
 <li>support phrase-unit highlighting with slops</li>
 <li>need Java 1.5</li>
-<li>highlight fields need to be TermVector.WITH_POSITIONS_OFFSETS</li>
+<li>highlight fields need to be stored with Positions and Offsets</li>
 <li>take into account query boost to score fragments</li>
 <li>support colored highlight tags</li>
 <li>pluggable FragListBuilder</li>
@@ -95,7 +95,7 @@ will be generated:</p>
 
 <h3>Step 2.</h3>
 <p>In Step 2, Fast Vector Highlighter generates {@link org.apache.lucene.search.vectorhighlight.FieldTermStack}. Fast Vector Highlighter uses {@link org.apache.lucene.index.TermFreqVector} data
-(must be stored {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS_OFFSETS})
+(must be stored {@link org.apache.lucene.document.FieldType#setStoreTermVectorOffsets(boolean)} and {@link org.apache.lucene.document.FieldType#setStoreTermVectorPositions(boolean)})
 to generate it. <code>FieldTermStack</code> keeps the terms in the user query.
 Therefore, in this sample case, Fast Vector Highlighter generates the following <code>FieldTermStack</code>:</p>
 <pre>

@@ -28,9 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -61,8 +60,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamConcurrent(),
-          TermVector.WITH_POSITIONS_OFFSETS));
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectorOffsets(true);
+      customType.setStoreTermVectorPositions(true);
+      customType.setStoreTermVectors(true);
+      document.add(new Field(FIELD, customType, new TokenStreamConcurrent()));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -105,8 +107,12 @@ public class HighlighterPhraseTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamConcurrent(),
-          TermVector.WITH_POSITIONS_OFFSETS));
+
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectorOffsets(true);
+      customType.setStoreTermVectorPositions(true);
+      customType.setStoreTermVectors(true);
+      document.add(new Field(FIELD, customType, new TokenStreamConcurrent()));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -176,8 +182,12 @@ public class HighlighterPhraseTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamSparse(),
-          TermVector.WITH_POSITIONS_OFFSETS));
+
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectorOffsets(true);
+      customType.setStoreTermVectorPositions(true);
+      customType.setStoreTermVectors(true);
+      document.add(new Field(FIELD, customType, new TokenStreamSparse()));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -219,8 +229,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, TEXT, Store.YES, Index.ANALYZED,
-          TermVector.WITH_OFFSETS));
+
+      FieldType customType = new FieldType(TextField.TYPE_STORED);
+      customType.setStoreTermVectorOffsets(true);
+      customType.setStoreTermVectors(true);
+      document.add(new Field(FIELD, customType, TEXT));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -260,8 +273,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamSparse(),
-          TermVector.WITH_POSITIONS_OFFSETS));
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectorOffsets(true);
+      customType.setStoreTermVectorPositions(true);
+      customType.setStoreTermVectors(true);
+      document.add(new Field(FIELD, customType, new TokenStreamSparse()));
      indexWriter.addDocument(document);
     } finally {
       indexWriter.close();

@@ -38,8 +38,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -1629,7 +1628,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter
 
   private Document doc( String f, String v ){
     Document doc = new Document();
-    doc.add( new Field( f, v, Store.YES, Index.ANALYZED ) );
+    doc.add( new Field( f, TextField.TYPE_STORED, v));
     return doc;
   }
 
@@ -1690,7 +1689,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter
    * QueryFragmentScorer(query));
    *
    * for (int i = 0; i < hits.totalHits; i++) { String text =
-   * searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
+   * searcher.doc2(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
    * tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
    * String highlightedText = highlighter.getBestFragment(tokenStream,text);
    * System.out.println(highlightedText); } }
@@ -1744,21 +1743,21 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter
       addDoc(writer, text);
     }
     Document doc = new Document();
-    NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
+    NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
     nfield.setIntValue(1);
     doc.add(nfield);
     writer.addDocument(doc, analyzer);
-    nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
+    nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
     nfield.setIntValue(3);
     doc = new Document();
     doc.add(nfield);
     writer.addDocument(doc, analyzer);
-    nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
+    nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
     nfield.setIntValue(5);
     doc = new Document();
     doc.add(nfield);
     writer.addDocument(doc, analyzer);
-    nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
+    nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
     nfield.setIntValue(7);
     doc = new Document();
     doc.add(nfield);
@@ -1779,7 +1778,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatter
   }
   private void addDoc(IndexWriter writer, String text) throws IOException {
     Document d = new Document();
-    Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED);
+
+    Field f = new Field(FIELD_NAME, TextField.TYPE_STORED, text);
     d.add(f);
     writer.addDocument(d);
 

@@ -28,7 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
@@ -107,8 +108,10 @@ public class TokenSourcesTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamOverlap(),
-          TermVector.WITH_OFFSETS));
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectors(true);
+      customType.setStoreTermVectorOffsets(true);
+      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -153,8 +156,11 @@ public class TokenSourcesTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamOverlap(),
-          TermVector.WITH_POSITIONS_OFFSETS));
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectors(true);
+      customType.setStoreTermVectorOffsets(true);
+      customType.setStoreTermVectorPositions(true);
+      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -199,8 +205,10 @@ public class TokenSourcesTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamOverlap(),
-          TermVector.WITH_OFFSETS));
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectors(true);
+      customType.setStoreTermVectorOffsets(true);
+      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();
@@ -246,8 +254,10 @@ public class TokenSourcesTest extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
     try {
       final Document document = new Document();
-      document.add(new Field(FIELD, new TokenStreamOverlap(),
-          TermVector.WITH_POSITIONS_OFFSETS));
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setStoreTermVectors(true);
+      customType.setStoreTermVectorOffsets(true);
+      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
       indexWriter.addDocument(document);
     } finally {
       indexWriter.close();

@@ -34,14 +34,13 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.DisjunctionMaxQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
@@ -359,8 +358,13 @@ public abstract class AbstractTestCase extends LuceneTestCase {
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
         TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
     Document doc = new Document();
-    for( String value: values )
-      doc.add( new Field( F, value, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+    customType.setStoreTermVectors(true);
+    customType.setStoreTermVectorOffsets(true);
+    customType.setStoreTermVectorPositions(true);
+    for( String value: values ) {
+      doc.add( new Field( F, customType, value ) );
+    }
     writer.addDocument( doc );
     writer.close();
     if (reader != null) reader.close();
@@ -372,8 +376,14 @@ public abstract class AbstractTestCase extends LuceneTestCase {
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
         TEST_VERSION_CURRENT, analyzerK).setOpenMode(OpenMode.CREATE));
     Document doc = new Document();
-    for( String value: values )
-      doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+    customType.setStoreTermVectors(true);
+    customType.setStoreTermVectorOffsets(true);
+    customType.setStoreTermVectorPositions(true);
+    for( String value: values ) {
+      doc.add( new Field( F, customType, value ));
+      //doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    }
     writer.addDocument( doc );
     writer.close();
     if (reader != null) reader.close();

@@ -19,9 +19,8 @@ package org.apache.lucene.search.vectorhighlight;
 
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -139,7 +138,12 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
         TEST_VERSION_CURRENT, analyzerW).setOpenMode(OpenMode.CREATE));
     Document doc = new Document();
-    doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+    customType.setStoreTermVectors(true);
+    customType.setStoreTermVectorOffsets(true);
+    customType.setStoreTermVectorPositions(true);
+    doc.add( new Field( F, customType, "aaa" ) );
+    //doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
     writer.addDocument( doc );
     writer.close();
     if (reader != null) reader.close();
@@ -155,9 +159,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
     SimpleFragListBuilder sflb = new SimpleFragListBuilder();
     FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
     SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
-    // '/' separator doesn't effect the snippet because of NOT_ANALYZED field
     sfb.setMultiValuedSeparator( '/' );
-    assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
+    assertEquals( "abc/<b>defg</b>/hijkl/", sfb.createFragment( reader, 0, F, ffl ) );
   }
 
   public void testMVSeparator() throws Exception {

@@ -68,7 +68,6 @@ public class InstantiatedDocument {
     return document;
   }
 
-
   @Override
   public String toString() {
     return document.toString();

@@ -27,8 +27,8 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.MultiNorms;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermsEnum;
@@ -190,16 +190,16 @@ public class InstantiatedIndex
       InstantiatedDocument document = new InstantiatedDocument();
       // copy stored fields from source reader
       Document sourceDocument = sourceIndexReader.document(i);
-      for (Fieldable field : sourceDocument.getFields()) {
+      for (IndexableField field : sourceDocument) {
         if (fields == null || fields.contains(field.name())) {
           document.getDocument().add(field);
         }
       }
       document.setDocumentNumber(i);
       documentsByNumber[i] = document;
-      for (Fieldable field : document.getDocument().getFields()) {
+      for (IndexableField field : document.getDocument()) {
         if (fields == null || fields.contains(field.name())) {
-          if (field.isTermVectorStored()) {
+          if (field.storeTermVectors()) {
             if (document.getVectorSpace() == null) {
               document.setVectorSpace(new HashMap<String, List<InstantiatedTermDocumentInformation>>());
             }
@@ -290,8 +290,8 @@ public class InstantiatedIndex
       if (document == null) {
         continue; // deleted
       }
-      for (Fieldable field : document.getDocument().getFields()) {
-        if (field.isTermVectorStored() && field.isStoreOffsetWithTermVector()) {
+      for (IndexableField field : document.getDocument()) {
+        if (field.storeTermVectors() && field.storeTermVectorOffsets()) {
           TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
           if (termPositionVector != null) {
             for (int i = 0; i < termPositionVector.getTerms().length; i++) {

@@ -30,7 +30,6 @@ import java.util.Set;
 import java.util.Comparator;
 
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
 import org.apache.lucene.index.*;
 import org.apache.lucene.index.codecs.PerDocValues;
 import org.apache.lucene.store.Directory;
@@ -251,42 +250,6 @@ public class InstantiatedIndexReader extends IndexReader {
     return fieldSet;
   }
 
-  /**
-   * Return the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup>
-   * position.
-     <p>
-   * <b>Warning!</b>
-   * The resulting document is the actual stored document instance
-   * and not a deserialized clone as retuned by an IndexReader
-   * over a {@link org.apache.lucene.store.Directory}.
-   * I.e., if you need to touch the document, clone it first!
-   * <p>
-   * This can also be seen as a feature for live changes of stored values,
-   * but be careful! Adding a field with an name unknown to the index
-   * or to a field with previously no stored values will make
-   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
-   * out of sync, causing problems for instance when merging the
-   * instantiated index to another index.
-     <p>
-   * This implementation ignores the field selector! All stored fields are always returned!
-   * <p>
-   *
-   * @param n document number
-   * @param fieldSelector ignored
-   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @see org.apache.lucene.document.Fieldable
-   * @see org.apache.lucene.document.FieldSelector
-   * @see org.apache.lucene.document.SetBasedFieldSelector
-   * @see org.apache.lucene.document.LoadFirstFieldSelector
-   */
-  @Override
-  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
-    return document(n);
-  }
-
   /**
    * Returns the stored fields of the <code>n</code><sup>th</sup>
    * <code>Document</code> in this index.
@@ -313,6 +276,11 @@ public class InstantiatedIndexReader extends IndexReader {
     return getIndex().getDocumentsByNumber()[n].getDocument();
   }
 
+  @Override
+  public void document(int docID, StoredFieldVisitor visitor) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
   /**
    * never ever touch these values. it is the true values, unless norms have
    * been touched.

@@ -37,9 +37,9 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.search.IndexSearcher;
@@ -238,7 +238,7 @@ public class InstantiatedIndexWriter implements Closeable {
         if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
           final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName;
           final FieldInvertState invertState = new FieldInvertState();
-          invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
+          invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost);
           invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
           normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).computeNorm(invertState);
         } else {
@@ -469,7 +469,7 @@ public class InstantiatedIndexWriter implements Closeable {
     // normalize settings per field name in document
 
     Map<String /* field name */, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
-    for (Fieldable field : document.getDocument().getFields()) {
+    for (IndexableField field : document.getDocument()) {
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
       if (fieldSetting == null) {
         fieldSetting = new FieldSetting();
@@ -479,52 +479,52 @@ public class InstantiatedIndexWriter implements Closeable {
       }
 
       // todo: fixme: multiple fields with the same name does not mean field boost += more boost.
-      fieldSetting.boost *= field.getBoost();
+      fieldSetting.boost *= field.boost();
       //fieldSettings.dimensions++;
 
 
       // once fieldSettings, always fieldSettings.
-      if (field.getOmitNorms()) {
+      if (field.omitNorms()) {
         fieldSetting.omitNorms = true;
       }
-      if (field.isIndexed() ) {
+      if (field.indexed() ) {
         fieldSetting.indexed = true;
       }
-      if (field.isTokenized()) {
+      if (field.tokenized()) {
         fieldSetting.tokenized = true;
       }
-      if (field.isStored()) {
+      if (field.stored()) {
         fieldSetting.stored = true;
       }
-      if (field.isBinary()) {
+      if (field.binaryValue() != null) {
         fieldSetting.isBinary = true;
       }
-      if (field.isTermVectorStored()) {
+      if (field.storeTermVectors()) {
         fieldSetting.storeTermVector = true;
       }
-      if (field.isStorePositionWithTermVector()) {
+      if (field.storeTermVectorPositions()) {
         fieldSetting.storePositionWithTermVector = true;
       }
-      if (field.isStoreOffsetWithTermVector()) {
+      if (field.storeTermVectorOffsets()) {
         fieldSetting.storeOffsetWithTermVector = true;
       }
     }
 
-    Map<Fieldable, LinkedList<Token>> tokensByField = new LinkedHashMap<Fieldable, LinkedList<Token>>(20);
+    Map<IndexableField, LinkedList<Token>> tokensByField = new LinkedHashMap<IndexableField, LinkedList<Token>>(20);
 
     // tokenize indexed fields.
-    for (Iterator<Fieldable> it = document.getDocument().getFields().iterator(); it.hasNext();) {
+    for (Iterator<IndexableField> it = document.getDocument().iterator(); it.hasNext();) {
 
-      Fieldable field = it.next();
+      IndexableField field = it.next();
 
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
 
-      if (field.isIndexed()) {
+      if (field.indexed()) {
 
         LinkedList<Token> tokens = new LinkedList<Token>();
         tokensByField.put(field, tokens);
 
-        if (field.isTokenized()) {
+        if (field.tokenized()) {
           final TokenStream tokenStream;
           // todo readerValue(), binaryValue()
           if (field.tokenStreamValue() != null) {
@@ -564,8 +564,8 @@ public class InstantiatedIndexWriter implements Closeable {
         }
       }
 
-      if (!field.isStored()) {
-        it.remove();
+      if (!field.stored()) {
+        //it.remove();
       }
     }
 
@@ -574,7 +574,7 @@ public class InstantiatedIndexWriter implements Closeable {
     termDocumentInformationFactoryByDocument.put(document, termDocumentInformationFactoryByTermTextAndFieldSetting);
 
     // build term vector, term positions and term offsets
-    for (Map.Entry<Fieldable, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
+    for (Map.Entry<IndexableField, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
 
       Map<String, TermDocumentInformationFactory> termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
@@ -610,7 +610,7 @@ public class InstantiatedIndexWriter implements Closeable {
           termDocumentInformationFactory.payloads.add(null);
         }
 
-        if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
+        if (eField_Tokens.getKey().storeTermVectorOffsets()) {
 
           termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset()));
           lastOffset = fieldSetting.offset + token.endOffset();
@@ -619,7 +619,7 @@ public class InstantiatedIndexWriter implements Closeable {
 
       }
 
-      if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
+      if (eField_Tokens.getKey().storeTermVectorOffsets()) {
         fieldSetting.offset = lastOffset + 1;
       }
 

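The InstantiatedIndexWriter hunks above are largely a mechanical rename from the old Fieldable getters (isIndexed, isTokenized, isStored, isBinary, isTermVectorStored, isStorePositionWithTermVector, isStoreOffsetWithTermVector, getOmitNorms, getBoost) to the IndexableField forms (indexed, tokenized, stored, binaryValue() != null, storeTermVectors, storeTermVectorPositions, storeTermVectorOffsets, omitNorms, boost), with documents iterated directly instead of via getFields(). In the new style:

    // Inspect a Document's fields through the IndexableField view.
    for (IndexableField field : doc) {
      if (field.indexed() && field.storeTermVectors()) {
        System.out.println(field.name() + " boost=" + field.boost());
      }
    }
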
@@ -29,6 +29,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiNorms;
@@ -204,19 +206,44 @@ public class TestIndicesEquals extends LuceneTestCase {
 
 
   private void assembleDocument(Document document, int i) {
-    document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+    customType.setStoreTermVectors(true);
+    customType.setStoreTermVectorOffsets(true);
+    customType.setStoreTermVectorPositions(true);
+    //document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    document.add(new Field("a", customType, i + " Do you really want to go and live in that house all winter?"));
     if (i > 0) {
-      document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-      document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
-      document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
-      document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
+      //document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+      document.add(new Field("b0", customType, i + " All work and no play makes Jack a dull boy"));
+
+      //document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
+      FieldType customType2 = new FieldType(TextField.TYPE_STORED);
+      customType2.setTokenized(false);
+      customType2.setOmitNorms(true);
+      document.add(new Field("b1", customType2, i + " All work and no play makes Jack a dull boy"));
+
+      //document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
+      FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED);
+      customType3.setTokenized(false);
+      document.add(new Field("b1", customType3, i + " All work and no play makes Jack a dull boy"));
+
+      //document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
+      FieldType customType4 = new FieldType(TextField.TYPE_STORED);
+      customType4.setIndexed(false);
+      customType4.setTokenized(false);
+      document.add(new Field("b1", customType4, i + " All work and no play makes Jack a dull boy"));
       if (i > 1) {
-        document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        //document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        document.add(new Field("c", customType, i + " Redrum redrum"));
         if (i > 2) {
-          document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+          //document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+          document.add(new Field("d", customType, i + " Hello Danny, come and play with us... forever and ever. and ever."));
           if (i > 3) {
-            Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
-            f.setOmitNorms(true);
+            //Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+            //f.setOmitNorms(true);
+            FieldType customType5 = new FieldType(TextField.TYPE_UNSTORED);
+            customType5.setOmitNorms(true);
+            Field f = new Field("e", customType5, i + " Heres Johnny!");
             document.add(f);
             if (i > 4) {
               final List<Token> tokens = new ArrayList<Token>(2);
@@ -247,7 +274,8 @@ public class TestIndicesEquals extends LuceneTestCase {
         }
       };
 
-      document.add(new Field("f", ts));
+      //document.add(new Field("f", ts));
+      document.add(new TextField("f", ts));
     }
   }
 }

@@ -19,7 +19,7 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.LuceneTestCase;
@@ -43,7 +43,7 @@ public class TestRealTime extends LuceneTestCase {
     Collector collector;
 
     doc = new Document();
-    doc.add(new Field("f", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(new StringField("f", "a"));
     writer.addDocument(doc);
     writer.commit();
 
@@ -52,7 +52,7 @@ public class TestRealTime extends LuceneTestCase {
     assertEquals(1, collector.hits);
 
     doc = new Document();
-    doc.add(new Field("f", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(new StringField("f", "a"));
     writer.addDocument(doc);
     writer.commit();
 

@@ -25,7 +25,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 
 /**
  * @since 2009-mar-30 13:15:49
@@ -66,7 +66,7 @@ public class TestUnoptimizedReaderOnConstructor extends LuceneTestCase {
 
   private void addDocument(IndexWriter iw, String text) throws IOException {
     Document doc = new Document();
-    doc.add(new Field("field", text, Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new TextField("field", text));
     iw.addDocument(doc);
   }
 }

@@ -35,23 +35,24 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.index.OrdTermState;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermVectorMapper;
-import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -60,8 +61,8 @@ import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.store.RAMDirectory; // for javadocs
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Constants; // for javadocs
 
 /**
@@ -240,11 +241,8 @@ public class MemoryIndex {
   /**
    * Convenience method; Tokenizes the given field text and adds the resulting
    * terms to the index; Equivalent to adding an indexed non-keyword Lucene
-   * {@link org.apache.lucene.document.Field} that is
-   * {@link org.apache.lucene.document.Field.Index#ANALYZED tokenized},
-   * {@link org.apache.lucene.document.Field.Store#NO not stored},
-   * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions} (or
-   * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions and offsets}),
+   * {@link org.apache.lucene.document.Field} that is tokenized, not stored,
+   * termVectorStored with positions (or termVectorStored with positions and offsets),
    *
    * @param fieldName
    *            a name to be associated with the text
@@ -1237,16 +1235,9 @@ public class MemoryIndex {
     }
 
     @Override
-    public Document document(int n) {
+    public void document(int docID, StoredFieldVisitor visitor) {
       if (DEBUG) System.err.println("MemoryIndexReader.document");
-      return new Document(); // there are no stored fields
-    }
-
-    //When we convert to JDK 1.5 make this Set<String>
-    @Override
-    public Document document(int n, FieldSelector fieldSelector) throws IOException {
-      if (DEBUG) System.err.println("MemoryIndexReader.document");
-      return new Document(); // there are no stored fields
+      // no-op: there are no stored fields
     }
 
     @Override

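For orientation, the trimmed MemoryIndex javadoc above documents addField, which tokenizes text straight into the in-memory structure; nothing is ever stored, which is why the new document(int, StoredFieldVisitor) override is a no-op. A typical usage sketch, with the analyzer and query assumed to be in scope:

    MemoryIndex index = new MemoryIndex();
    index.addField("content", "some text to match against", analyzer);
    // search() returns a relevance score; 0.0f means no match.
    float score = index.search(query);
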
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.queryparser.classic.QueryParser;
@@ -108,8 +109,8 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
     IndexWriter writer = new IndexWriter(ramdir,
         new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
     Document doc = new Document();
-    Field field1 = newField("foo", fooField.toString(), Field.Store.NO, Field.Index.ANALYZED);
-    Field field2 = newField("term", termField.toString(), Field.Store.NO, Field.Index.ANALYZED);
+    Field field1 = newField("foo", fooField.toString(), TextField.TYPE_UNSTORED);
+    Field field2 = newField("term", termField.toString(), TextField.TYPE_UNSTORED);
     doc.add(field1);
     doc.add(field2);
     writer.addDocument(doc);

@@ -1,5 +1,6 @@
 package org.apache.lucene.document;
 
+
 /**
  * Copyright 2004 The Apache Software Foundation
  *
@@ -18,7 +19,7 @@ package org.apache.lucene.document;
 
 /**
  * Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
- * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
+ * what Fields get loaded on a {@link Document} by {@link FieldSelectorVisitor}
  *
 **/
 public interface FieldSelector {

@@ -20,39 +20,41 @@ package org.apache.lucene.document;
  * Provides information about what should be done with this Field
  *
 **/
+import org.apache.lucene.index.IndexableField; // for javadocs
+
 public enum FieldSelectorResult {
 
   /**
    * Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered.
-   * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
+   * {@link Document#getField(String)} should not return null.
    *<p/>
-   * {@link Document#add(Fieldable)} should be called by the Reader.
+   * {@link Document#add(IndexableField)} should be called by the Reader.
    */
   LOAD,
 
   /**
    * Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until
-   * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
-   * return a valid instance of a {@link Fieldable}.
+   * invoked. {@link Document#getField(String)} is safe to use and should
+   * return a valid instance of a {@link IndexableField}.
    *<p/>
-   * {@link Document#add(Fieldable)} should be called by the Reader.
+   * {@link Document#add(IndexableField)} should be called by the Reader.
   */
   LAZY_LOAD,
 
   /**
-   * Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
-   * {@link Document#add(Fieldable)} is not called.
+   * Do not load the {@link Field}. {@link Document#getField(String)} should return null.
+   * {@link Document#add(IndexableField)} is not called.
    * <p/>
-   * {@link Document#add(Fieldable)} should not be called by the Reader.
+   * {@link Document#add(IndexableField)} should not be called by the Reader.
   */
   NO_LOAD,
 
   /**
    * Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the
-   * Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
+   * Document may not have its complete set of Fields. {@link Document#getField(String)} should
    * both be valid for this {@link Field}
   * <p/>
-   * {@link Document#add(Fieldable)} should be called by the Reader.
+   * {@link Document#add(IndexableField)} should be called by the Reader.
   */
   LOAD_AND_BREAK,
 
@@ -67,10 +69,10 @@ public enum FieldSelectorResult {
 
   /**
    * Lazily load this {@link Field}, but do not cache the result. This means the {@link Field} is valid, but it may not actually contain its data until
-   * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
-   * return a valid instance of a {@link Fieldable}.
+   * invoked. {@link Document#getField(String)} is safe to use and should
+   * return a valid instance of a {@link IndexableField}.
    *<p/>
-   * {@link Document#add(Fieldable)} should be called by the Reader.
+   * {@link Document#add(IndexableField)} should be called by the Reader.
   */
   LATENT
 }

@ -0,0 +1,319 @@
package org.apache.lucene.document;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.NumericField.DataType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldReaderException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;

/** Create this, passing a legacy {@link FieldSelector} to it, then
 * pass this class to {@link IndexReader#document(int,
 * StoredFieldVisitor)}, then call {@link #getDocument} to
 * retrieve the loaded document.
 *
 * <p><b>NOTE</b>: If you use Lazy fields, you should not
 * access the returned document after the reader has been
 * closed!
 */

public class FieldSelectorVisitor extends StoredFieldVisitor {

  private final FieldSelector selector;
  private final Document doc;

  public FieldSelectorVisitor(FieldSelector selector) {
    this.selector = selector;
    doc = new Document();
  }

  public Document getDocument() {
    return doc;
  }

  @Override
  public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException {
    final FieldSelectorResult accept = selector.accept(fieldInfo.name);
    switch (accept) {
    case LOAD:
    case LOAD_AND_BREAK:
      final byte[] b = new byte[numBytes];
      in.readBytes(b, 0, b.length);
      doc.add(new BinaryField(fieldInfo.name, b));
      return accept != FieldSelectorResult.LOAD;
    case LAZY_LOAD:
    case LATENT:
      addFieldLazy(in, fieldInfo, true, accept == FieldSelectorResult.LAZY_LOAD, numBytes);
      return false;
    case SIZE:
    case SIZE_AND_BREAK:
      in.seek(in.getFilePointer() + numBytes);
      addFieldSize(fieldInfo, numBytes);
      return accept != FieldSelectorResult.SIZE;
    default:
      // skip
      in.seek(in.getFilePointer() + numBytes);
      return false;
    }
  }

  @Override
  public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
    final FieldSelectorResult accept = selector.accept(fieldInfo.name);
    switch (accept) {
    case LOAD:
    case LOAD_AND_BREAK:
      final byte[] b = new byte[numUTF8Bytes];
      in.readBytes(b, 0, b.length);
      FieldType ft = new FieldType(TextField.TYPE_STORED);
      ft.setStoreTermVectors(fieldInfo.storeTermVector);
      ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector);
      ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector);
      doc.add(new Field(fieldInfo.name, ft, new String(b, "UTF-8")));
      return accept != FieldSelectorResult.LOAD;
    case LAZY_LOAD:
    case LATENT:
      addFieldLazy(in, fieldInfo, false, accept == FieldSelectorResult.LAZY_LOAD, numUTF8Bytes);
      return false;
    case SIZE:
    case SIZE_AND_BREAK:
      in.seek(in.getFilePointer() + numUTF8Bytes);
      addFieldSize(fieldInfo, 2*numUTF8Bytes);
      return accept != FieldSelectorResult.SIZE;
    default:
      // skip
      in.seek(in.getFilePointer() + numUTF8Bytes);
      return false;
    }
  }

  @Override
  public boolean intField(FieldInfo fieldInfo, int value) throws IOException {
    FieldType ft = new FieldType(NumericField.TYPE_STORED);
    ft.setIndexed(fieldInfo.isIndexed);
    ft.setOmitNorms(fieldInfo.omitNorms);
    ft.setIndexOptions(fieldInfo.indexOptions);
    return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setIntValue(value));
  }

  @Override
  public boolean longField(FieldInfo fieldInfo, long value) throws IOException {
    FieldType ft = new FieldType(NumericField.TYPE_STORED);
    ft.setIndexed(fieldInfo.isIndexed);
    ft.setOmitNorms(fieldInfo.omitNorms);
    ft.setIndexOptions(fieldInfo.indexOptions);
    return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setLongValue(value));
  }

  @Override
  public boolean floatField(FieldInfo fieldInfo, float value) throws IOException {
    FieldType ft = new FieldType(NumericField.TYPE_STORED);
    ft.setIndexed(fieldInfo.isIndexed);
    ft.setOmitNorms(fieldInfo.omitNorms);
    ft.setIndexOptions(fieldInfo.indexOptions);
    return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setFloatValue(value));
  }

  @Override
  public boolean doubleField(FieldInfo fieldInfo, double value) throws IOException {
    FieldType ft = new FieldType(NumericField.TYPE_STORED);
    ft.setIndexed(fieldInfo.isIndexed);
    ft.setOmitNorms(fieldInfo.omitNorms);
    ft.setIndexOptions(fieldInfo.indexOptions);
    return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setDoubleValue(value));
  }

  private boolean addNumericField(FieldInfo fieldInfo, NumericField f) {
    doc.add(f);
    final FieldSelectorResult accept = selector.accept(fieldInfo.name);
    switch (accept) {
    case LOAD:
      return false;
    case LOAD_AND_BREAK:
      return true;
    case LAZY_LOAD:
    case LATENT:
      return false;
    case SIZE:
      return false;
    case SIZE_AND_BREAK:
      return true;
    default:
      return false;
    }
  }

  private void addFieldLazy(IndexInput in, FieldInfo fi, boolean binary, boolean cacheResult, int numBytes) throws IOException {
    final IndexableField f;
    final long pointer = in.getFilePointer();
    // Need to move the pointer ahead by toRead positions
    in.seek(pointer+numBytes);
    FieldType ft = new FieldType();
    ft.setStored(true);
    ft.setOmitNorms(fi.omitNorms);
    ft.setIndexOptions(fi.indexOptions);
    ft.setLazy(true);

    if (binary) {
      f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult);
    } else {
      ft.setStoreTermVectors(fi.storeTermVector);
      ft.setStoreTermVectorOffsets(fi.storeOffsetWithTermVector);
      ft.setStoreTermVectorPositions(fi.storePositionWithTermVector);
      f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult);
    }

    doc.add(f);
  }

  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
  // Read just the size -- caller must skip the field content to continue reading fields
  // Return the size in bytes or chars, depending on field type
  private void addFieldSize(FieldInfo fi, int numBytes) throws IOException {
    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (numBytes>>>24);
    sizebytes[1] = (byte) (numBytes>>>16);
    sizebytes[2] = (byte) (numBytes>>> 8);
    sizebytes[3] = (byte)  numBytes;
    doc.add(new BinaryField(fi.name, sizebytes));
  }

  /**
   * A Lazy field implementation that defers loading of fields until asked for, instead of when the Document is
   * loaded.
   */
  private static class LazyField extends Field {
    private int toRead;
    private long pointer;
    private final boolean cacheResult;
    private final IndexInput in;
    private boolean isBinary;

    public LazyField(IndexInput in, String name, FieldType ft, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
      super(name, ft);
      this.in = in;
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      this.cacheResult = cacheResult;
    }

    @Override
    public Number numericValue() {
      return null;
    }

    @Override
    public DataType numericDataType() {
      return null;
    }

    private IndexInput localFieldsStream;

    private IndexInput getFieldStream() {
      if (localFieldsStream == null) {
        localFieldsStream = (IndexInput) in.clone();
      }
      return localFieldsStream;
    }

    /** The value of the field as a Reader, or null. If null, the String value,
     * binary value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    @Override
    public Reader readerValue() {
      return null;
    }

    /** The value of the field as a TokenStream, or null. If null, the Reader value,
     * String value, or binary value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    @Override
    public TokenStream tokenStreamValue() {
      return null;
    }

    /** The value of the field as a String, or null. If null, the Reader value,
     * binary value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    @Override
    synchronized public String stringValue() {
      if (isBinary) {
        return null;
      } else {
        if (fieldsData == null) {
          String result = null;
          IndexInput localFieldsStream = getFieldStream();
          try {
            localFieldsStream.seek(pointer);
            byte[] bytes = new byte[toRead];
            localFieldsStream.readBytes(bytes, 0, toRead);
            result = new String(bytes, "UTF-8");
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }
          if (cacheResult == true){
            fieldsData = result;
          }
          return result;
        } else {
          return (String) fieldsData;
        }
      }
    }

    @Override
    synchronized public BytesRef binaryValue() {
      if (isBinary) {
        if (fieldsData == null) {
          // Allocate new buffer if result is null or too small
          final byte[] b = new byte[toRead];

          IndexInput localFieldsStream = getFieldStream();

          // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
          // since they are already handling this exception when getting the document
          try {
            localFieldsStream.seek(pointer);
            localFieldsStream.readBytes(b, 0, toRead);
          } catch (IOException e) {
            throw new FieldReaderException(e);
          }

          final BytesRef result = new BytesRef(b);
          result.length = toRead;
          if (cacheResult == true){
            fieldsData = result;
          }
          return result;
        } else {
          return (BytesRef) fieldsData;
        }
      } else {
        return null;
      }
    }
  }
}
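Editor's sketch of the usage the class javadoc describes (not part of this commit; `reader` is an assumed open IndexReader and `TitleBodySelector` is the hypothetical selector sketched earlier):

// Sketch: load the stored fields of doc 0 through a legacy selector.
FieldSelectorVisitor visitor = new FieldSelectorVisitor(new TitleBodySelector());
reader.document(0, visitor);
Document doc = visitor.getDocument();
String title = doc.get("title");              // eagerly loaded (LOAD)
IndexableField body = doc.getField("body");   // LazyField: bytes read on first stringValue()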
@ -1,6 +1,5 @@
package org.apache.lucene.document;

import java.util.Set;
/**
 * Copyright 2004 The Apache Software Foundation
 *

@ -17,10 +16,14 @@ import java.util.Set;
 * limitations under the License.
 */

import java.util.Set;


/**
 * Declare what fields to load normally and what fields to load lazily
 *
 **/

public class SetBasedFieldSelector implements FieldSelector {

  private Set<String> fieldsToLoad;

@ -35,7 +35,7 @@ import org.apache.lucene.util.ReaderUtil;
 *
 * If Similarity class is specified, uses its computeNorm method to set norms.
 * If -n command line argument is used, removed field norms, as if
 * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used.
 * {@link org.apache.lucene.document.FieldType#setOmitNorms(boolean)} was used.
 *
 * <p>
 * NOTE: This will overwrite any length normalization or field/document boosts.
@ -0,0 +1,318 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.File;
import java.io.IOException;
import java.util.*;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;


public class TestContribFieldsReader extends LuceneTestCase {
  private static Directory dir;
  private static org.apache.lucene.document.Document testDoc = new org.apache.lucene.document.Document();
  private static FieldInfos fieldInfos = null;

  @BeforeClass
  public static void beforeClass() throws Exception {
    fieldInfos = new FieldInfos();
    DocHelper.setupDoc(testDoc);
    _TestUtil.add(testDoc, fieldInfos);
    dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy());
    ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
    IndexWriter writer = new IndexWriter(dir, conf);
    writer.addDocument(testDoc);
    writer.close();
  }

  @AfterClass
  public static void afterClass() throws Exception {
    dir.close();
    dir = null;
    fieldInfos = null;
    testDoc = null;
  }

  private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
    final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
    ir.document(docID, visitor);
    return visitor.getDocument();
  }

  public void testLazyFields() throws Exception {
    assertTrue(dir != null);
    assertTrue(fieldInfos != null);
    IndexReader reader = IndexReader.open(dir);
    Set<String> loadFieldNames = new HashSet<String>();
    loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
    Set<String> lazyFieldNames = new HashSet<String>();
    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
    Document doc = getDocument(reader, 0, fieldSelector);
    assertTrue("doc is null and it shouldn't be", doc != null);
    IndexableField field = doc.getField(DocHelper.LAZY_FIELD_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField"));
    String value = field.stringValue();
    assertTrue("value is null and it shouldn't be", value != null);
    assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());

    field = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
    field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);

    field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField"));
    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);

    field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);

    byte [] bytes = field.binaryValue().bytes;
    assertTrue("bytes is null and it shouldn't be", bytes != null);
    assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
    assertTrue("calling binaryValue() twice should give same reference", field.binaryValue().bytes == field.binaryValue().bytes);
    for (int i = 0; i < bytes.length; i++) {
      assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);

    }
    reader.close();
  }

  public void testLatentFields() throws Exception {
    assertTrue(dir != null);
    assertTrue(fieldInfos != null);
    IndexReader reader = IndexReader.open(dir);
    Set<String> loadFieldNames = new HashSet<String>();
    loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
    loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
    Set<String> lazyFieldNames = new HashSet<String>();
    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
    lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
    lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
    lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);

    // Use LATENT instead of LAZY
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) {
        @Override
        public FieldSelectorResult accept(String fieldName) {
          final FieldSelectorResult result = super.accept(fieldName);
          if (result == FieldSelectorResult.LAZY_LOAD) {
            return FieldSelectorResult.LATENT;
          } else {
            return result;
          }
        }
      };

    Document doc = getDocument(reader, 0, fieldSelector);
    assertTrue("doc is null and it shouldn't be", doc != null);
    IndexableField field = doc.getField(DocHelper.LAZY_FIELD_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField"));
    String value = field.stringValue();
    assertTrue("value is null and it shouldn't be", value != null);
    assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
    assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());

    field = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());

    field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());

    field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField"));
    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
    assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());

    field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
    assertTrue("field is null and it shouldn't be", field != null);
    assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
    assertTrue("calling binaryValue() twice should give different references", field.binaryValue().bytes != field.binaryValue().bytes);

    byte [] bytes = field.binaryValue().bytes;
    assertTrue("bytes is null and it shouldn't be", bytes != null);
    assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
    for (int i = 0; i < bytes.length; i++) {
      assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);

    }
    reader.close();
  }

  public void testLoadFirst() throws Exception {
    assertTrue(dir != null);
    assertTrue(fieldInfos != null);
    IndexReader reader = IndexReader.open(dir);
    LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
    Document doc = getDocument(reader, 0, fieldSelector);
    assertTrue("doc is null and it shouldn't be", doc != null);
    int count = 0;
    List<IndexableField> l = doc.getFields();
    for (final IndexableField IndexableField : l ) {
      Field field = (Field) IndexableField;

      assertTrue("field is null and it shouldn't be", field != null);
      String sv = field.stringValue();
      assertTrue("sv is null and it shouldn't be", sv != null);
      count++;
    }
    assertTrue(count + " does not equal: " + 1, count == 1);
    reader.close();
  }

  /**
   * Not really a test per se, but we should have some way of assessing whether this is worthwhile.
   * <p/>
   * Must test using a File based directory
   *
   * @throws Exception
   */
  public void testLazyPerformance() throws Exception {
    String userName = System.getProperty("user.name");
    File file = _TestUtil.getTempDir("lazyDir" + userName);
    Directory tmpDir = newFSDirectory(file);
    assertTrue(tmpDir != null);

    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy());
    ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
    IndexWriter writer = new IndexWriter(tmpDir, conf);
    writer.addDocument(testDoc);
    writer.close();

    assertTrue(fieldInfos != null);
    long lazyTime = 0;
    long regularTime = 0;
    int length = 10;
    Set<String> lazyFieldNames = new HashSet<String>();
    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections. <String> emptySet(), lazyFieldNames);

    for (int i = 0; i < length; i++) {
      IndexReader reader = IndexReader.open(tmpDir);

      Document doc;
      doc = reader.document(0);//Load all of them
      assertTrue("doc is null and it shouldn't be", doc != null);
      IndexableField field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
      assertTrue("field is null and it shouldn't be", field != null);
      assertFalse("field is lazy", field.getClass().getSimpleName().equals("LazyField"));
      String value;
      long start;
      long finish;
      start = System.currentTimeMillis();
      //On my machine this was always 0ms.
      value = field.stringValue();
      finish = System.currentTimeMillis();
      assertTrue("value is null and it shouldn't be", value != null);
      regularTime += (finish - start);
      reader.close();
      reader = null;
      doc = null;
      //Hmmm, are we still in cache???
      System.gc();
      reader = IndexReader.open(tmpDir);
      doc = getDocument(reader, 0, fieldSelector);
      field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
      assertTrue("field is not lazy", field.getClass().getSimpleName().equals("LazyField"));
      start = System.currentTimeMillis();
      //On my machine this took around 50 - 70ms
      value = field.stringValue();
      finish = System.currentTimeMillis();
      assertTrue("value is null and it shouldn't be", value != null);
      lazyTime += (finish - start);
      reader.close();

    }
    tmpDir.close();
    if (VERBOSE) {
      System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
      System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
    }
  }

  public void testLoadSize() throws IOException {
    IndexReader reader = IndexReader.open(dir);
    Document doc;

    doc = getDocument(reader, 0, new FieldSelector(){
      public FieldSelectorResult accept(String fieldName) {
        if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
            fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
          return FieldSelectorResult.SIZE;
        else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
          return FieldSelectorResult.LOAD;
        else
          return FieldSelectorResult.NO_LOAD;
      }
    });
    IndexableField f1 = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
    IndexableField f3 = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
    IndexableField fb = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
    assertTrue(f1.binaryValue()!=null);
    assertTrue(f3.binaryValue()==null);
    assertTrue(fb.binaryValue()!=null);
    assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.binaryValue().bytes);
    assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue());
    assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue().bytes);

    reader.close();
  }

  private void assertSizeEquals(int size, byte[] sizebytes) {
    assertEquals((byte) (size>>>24), sizebytes[0]);
    assertEquals((byte) (size>>>16), sizebytes[1]);
    assertEquals((byte) (size>>> 8), sizebytes[2]);
    assertEquals((byte) size, sizebytes[3]);
  }
}
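Editor's note on the SIZE results exercised above (a sketch, not part of this commit): addFieldSize stores the size as four big-endian bytes, so a caller can recover the int with a helper like this hypothetical one.

// Sketch: decode the 4-byte big-endian size that addFieldSize encoded.
static int decodeFieldSize(byte[] sizebytes) {
  return ((sizebytes[0] & 0xFF) << 24)
       | ((sizebytes[1] & 0xFF) << 16)
       | ((sizebytes[2] & 0xFF) <<  8)
       |  (sizebytes[3] & 0xFF);
}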
@ -0,0 +1,169 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;

public class TestContribIndexReader extends LuceneTestCase {
  private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
    final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
    ir.document(docID, visitor);
    return visitor.getDocument();
  }

  static void addDoc(IndexWriter writer, String value) throws IOException {
    Document doc = new Document();
    doc.add(newField("content", value, TextField.TYPE_UNSTORED));
    writer.addDocument(doc);
  }

  static void addDocumentWithFields(IndexWriter writer) throws IOException {
    Document doc = new Document();

    FieldType customType3 = new FieldType();
    customType3.setStored(true);
    doc.add(newField("keyword", "test1", StringField.TYPE_STORED));
    doc.add(newField("text", "test1", TextField.TYPE_STORED));
    doc.add(newField("unindexed", "test1", customType3));
    doc.add(new TextField("unstored","test1"));
    writer.addDocument(doc);
  }


  static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException {
    Document doc = new Document();

    FieldType customType3 = new FieldType();
    customType3.setStored(true);
    doc.add(newField("keyword2", "test1", StringField.TYPE_STORED));
    doc.add(newField("text2", "test1", TextField.TYPE_STORED));
    doc.add(newField("unindexed2", "test1", customType3));
    doc.add(new TextField("unstored2","test1"));
    writer.addDocument(doc);
  }

  static void addDocumentWithTermVectorFields(IndexWriter writer) throws IOException {
    Document doc = new Document();
    FieldType customType5 = new FieldType(TextField.TYPE_STORED);
    customType5.setStoreTermVectors(true);
    FieldType customType6 = new FieldType(TextField.TYPE_STORED);
    customType6.setStoreTermVectors(true);
    customType6.setStoreTermVectorOffsets(true);
    FieldType customType7 = new FieldType(TextField.TYPE_STORED);
    customType7.setStoreTermVectors(true);
    customType7.setStoreTermVectorPositions(true);
    FieldType customType8 = new FieldType(TextField.TYPE_STORED);
    customType8.setStoreTermVectors(true);
    customType8.setStoreTermVectorOffsets(true);
    customType8.setStoreTermVectorPositions(true);
    doc.add(newField("tvnot","tvnot",TextField.TYPE_STORED));
    doc.add(newField("termvector","termvector",customType5));
    doc.add(newField("tvoffset","tvoffset", customType6));
    doc.add(newField("tvposition","tvposition", customType7));
    doc.add(newField("tvpositionoffset","tvpositionoffset", customType8));

    writer.addDocument(doc);
  }

  public void testBinaryFields() throws IOException {
    Directory dir = newDirectory();
    byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};

    IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));

    for (int i = 0; i < 10; i++) {
      addDoc(writer, "document number " + (i + 1));
      addDocumentWithFields(writer);
      addDocumentWithDifferentFields(writer);
      addDocumentWithTermVectorFields(writer);
    }
    writer.close();
    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
    Document doc = new Document();
    doc.add(new BinaryField("bin1", bin));
    doc.add(new TextField("junk", "junk text"));
    writer.addDocument(doc);
    writer.close();
    IndexReader reader = IndexReader.open(dir, false);
    Document doc2 = reader.document(reader.maxDoc() - 1);
    IndexableField[] fields = doc2.getFields("bin1");
    assertNotNull(fields);
    assertEquals(1, fields.length);
    Field b1 = (Field) fields[0];
    assertTrue(b1.isBinary());
    BytesRef bytesRef = b1.binaryValue();
    assertEquals(bin.length, bytesRef.length);
    for (int i = 0; i < bin.length; i++) {
      assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
    }
    Set<String> lazyFields = new HashSet<String>();
    lazyFields.add("bin1");
    FieldSelector sel = new SetBasedFieldSelector(new HashSet<String>(), lazyFields);
    doc2 = getDocument(reader, reader.maxDoc() - 1, sel);
    fields = doc2.getFields("bin1");
    assertNotNull(fields);
    assertEquals(1, fields.length);
    IndexableField fb1 = fields[0];
    assertTrue(fb1.binaryValue()!=null);
    bytesRef = fb1.binaryValue();
    assertEquals(bin.length, bytesRef.bytes.length);
    assertEquals(bin.length, bytesRef.length);
    for (int i = 0; i < bin.length; i++) {
      assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
    }
    reader.close();
    // force optimize


    writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
    writer.optimize();
    writer.close();
    reader = IndexReader.open(dir, false);
    doc2 = reader.document(reader.maxDoc() - 1);
    fields = doc2.getFields("bin1");
    assertNotNull(fields);
    assertEquals(1, fields.length);
    b1 = (Field) fields[0];
    assertTrue(b1.isBinary());
    bytesRef = b1.binaryValue();
    assertEquals(bin.length, bytesRef.length);
    for (int i = 0; i < bin.length; i++) {
      assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
    }
    reader.close();
    dir.close();
  }
}
@ -0,0 +1,149 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Arrays;
import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

public class TestContribParallelReader extends LuceneTestCase {

  private IndexSearcher parallel;
  private IndexSearcher single;
  private Directory dir, dir1, dir2;

  @Override
  public void setUp() throws Exception {
    super.setUp();
    single = single(random);
    parallel = parallel(random);
  }

  @Override
  public void tearDown() throws Exception {
    single.getIndexReader().close();
    single.close();
    parallel.getIndexReader().close();
    parallel.close();
    dir.close();
    dir1.close();
    dir2.close();
    super.tearDown();
  }

  // Fields 1-4 indexed together:
  private IndexSearcher single(Random random) throws IOException {
    dir = newDirectory();
    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document d1 = new Document();
    d1.add(newField("f1", "v1", TextField.TYPE_STORED));
    d1.add(newField("f2", "v1", TextField.TYPE_STORED));
    d1.add(newField("f3", "v1", TextField.TYPE_STORED));
    d1.add(newField("f4", "v1", TextField.TYPE_STORED));
    w.addDocument(d1);
    Document d2 = new Document();
    d2.add(newField("f1", "v2", TextField.TYPE_STORED));
    d2.add(newField("f2", "v2", TextField.TYPE_STORED));
    d2.add(newField("f3", "v2", TextField.TYPE_STORED));
    d2.add(newField("f4", "v2", TextField.TYPE_STORED));
    w.addDocument(d2);
    w.close();

    return new IndexSearcher(dir, false);
  }

  // Fields 1 & 2 in one index, 3 & 4 in other, with ParallelReader:
  private IndexSearcher parallel(Random random) throws IOException {
    dir1 = getDir1(random);
    dir2 = getDir2(random);
    ParallelReader pr = new ParallelReader();
    pr.add(IndexReader.open(dir1, false));
    pr.add(IndexReader.open(dir2, false));
    return newSearcher(pr);
  }

  private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
    final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
    ir.document(docID, visitor);
    return visitor.getDocument();
  }

  public void testDocument() throws IOException {
    Directory dir1 = getDir1(random);
    Directory dir2 = getDir2(random);
    ParallelReader pr = new ParallelReader();
    pr.add(IndexReader.open(dir1, false));
    pr.add(IndexReader.open(dir2, false));

    Document doc11 = getDocument(pr, 0, new MapFieldSelector("f1"));
    Document doc24 = getDocument(pr, 1, new MapFieldSelector(Arrays.asList("f4")));
    Document doc223 = getDocument(pr, 1, new MapFieldSelector("f2", "f3"));

    assertEquals(1, doc11.getFields().size());
    assertEquals(1, doc24.getFields().size());
    assertEquals(2, doc223.getFields().size());

    assertEquals("v1", doc11.get("f1"));
    assertEquals("v2", doc24.get("f4"));
    assertEquals("v2", doc223.get("f2"));
    assertEquals("v2", doc223.get("f3"));
    pr.close();
    dir1.close();
    dir2.close();
  }

  private Directory getDir1(Random random) throws IOException {
    Directory dir1 = newDirectory();
    IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document d1 = new Document();
    d1.add(newField("f1", "v1", TextField.TYPE_STORED));
    d1.add(newField("f2", "v1", TextField.TYPE_STORED));
    w1.addDocument(d1);
    Document d2 = new Document();
    d2.add(newField("f1", "v2", TextField.TYPE_STORED));
    d2.add(newField("f2", "v2", TextField.TYPE_STORED));
    w1.addDocument(d2);
    w1.close();
    return dir1;
  }

  private Directory getDir2(Random random) throws IOException {
    Directory dir2 = newDirectory();
    IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
    Document d3 = new Document();
    d3.add(newField("f3", "v1", TextField.TYPE_STORED));
    d3.add(newField("f4", "v1", TextField.TYPE_STORED));
    w2.addDocument(d3);
    Document d4 = new Document();
    d4.add(newField("f3", "v2", TextField.TYPE_STORED));
    d4.add(newField("f4", "v2", TextField.TYPE_STORED));
    w2.addDocument(d4);
    w2.close();
    return dir2;
  }
}
@ -22,7 +22,8 @@ import java.util.Arrays;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;

@ -65,13 +66,15 @@ public class TestFieldNormModifier extends LuceneTestCase {

    for (int i = 0; i < NUM_DOCS; i++) {
      Document d = new Document();
      d.add(newField("field", "word", Field.Store.YES, Field.Index.ANALYZED));
      d.add(newField("nonorm", "word", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
      d.add(newField("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.ANALYZED));

      d.add(newField("field", "word", TextField.TYPE_STORED));

      d.add(newField("nonorm", "word", StringField.TYPE_STORED));
      d.add(newField("untokfield", "20061212 20071212", TextField.TYPE_STORED));

      for (int j = 1; j <= i; j++) {
        d.add(newField("field", "crap", Field.Store.YES, Field.Index.ANALYZED));
        d.add(newField("nonorm", "more words", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
        d.add(newField("field", "crap", TextField.TYPE_STORED));
        d.add(newField("nonorm", "more words", StringField.TYPE_STORED));
      }
      writer.addDocument(d);
    }
@ -21,6 +21,7 @@ import java.io.File;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

@ -112,10 +113,10 @@ public class TestIndexSplitter extends LuceneTestCase {
    Directory fsDir = newFSDirectory(indexPath);
    IndexWriter indexWriter = new IndexWriter(fsDir, iwConfig);
    Document doc = new Document();
    doc.add(new Field("content", "doc 1", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
    doc.add(new Field("content", StringField.TYPE_STORED, "doc 1"));
    indexWriter.addDocument(doc);
    doc = new Document();
    doc.add(new Field("content", "doc 2", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
    doc.add(new Field("content", StringField.TYPE_STORED, "doc 2"));
    indexWriter.addDocument(doc);
    indexWriter.close();
    fsDir.close();
@ -22,7 +22,11 @@ import java.util.List;
import java.util.Set;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;

@ -87,8 +91,7 @@ public class TestLazyBug extends LuceneTestCase {
        doc.add(newField("f"+f,
                         data[f % data.length]
                         + '#' + data[random.nextInt(data.length)],
                         Field.Store.NO,
                         Field.Index.ANALYZED));
                         TextField.TYPE_UNSTORED));
      }
      writer.addDocument(doc);
    }

@ -102,12 +105,14 @@ public class TestLazyBug extends LuceneTestCase {
  public void doTest(int[] docs) throws Exception {
    IndexReader reader = IndexReader.open(directory, true);
    for (int i = 0; i < docs.length; i++) {
      Document d = reader.document(docs[i], SELECTOR);
      final FieldSelectorVisitor visitor = new FieldSelectorVisitor(SELECTOR);
      reader.document(docs[i], visitor);
      Document d = visitor.getDocument();
      d.get(MAGIC_FIELD);

      List<Fieldable> fields = d.getFields();
      for (Iterator<Fieldable> fi = fields.iterator(); fi.hasNext(); ) {
        Fieldable f=null;
      List<IndexableField> fields = d.getFields();
      for (Iterator<IndexableField> fi = fields.iterator(); fi.hasNext(); ) {
        IndexableField f=null;
        try {
          f = fi.next();
          String fname = f.name();

@ -136,5 +141,4 @@ public class TestLazyBug extends LuceneTestCase {
  public void testLazyBroken() throws Exception {
    doTest(new int[] { NUM_DOCS/2, NUM_DOCS-1 });
  }

}
@ -18,10 +18,11 @@ package org.apache.lucene.index;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;

public class TestMultiPassIndexSplitter extends LuceneTestCase {
  IndexReader input;

@ -36,8 +37,8 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
    Document doc;
    for (int i = 0; i < NUM_DOCS; i++) {
      doc = new Document();
      doc.add(newField("id", i + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
      doc.add(newField("f", i + " " + i, Field.Store.YES, Field.Index.ANALYZED));
      doc.add(newField("id", i + "", StringField.TYPE_STORED));
      doc.add(newField("f", i + " " + i, TextField.TYPE_STORED));
      w.addDocument(doc);
    }
    w.close();

@ -33,7 +33,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;

@ -77,17 +78,12 @@ public class TestNRTManager extends LuceneTestCase {
  // TODO: is there a pre-existing way to do this!!!
  private Document cloneDoc(Document doc1) {
    final Document doc2 = new Document();
    for(Fieldable f : doc1.getFields()) {
    for(IndexableField f : doc1) {
      Field field1 = (Field) f;

      Field field2 = new Field(field1.name(),
                               field1.stringValue(),
                               field1.isStored() ? Field.Store.YES : Field.Store.NO,
                               field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
      if (field1.getOmitNorms()) {
        field2.setOmitNorms(true);
      }
      field2.setIndexOptions(field1.getIndexOptions());
                               ((Field) f).getFieldType(),
                               field1.stringValue());
      doc2.add(field2);
    }

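Editor's restatement of the hunk above (a sketch, not part of this commit; `original` is an assumed source Document): the FieldType now travels with each Field, so a clone no longer has to re-derive store/index/norms flags by hand.

// Sketch: clone a Document by reusing each field's own FieldType.
Document copy = new Document();
for (IndexableField f : original) {
  copy.add(new Field(f.name(), ((Field) f).getFieldType(), f.stringValue()));
}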
@ -240,7 +236,7 @@ public class TestNRTManager extends LuceneTestCase {
      final String addedField;
      if (random.nextBoolean()) {
        addedField = "extra" + random.nextInt(10);
        doc.add(new Field(addedField, "a random field", Field.Store.NO, Field.Index.ANALYZED));
        doc.add(new TextField(addedField, "a random field"));
      } else {
        addedField = null;
      }

@ -262,7 +258,7 @@ public class TestNRTManager extends LuceneTestCase {
        packID = packCount.getAndIncrement() + "";
      }

      final Field packIDField = newField("packID", packID, Field.Store.YES, Field.Index.NOT_ANALYZED);
      final Field packIDField = newField("packID", packID, StringField.TYPE_STORED);
      final List<String> docIDs = new ArrayList<String>();
      final SubDocs subDocs = new SubDocs(packID, docIDs);
      final List<Document> docsList = new ArrayList<Document>();

@ -23,8 +23,8 @@ import java.text.NumberFormat;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;

@ -102,15 +102,15 @@ public class TestPKIndexSplitter extends LuceneTestCase {
    StringBuilder sb = new StringBuilder();
    Document doc = new Document();
    String id = format.format(n);
    doc.add(newField("id", id, Store.YES, Index.NOT_ANALYZED));
    doc.add(newField("indexname", indexName, Store.YES, Index.NOT_ANALYZED));
    doc.add(newField("id", id, StringField.TYPE_STORED));
    doc.add(newField("indexname", indexName, StringField.TYPE_STORED));
    sb.append("a");
    sb.append(n);
    doc.add(newField("field1", sb.toString(), Store.YES, Index.ANALYZED));
    doc.add(newField("field1", sb.toString(), TextField.TYPE_STORED));
    sb.append(" b");
    sb.append(n);
    for (int i = 1; i < numFields; i++) {
      doc.add(newField("field" + (i + 1), sb.toString(), Store.YES, Index.ANALYZED));
      doc.add(newField("field" + (i + 1), sb.toString(), TextField.TYPE_STORED));
    }
    return doc;
  }
@ -2,7 +2,8 @@ package org.apache.lucene.index;
|
|||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
|
@ -30,33 +31,42 @@ public class TestTermVectorAccessor extends LuceneTestCase {
|
|||
Document doc;
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
|
||||
customType.setStoreTermVectors(true);
|
||||
customType.setStoreTermVectorPositions(true);
|
||||
customType.setStoreTermVectorOffsets(true);
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", customType));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", customType));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
|
||||
FieldType customType2 = new FieldType(TextField.TYPE_UNSTORED);
|
||||
customType2.setStoreTermVectors(true);
|
||||
customType2.setStoreTermVectorPositions(true);
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType2));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", customType2));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", customType2));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||
FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED);
|
||||
customType3.setStoreTermVectors(true);
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType3));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", customType3));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", customType3));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", TextField.TYPE_UNSTORED));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", TextField.TYPE_UNSTORED));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", TextField.TYPE_UNSTORED));
|
||||
iw.addDocument(doc);
|
||||
|
||||
doc = new Document();
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
|
||||
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType));
|
||||
doc.add(newField("b", "a b c b d b e b f b g b h b", TextField.TYPE_UNSTORED));
|
||||
doc.add(newField("c", "a c b c d c e c f c g c h c", customType3));
|
||||
iw.addDocument(doc);
|
||||
|
||||
iw.close();
|
||||
|
|
|
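The hunks above show the pattern this commit applies everywhere: term-vector flags move off Field.TermVector and onto a reusable FieldType. A minimal sketch of the new idiom, assuming the Field(String name, FieldType type, String value) constructor this change introduces elsewhere:

    // Sketch: replaces Field.TermVector.WITH_POSITIONS_OFFSETS
    FieldType tvType = new FieldType(TextField.TYPE_UNSTORED);
    tvType.setStoreTermVectors(true);
    tvType.setStoreTermVectorPositions(true);
    tvType.setStoreTermVectorOffsets(true);
    Document doc = new Document();
    doc.add(new Field("body", tvType, "some analyzed text"));

Once configured, a single FieldType instance can be shared across fields and documents, as customType is reused above.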
@@ -22,9 +22,8 @@ import java.util.Random;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;

@@ -141,7 +140,11 @@ public class TestAppendingCodec extends LuceneTestCase {
((TieredMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(newField("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
FieldType storedTextType = new FieldType(TextField.TYPE_STORED);
storedTextType.setStoreTermVectors(true);
storedTextType.setStoreTermVectorPositions(true);
storedTextType.setStoreTermVectorOffsets(true);
doc.add(newField("f", text, storedTextType));
writer.addDocument(doc);
writer.commit();
writer.addDocument(doc);

@@ -149,8 +152,8 @@ public class TestAppendingCodec extends LuceneTestCase {
writer.close();
IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider());
assertEquals(2, reader.numDocs());
doc = reader.document(0);
assertEquals(text, doc.get("f"));
Document doc2 = reader.document(0);
assertEquals(text, doc2.get("f"));
Fields fields = MultiFields.getFields(reader);
Terms terms = fields.terms("f");
assertNotNull(terms);
@@ -20,7 +20,7 @@ package org.apache.lucene.misc;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;

@@ -203,9 +203,9 @@ public class TestHighFreqTerms extends LuceneTestCase {
Document doc = new Document();
String content = getContent(i);

doc.add(newField(random, "FIELD_1", content, Field.Store.YES,Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "FIELD_1", content, TextField.TYPE_STORED));
//add a different field
doc.add(newField(random, "different_field", "diff", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "different_field", "diff", TextField.TYPE_STORED));
writer.addDocument(doc);
}

@@ -213,7 +213,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
//highest freq terms for a specific field.
for (int i = 1; i <= 10; i++) {
Document doc = new Document();
doc.add(newField(random, "different_field", "diff", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "different_field", "diff", TextField.TYPE_STORED));
writer.addDocument(doc);
}
// add some docs where tf < df so we can see if sorting works

@@ -224,7 +224,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
for (int i = 0; i < highTF; i++) {
content += "highTF ";
}
doc.add(newField(random, "FIELD_1", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "FIELD_1", content, TextField.TYPE_STORED));
writer.addDocument(doc);
// highTF medium df =5
int medium_df = 5;

@@ -235,7 +235,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
for (int j = 0; j < tf; j++) {
newcontent += "highTFmedDF ";
}
newdoc.add(newField(random, "FIELD_1", newcontent, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
newdoc.add(newField(random, "FIELD_1", newcontent, TextField.TYPE_STORED));
writer.addDocument(newdoc);
}
// add a doc with high tf in field different_field

@@ -245,7 +245,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
for (int i = 0; i < targetTF; i++) {
content += "TF150 ";
}
doc.add(newField(random, "different_field", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "different_field", content, TextField.TYPE_STORED));
writer.addDocument(doc);
writer.close();
@@ -21,11 +21,12 @@ import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.FieldNormModifier;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.index.Term;

@@ -70,16 +71,12 @@ public class TestLengthNormModifier extends LuceneTestCase {

for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
d.add(newField("field", "word",
Field.Store.YES, Field.Index.ANALYZED));
d.add(newField("nonorm", "word",
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
d.add(newField("field", "word", TextField.TYPE_STORED));
d.add(newField("nonorm", "word", StringField.TYPE_STORED));

for (int j = 1; j <= i; j++) {
d.add(newField("field", "crap",
Field.Store.YES, Field.Index.ANALYZED));
d.add(newField("nonorm", "more words",
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
d.add(newField("field", "crap", TextField.TYPE_STORED));
d.add(newField("nonorm", "more words", StringField.TYPE_STORED));
}
writer.addDocument(d);
}
@@ -22,11 +22,11 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;

import java.util.Random;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.io.IOException;

@@ -68,9 +68,15 @@ public class TestThreadSafe extends LuceneTestCase {
}


private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
ir.document(docID, visitor);
return visitor.getDocument();
}

void loadDoc(IndexReader ir) throws IOException {
// beware of deleted docs in the future
Document doc = ir.document(rand.nextInt(ir.maxDoc()),
Document doc = getDocument(ir, rand.nextInt(ir.maxDoc()),
new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
switch(rand.nextInt(2)) {

@@ -83,8 +89,7 @@ public class TestThreadSafe extends LuceneTestCase {
}
);

List<Fieldable> fields = doc.getFields();
for (final Fieldable f : fields ) {
for (final IndexableField f : doc ) {
validateField(f);
}

@@ -93,7 +98,7 @@ public class TestThreadSafe extends LuceneTestCase {
}


void validateField(Fieldable f) {
void validateField(IndexableField f) {
String val = f.stringValue();
if (!val.startsWith("^") || !val.endsWith("$")) {
throw new RuntimeException("Invalid field:" + f.toString() + " val=" +val);

@@ -113,9 +118,7 @@ public class TestThreadSafe extends LuceneTestCase {
StringBuilder sb = new StringBuilder("^ ");
while (sb.length() < flen) sb.append(' ').append(words[random.nextInt(words.length)]);
sb.append(" $");
Field.Store store = Field.Store.YES; // make random later
Field.Index index = Field.Index.ANALYZED; // make random later
d.add(newField("f"+i, sb.toString(), store, index));
d.add(newField("f"+i, sb.toString(), TextField.TYPE_STORED));
}
iw.addDocument(d);
}
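Stored-field loading with a FieldSelector now goes through the visitor helper added above. A usage sketch, reusing the pre-existing FieldSelector/FieldSelectorResult API (the field name is illustrative):

    Document partial = getDocument(ir, 0, new FieldSelector() {
      public FieldSelectorResult accept(String fieldName) {
        // load only "f3"; skip everything else
        return "f3".equals(fieldName)
            ? FieldSelectorResult.LOAD : FieldSelectorResult.NO_LOAD;
      }
    });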
@@ -19,7 +19,8 @@ package org.apache.lucene.sandbox.queries;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;

@@ -74,9 +75,9 @@ public class DuplicateFilterTest extends LuceneTestCase {

private void addDoc(RandomIndexWriter writer, String url, String text, String date) throws IOException {
Document doc = new Document();
doc.add(newField(KEY_FIELD, url, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(newField("text", text, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("date", date, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField(KEY_FIELD, url, StringField.TYPE_STORED));
doc.add(newField("text", text, TextField.TYPE_STORED));
doc.add(newField("date", date, TextField.TYPE_STORED));
writer.addDocument(doc);
}

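The mapping in this hunk is the mechanical rule used across the test cutovers: NOT_ANALYZED plus Store.YES becomes StringField.TYPE_STORED (indexed as a single token), and ANALYZED plus Store.YES becomes TextField.TYPE_STORED (tokenized). A sketch with hypothetical field names:

    doc.add(newField("id", "doc-42", StringField.TYPE_STORED));     // was NOT_ANALYZED
    doc.add(newField("body", "some text", TextField.TYPE_STORED));  // was ANALYZED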
@@ -20,7 +20,7 @@ package org.apache.lucene.sandbox.queries;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;

@@ -68,8 +68,8 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {

private void addDoc(RandomIndexWriter writer, String name, String id) throws IOException {
Document doc = new Document();
doc.add(newField("name", name, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("id", id, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("name", name, TextField.TYPE_STORED));
doc.add(newField("id", id, TextField.TYPE_STORED));
writer.addDocument(doc);
}

@@ -6,6 +6,7 @@ import java.util.Locale;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.*;

@@ -57,7 +58,7 @@ public class TestSlowCollationMethods extends LuceneTestCase {
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
String value = _TestUtil.randomUnicodeString(random);
Field field = newField("field", value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
Field field = newField("field", value, StringField.TYPE_STORED);
doc.add(field);
iw.addDocument(doc);
}
@@ -25,7 +25,7 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.index.TermsEnum;

@@ -47,7 +47,7 @@ public class TestRegexQuery extends LuceneTestCase {
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
Document doc = new Document();
doc.add(newField(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
doc.add(newField(FN, "the quick brown fox jumps over the lazy dog", TextField.TYPE_UNSTORED));
writer.addDocument(doc);
reader = writer.getReader();
writer.close();
@@ -21,7 +21,8 @@ import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

@@ -62,12 +63,10 @@ public class TestSpanRegexQuery extends LuceneTestCase {
// Field.Store.NO, Field.Index.ANALYZED));
// writer.addDocument(doc);
// doc = new Document();
doc.add(newField("field", "auto update", Field.Store.NO,
Field.Index.ANALYZED));
doc.add(newField("field", "auto update", TextField.TYPE_UNSTORED));
writer.addDocument(doc);
doc = new Document();
doc.add(newField("field", "first auto update", Field.Store.NO,
Field.Index.ANALYZED));
doc.add(newField("field", "first auto update", TextField.TYPE_UNSTORED));
writer.addDocument(doc);
writer.optimize();
writer.close();

@@ -87,13 +86,13 @@ public class TestSpanRegexQuery extends LuceneTestCase {
LockObtainFailedException, IOException {
// creating a document to store
Document lDoc = new Document();
lDoc.add(newField("field", "a1 b1", Field.Store.NO,
Field.Index.ANALYZED_NO_NORMS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setOmitNorms(true);
lDoc.add(newField("field", "a1 b1", customType));

// creating a document to store
Document lDoc2 = new Document();
lDoc2.add(newField("field", "a2 b2", Field.Store.NO,
Field.Index.ANALYZED_NO_NORMS));
lDoc2.add(newField("field", "a2 b2", customType));

// creating first index writer
IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(
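ANALYZED_NO_NORMS has no single replacement constant; as the hunk above shows, it becomes a customized FieldType. Sketch:

    FieldType analyzedNoNorms = new FieldType(TextField.TYPE_UNSTORED);
    analyzedNoNorms.setOmitNorms(true); // replaces Field.Index.ANALYZED_NO_NORMS
    doc.add(newField("field", "a1 b1", analyzedNoNorms));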
@@ -23,10 +23,10 @@ import java.util.Map;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

@@ -94,25 +94,21 @@ public class TestCartesian extends LuceneTestCase {

Document doc = new Document();

doc.add(newField("name", name,Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("name", name, TextField.TYPE_STORED));

// convert the lat / long to lucene fields
doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lng));
doc.add(new NumericField(latField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lng));

// add a default meta field to make searching all documents easy
doc.add(newField("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("metafile", "doc", TextField.TYPE_STORED));

int ctpsize = ctps.size();
for (int i =0; i < ctpsize; i++){
CartesianTierPlotter ctp = ctps.get(i);
doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE,
Field.Store.YES,
true).setDoubleValue(ctp.getTierBoxId(lat,lng)));
doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(ctp.getTierBoxId(lat,lng)));

doc.add(newField(geoHashPrefix, GeoHashUtils.encode(lat,lng),
Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(newField(geoHashPrefix, GeoHashUtils.encode(lat,lng), StringField.TYPE_STORED));
}
writer.addDocument(doc);

@@ -20,8 +20,8 @@ import java.io.IOException;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

@@ -63,14 +63,14 @@ public class TestDistance extends LuceneTestCase {

Document doc = new Document();

doc.add(newField("name", name,Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("name", name, TextField.TYPE_STORED));

// convert the lat / long to lucene fields
doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE,Field.Store.YES, true).setDoubleValue(lng));
doc.add(new NumericField(latField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lng));

// add a default meta field to make searching all documents easy
doc.add(newField("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("metafile", "doc", TextField.TYPE_STORED));
writer.addDocument(doc);

}
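NumericField follows the same cutover: the (Field.Store, boolean index) arguments become a FieldType, with Integer.MAX_VALUE still passed as the precision step in these tests. Sketch with an illustrative value:

    doc.add(new NumericField("lat", Integer.MAX_VALUE, NumericField.TYPE_STORED)
        .setDoubleValue(42.36));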
@@ -1,11 +1,29 @@
package org.apache.lucene.xmlparser;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;

@@ -24,22 +42,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

public class TestParser extends LuceneTestCase {


@@ -63,9 +65,9 @@ public class TestParser extends LuceneTestCase {
int endOfDate = line.indexOf('\t');
String date = line.substring(0, endOfDate).trim();
String content = line.substring(endOfDate).trim();
org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
doc.add(newField("date", date, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("contents", content, Field.Store.YES, Field.Index.ANALYZED));
Document doc = new Document();
doc.add(newField("date", date, TextField.TYPE_STORED));
doc.add(newField("contents", content, TextField.TYPE_STORED));
NumericField numericField = new NumericField("date2");
numericField.setIntValue(Integer.valueOf(date));
doc.add(numericField);

@@ -217,7 +219,7 @@ public class TestParser extends LuceneTestCase {
System.out.println("=========" + qType + "============");
ScoreDoc[] scoreDocs = hits.scoreDocs;
for (int i = 0; i < Math.min(numDocs, hits.totalHits); i++) {
org.apache.lucene.document.Document ldoc = searcher.doc(scoreDocs[i].doc);
Document ldoc = searcher.doc(scoreDocs[i].doc);
System.out.println("[" + ldoc.get("date") + "]" + ldoc.get("contents"));
}
System.out.println();
@@ -1,24 +1,5 @@
package org.apache.lucene.xmlparser;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import java.io.IOException;
import java.util.Locale;
import java.util.Properties;
import java.util.StringTokenizer;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with

@@ -36,6 +17,26 @@ import java.util.StringTokenizer;
* limitations under the License.
*/

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import java.io.IOException;
import java.util.Locale;
import java.util.Properties;
import java.util.StringTokenizer;


/**
* This class illustrates how form input (such as from a web page or Swing gui) can be
* turned into Lucene queries using a choice of XSL templates for different styles of queries.

@@ -125,7 +126,7 @@ public class TestQueryTemplateManager extends LuceneTestCase {
String name = st.nextToken().trim();
if (st.hasMoreTokens()) {
String value = st.nextToken().trim();
result.add(newField(name, value, Field.Store.YES, Field.Index.ANALYZED));
result.add(newField(name, value, TextField.TYPE_STORED));
}
}
return result;
@@ -22,11 +22,10 @@ import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;

import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;

import org.apache.lucene.document.Fieldable;

/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
* policy for extracting index terms from text.
* <p>

@@ -111,16 +110,16 @@ public abstract class Analyzer implements Closeable {
}

/**
* Invoked before indexing a Fieldable instance if
* Invoked before indexing an IndexableField instance if
* terms have already been added to that field. This allows custom
* analyzers to place an automatic position increment gap between
* Fieldable instances using the same field name. The default
* IndexableField instances using the same field name. The default
* position increment gap is 0. With a 0 position increment gap and
* the typical default token position increment of 1, all terms in a field,
* including across Fieldable instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
* including across IndexableField instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across IndexableField instance boundaries.
*
* @param fieldName Fieldable name being indexed.
* @param fieldName IndexableField name being indexed.
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getPositionIncrementGap(String fieldName) {

@@ -138,12 +137,13 @@ public abstract class Analyzer implements Closeable {
* @param field the field just indexed
* @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getOffsetGap(Fieldable field) {
if (field.isTokenized())
public int getOffsetGap(IndexableField field) {
if (field.tokenized()) {
return 1;
else
} else {
return 0;
}
}

/** Frees persistent resources used by this Analyzer */
public void close() {
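getOffsetGap now takes an IndexableField and keys off tokenized() instead of isTokenized(). A hedged sketch of forcing a gap of 1 even for un-tokenized (NOT_ANALYZED-style) multi-valued fields; the subclass is hypothetical, not part of this commit:

    // Hypothetical analyzer subclass, for illustration only.
    final class UnitOffsetGapAnalyzer extends StandardAnalyzer {
      UnitOffsetGapAnalyzer(Version matchVersion) { super(matchVersion); }
      @Override
      public int getOffsetGap(IndexableField field) {
        return 1; // apply the gap regardless of field.tokenized()
      }
    }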
@@ -120,7 +120,7 @@ There are many post tokenization steps that can be done, including (but not limi
Applications usually do not invoke analysis – Lucene does it for them:
<ul>
<li>At indexing, as a consequence of
{@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document) addDocument(doc)},
{@link org.apache.lucene.index.IndexWriter#addDocument(Iterable) addDocument(doc)},
the Analyzer in effect for indexing is invoked for each indexed field of the added document.
</li>
<li>At search, a QueryParser may invoke the Analyzer during parsing. Note that for some queries, analysis does not

@@ -170,7 +170,7 @@ the source code of any one of the many samples located in this package.
</p>
<h3>Field Section Boundaries</h3>
<p>
When {@link org.apache.lucene.document.Document#add(org.apache.lucene.document.Fieldable) document.add(field)}
When {@link org.apache.lucene.document.Document#add(org.apache.lucene.index.IndexableField) document.add(field)}
is called multiple times for the same field name, we could say that each such call creates a new
section for that field in that document.
In fact, a separate call to
@@ -1,312 +0,0 @@
package org.apache.lucene.document;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

import org.apache.lucene.search.PhraseQuery; // for javadocs
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;


/**
*
*
**/
public abstract class AbstractField implements Fieldable {

protected String name = "body";
protected boolean storeTermVector = false;
protected boolean storeOffsetWithTermVector = false;
protected boolean storePositionWithTermVector = false;
protected boolean omitNorms = false;
protected boolean isStored = false;
protected boolean isIndexed = true;
protected boolean isTokenized = true;
protected boolean isBinary = false;
protected boolean lazy = false;
protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
protected float boost = 1.0f;
// the data object for all different kind of field values
protected Object fieldsData = null;
// pre-analyzed tokenStream for indexed fields
protected TokenStream tokenStream;
// length/offset for all primitive types
protected int binaryLength;
protected int binaryOffset;
protected PerDocFieldValues docValues;

protected AbstractField()
{
}

protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
this.name = name;

this.isStored = store.isStored();
this.isIndexed = index.isIndexed();
this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms();

this.isBinary = false;

setStoreTermVector(termVector);
}

/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* used to compute the norm factor for the field. By
* default, in the {@link
* org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
* by the length normalization factor and then
* rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.document.Document#setBoost(float)
* @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
* @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}

/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
* {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
}

/** Returns the name of the field.
* For example "date", "title", "body", ...
*/
public String name() { return name; }

protected void setStoreTermVector(Field.TermVector termVector) {
this.storeTermVector = termVector.isStored();
this.storePositionWithTermVector = termVector.withPositions();
this.storeOffsetWithTermVector = termVector.withOffsets();
}

/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
public final boolean isStored() { return isStored; }

/** True iff the value of the field is to be indexed, so that it may be
searched on. */
public final boolean isIndexed() { return isIndexed; }

/** True iff the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }

/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
*/
public final boolean isTermVectorStored() { return storeTermVector; }

/**
* True iff terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
public boolean isStoreOffsetWithTermVector(){
return storeOffsetWithTermVector;
}

/**
* True iff terms are stored as term vector together with their token positions.
*/
public boolean isStorePositionWithTermVector(){
return storePositionWithTermVector;
}

/** True iff the value of the field is stored as binary */
public final boolean isBinary() {
return isBinary;
}


/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.
* @return reference to the Field value as byte[].
*/
public byte[] getBinaryValue() {
return getBinaryValue(null);
}

public byte[] getBinaryValue(byte[] result){
if (isBinary || fieldsData instanceof byte[])
return (byte[]) fieldsData;
else
return null;
}

/**
* Returns length of byte[] segment that is used as value, if Field is not binary
* returned value is undefined
* @return length of byte[] segment that represents this Field value
*/
public int getBinaryLength() {
if (isBinary) {
return binaryLength;
} else if (fieldsData instanceof byte[])
return ((byte[]) fieldsData).length;
else
return 0;
}

/**
* Returns offset into byte[] segment that is used as value, if Field is not binary
* returned value is undefined
* @return index of the first character in byte[] segment that represents this Field value
*/
public int getBinaryOffset() {
return binaryOffset;
}

/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }

/** @see #setIndexOptions */
public IndexOptions getIndexOptions() { return indexOptions; }

/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }

/** Expert:
*
* If set, omit term freq, and optionally also positions and payloads from
* postings for this field.
*
* <p><b>NOTE</b>: While this option reduces storage space
* required in the index, it also means any query
* requiring positional information, such as {@link
* PhraseQuery} or {@link SpanQuery} subclasses will
* silently fail to find results.
*/
public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; }

public boolean isLazy() {
return lazy;
}

/** Prints a Field for human consumption. */
@Override
public final String toString() {
StringBuilder result = new StringBuilder();
if (isStored) {
result.append("stored");
}
if (isIndexed) {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
if (isTokenized) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeOffsetWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storePositionWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (isBinary) {
if (result.length() > 0)
result.append(",");
result.append("binary");
}
if (omitNorms) {
result.append(",omitNorms");
}
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
result.append(",indexOptions=");
result.append(indexOptions);
}
if (lazy){
result.append(",lazy");
}
result.append('<');
result.append(name);
result.append(':');

if (fieldsData != null && lazy == false) {
result.append(fieldsData);
}

result.append('>');
return result.toString();
}

public PerDocFieldValues getDocValues() {
return docValues;
}

public void setDocValues(PerDocFieldValues docValues) {
this.docValues = docValues;
}

public boolean hasDocValues() {
return docValues != null && docValues.type() != null;
}

public ValueType docValuesType() {
return docValues == null? null : docValues.type();
}
}
@@ -0,0 +1,49 @@
package org.apache.lucene.document;

import org.apache.lucene.util.BytesRef;

/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

public final class BinaryField extends Field {

public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_STORED.setStored(true);
TYPE_STORED.freeze();
}

public BinaryField(String name, byte[] value) {
super(name, BinaryField.TYPE_STORED, value);
}

public BinaryField(String name, byte[] value, int offset, int length) {
super(name, BinaryField.TYPE_STORED, value, offset, length);
}

public BinaryField(String name, BytesRef bytes) {
super(name, BinaryField.TYPE_STORED, bytes);
}

public BinaryField(String name, FieldType custom, byte[] value) {
super(name, custom, value);
}

public boolean isNumeric() {
return false;
}
}
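A usage sketch for the new class, grounded in the constructors above (names and bytes are illustrative; buffer is a hypothetical byte[]):

    Document doc = new Document();
    doc.add(new BinaryField("payload", new byte[] {1, 2, 3}));  // whole array
    doc.add(new BinaryField("slice", buffer, 16, 8));           // offset/length slice
    doc.add(new BinaryField("ref", new BytesRef("raw bytes"))); // BytesRef form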
@@ -92,16 +92,24 @@ public class CompressionTools {
return compress(result.bytes, 0, result.length, compressionLevel);
}

public static byte[] decompress(BytesRef bytes) throws DataFormatException {
return decompress(bytes.bytes, bytes.offset, bytes.length);
}

public static byte[] decompress(byte[] value) throws DataFormatException {
return decompress(value, 0, value.length);
}

/** Decompress the byte array previously returned by
* compress */
public static byte[] decompress(byte[] value) throws DataFormatException {
public static byte[] decompress(byte[] value, int offset, int length) throws DataFormatException {
// Create an expandable byte array to hold the decompressed data
ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
ByteArrayOutputStream bos = new ByteArrayOutputStream(length);

Inflater decompressor = new Inflater();

try {
decompressor.setInput(value);
decompressor.setInput(value, offset, length);

// Decompress the data
final byte[] buf = new byte[1024];

@@ -119,9 +127,17 @@ public class CompressionTools {
/** Decompress the byte array previously returned by
* compressString back into a String */
public static String decompressString(byte[] value) throws DataFormatException {
final byte[] bytes = decompress(value);
return decompressString(value, 0, value.length);
}

public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
final byte[] bytes = decompress(value, offset, length);
CharsRef result = new CharsRef(bytes.length);
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
return new String(result.chars, 0, result.length);
}

public static String decompressString(BytesRef bytes) throws DataFormatException {
return decompressString(bytes.bytes, bytes.offset, bytes.length);
}
}

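The new overloads let a stored BytesRef round-trip without first copying it into an exact-size array. Sketch (the decompress calls throw DataFormatException on corrupt input):

    byte[] compressed = CompressionTools.compressString("some text");
    String restored = CompressionTools.decompressString(new BytesRef(compressed));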
@@ -17,61 +17,55 @@ package org.apache.lucene.document;
* limitations under the License.
*/

import java.util.*; // for javadoc
import java.util.*;

import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher; // for javadoc
import org.apache.lucene.search.ScoreDoc; // for javadoc
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.util.BytesRef;

/** Documents are the unit of indexing and search.
*
* A Document is a set of fields. Each field has a name and a textual value.
* A field may be {@link Fieldable#isStored() stored} with the document, in which
* A field may be {@link IndexableField#stored() stored} with the document, in which
* case it is returned with search hits on the document. Thus each document
* should typically contain one or more stored fields which uniquely identify
* it.
*
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <p>Note that fields which are <i>not</i> {@link IndexableField#stored() stored} are
* <i>not</i> available in documents retrieved from the index, e.g. with {@link
* ScoreDoc#doc} or {@link IndexReader#document(int)}.
*/

public final class Document {
List<Fieldable> fields = new ArrayList<Fieldable>();
private float boost = 1.0f;
public final class Document implements Iterable<IndexableField> {

private final List<IndexableField> fields = new ArrayList<IndexableField>();

/** Constructs a new document with no fields. */
public Document() {}

@Override
public Iterator<IndexableField> iterator() {

/** Sets a boost factor for hits on any field of this document. This value
* will be multiplied into the score of all hits on this document.
*
* <p>The default value is 1.0.
*
* <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
* each field in this document. Thus, this method in effect sets a default
* boost for the fields of this document.
*
* @see Fieldable#setBoost(float)
*/
public void setBoost(float boost) {
this.boost = boost;
return new Iterator<IndexableField>() {
private int fieldUpto = 0;

@Override
public boolean hasNext() {
return fieldUpto < fields.size();
}

/** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
*
* <p>Note that once a document is indexed this value is no longer available
* from the index. At search time, for retrieved documents, this method always
* returns 1. This however does not mean that the boost value set at indexing
* time was ignored - it was just combined with other indexing time factors and
* stored elsewhere, for better indexing and search performance. (For more
* information see the "norm(t,d)" part of the scoring formula in
* {@link org.apache.lucene.search.Similarity Similarity}.)
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
@Override
public void remove() {
throw new UnsupportedOperationException();
}

@Override
public IndexableField next() {
return fields.get(fieldUpto++);
}
};
}

/**
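Since Document now implements Iterable<IndexableField>, the old getFields() scan collapses into a for-each, e.g.:

    for (IndexableField field : doc) {
      System.out.println(field.name() + " => " + field.stringValue());
    }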
@@ -84,7 +78,7 @@ public final class Document {
* a document has to be deleted from an index and a new changed version of that
* document has to be added.</p>
*/
public final void add(Fieldable field) {
public final void add(IndexableField field) {
fields.add(field);
}

@@ -99,9 +93,9 @@ public final class Document {
* document has to be added.</p>
*/
public final void removeField(String name) {
Iterator<Fieldable> it = fields.iterator();
Iterator<IndexableField> it = fields.iterator();
while (it.hasNext()) {
Fieldable field = it.next();
IndexableField field = it.next();
if (field.name().equals(name)) {
it.remove();
return;

@@ -119,148 +113,15 @@ public final class Document {
* document has to be added.</p>
*/
public final void removeFields(String name) {
Iterator<Fieldable> it = fields.iterator();
Iterator<IndexableField> it = fields.iterator();
while (it.hasNext()) {
Fieldable field = it.next();
IndexableField field = it.next();
if (field.name().equals(name)) {
it.remove();
}
}
}

/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
* Do not use this method with lazy loaded fields or {@link NumericField}.
* @deprecated use {@link #getFieldable} instead and cast depending on
* data type.
* @throws ClassCastException if you try to retrieve a numerical or
* lazy loaded field.
*/
@Deprecated
public final Field getField(String name) {
return (Field) getFieldable(name);
}


/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
*/
public Fieldable getFieldable(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name))
return field;
}
return null;
}

/** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
* For {@link NumericField} it returns the string value of the number. If you want
* the actual {@code NumericField} instance back, use {@link #getFieldable}.
*/
public final String get(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name) && (!field.isBinary()))
return field.stringValue();
}
return null;
}

/** Returns a List of all the fields in a document.
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the
* index, e.g. {@link IndexSearcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final List<Fieldable> getFields() {
return fields;
}

private final static Field[] NO_FIELDS = new Field[0];

/**
* Returns an array of {@link Field}s with the given name.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* Do not use this method with lazy loaded fields or {@link NumericField}.
*
* @param name the name of the field
* @return a <code>Field[]</code> array
* @deprecated use {@link #getFieldable} instead and cast depending on
* data type.
* @throws ClassCastException if you try to retrieve a numerical or
* lazy loaded field.
*/
@Deprecated
public final Field[] getFields(String name) {
List<Field> result = new ArrayList<Field>();
for (Fieldable field : fields) {
if (field.name().equals(name)) {
result.add((Field) field);
}
}

if (result.size() == 0)
return NO_FIELDS;

return result.toArray(new Field[result.size()]);
}


private final static Fieldable[] NO_FIELDABLES = new Fieldable[0];

/**
* Returns an array of {@link Fieldable}s with the given name.
* This method returns an empty array when there are no
* matching fields. It never returns null.
*
* @param name the name of the field
* @return a <code>Fieldable[]</code> array
*/
public Fieldable[] getFieldables(String name) {
List<Fieldable> result = new ArrayList<Fieldable>();
for (Fieldable field : fields) {
if (field.name().equals(name)) {
result.add(field);
}
}

if (result.size() == 0)
return NO_FIELDABLES;

return result.toArray(new Fieldable[result.size()]);
}


private final static String[] NO_STRINGS = new String[0];

/**
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* For {@link NumericField}s it returns the string value of the number. If you want
* the actual {@code NumericField} instances back, use {@link #getFieldables}.
* @param name the name of the field
* @return a <code>String[]</code> of field values
*/
public final String[] getValues(String name) {
List<String> result = new ArrayList<String>();
for (Fieldable field : fields) {
if (field.name().equals(name) && (!field.isBinary()))
result.add(field.stringValue());
}

if (result.size() == 0)
return NO_STRINGS;

return result.toArray(new String[result.size()]);
}

private final static byte[][] NO_BYTES = new byte[0][];

/**
* Returns an array of byte arrays for all of the fields that have the name specified

@@ -271,17 +132,18 @@ public final class Document {
* @param name the name of the field
* @return a <code>byte[][]</code> of binary field values
*/
public final byte[][] getBinaryValues(String name) {
List<byte[]> result = new ArrayList<byte[]>();
for (Fieldable field : fields) {
if (field.name().equals(name) && (field.isBinary()))
result.add(field.getBinaryValue());
public final BytesRef[] getBinaryValues(String name) {
final List<BytesRef> result = new ArrayList<BytesRef>();
for (IndexableField field : fields) {
if (field.name().equals(name)) {
final BytesRef bytes = field.binaryValue();
if (bytes != null) {
result.add(bytes);
}
}
}

if (result.size() == 0)
return NO_BYTES;

return result.toArray(new byte[result.size()][]);
return result.toArray(new BytesRef[result.size()]);
}

/**
@@ -293,10 +155,72 @@ public final class Document {
* @param name the name of the field.
* @return a <code>byte[]</code> containing the binary field value or <code>null</code>
*/
public final byte[] getBinaryValue(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name) && (field.isBinary()))
return field.getBinaryValue();
public final BytesRef getBinaryValue(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name)) {
final BytesRef bytes = field.binaryValue();
if (bytes != null) {
return bytes;
}
}
}
return null;
}

/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
*/
public final IndexableField getField(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name)) {
return field;
}
}
return null;
}

/**
* Returns an array of {@link IndexableField}s with the given name.
* This method returns an empty array when there are no
* matching fields. It never returns null.
*
* @param name the name of the field
* @return an <code>IndexableField[]</code> array
*/
public IndexableField[] getFields(String name) {
List<IndexableField> result = new ArrayList<IndexableField>();
for (IndexableField field : fields) {
if (field.name().equals(name)) {
result.add(field);
}
}

return result.toArray(new IndexableField[result.size()]);
}

/** Returns a List of all the fields in a document.
* <p>Note that fields which are <i>not</i> stored are
* <i>not</i> available in documents retrieved from the
* index, e.g. {@link IndexSearcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final List<IndexableField> getFields() {
return fields;
}

/** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
* For {@link NumericField} it returns the string value of the number. If you want
* the actual {@code NumericField} instance back, use {@link #getField}.
*/
public final String get(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name) && field.stringValue() != null) {
return field.stringValue();
}
}
return null;
}
@ -307,7 +231,7 @@ public final class Document {
|
|||
StringBuilder buffer = new StringBuilder();
|
||||
buffer.append("Document<");
|
||||
for (int i = 0; i < fields.size(); i++) {
|
||||
Fieldable field = fields.get(i);
|
||||
IndexableField field = fields.get(i);
|
||||
buffer.append(field.toString());
|
||||
if (i != fields.size()-1)
|
||||
buffer.append(" ");
|
||||
|
|
|
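Taken together, the reworked accessors above stop relying on stored "metadata" flags: string and binary values are told apart by probing each IndexableField directly, and binary values come back as BytesRef rather than raw byte[]. A minimal sketch of reading values back with this API follows; the field names and the searcher variable are illustrative assumptions, not part of this commit:

// Sketch only: reading stored values through the reworked Document API.
// "title" and "thumbnail" are hypothetical field names; "searcher" is
// assumed to be an open IndexSearcher.
Document doc = searcher.doc(docID);

String title = doc.get("title");            // first string value, or null
String[] titles = doc.getValues("title");   // all string values, never null

BytesRef thumb = doc.getBinaryValue("thumbnail");  // BytesRef, not byte[]
if (thumb != null) {
  // copy out the referenced slice if the bytes must outlive the reader
  byte[] raw = new byte[thumb.length];
  System.arraycopy(thumb.bytes, thumb.offset, raw, 0, thumb.length);
}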
@@ -21,514 +21,325 @@ import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.BytesRef;

/**
A field is a section of a Document. Each field has two parts, a name and a
value. Values may be free text, provided as a String or as a Reader, or they
may be atomic keywords, which are not further processed. Such keywords may
be used to represent dates, urls, etc. Fields are optionally stored in the
index, so that they may be returned with hits on the document.
 * A field is a section of a Document. Each field has two parts, a name and a
 * value. Values may be free text, provided as a String or as a Reader, or they
 * may be atomic keywords, which are not further processed. Such keywords may be
 * used to represent dates, urls, etc. Fields are optionally stored in the
 * index, so that they may be returned with hits on the document.
 */

public final class Field extends AbstractField implements Fieldable {
public class Field implements IndexableField {

/** Specifies whether and how a field should be stored. */
public static enum Store {
  protected FieldType type;
  protected String name = "body";
  // the data object for all different kinds of field values
  protected Object fieldsData;
  // pre-analyzed tokenStream for indexed fields
  protected TokenStream tokenStream;
  // length/offset for all primitive types
  protected PerDocFieldValues docValues;

  /** Store the original field value in the index. This is useful for short texts
   * like a document's title which should be displayed with the results. The
   * value is stored in its original form, i.e. no analyzer is used before it is
   * stored.
   */
  YES {
    @Override
    public boolean isStored() { return true; }
  },

  /** Do not store the field value in the index. */
  NO {
    @Override
    public boolean isStored() { return false; }
  };

  public abstract boolean isStored();
}

/** Specifies whether and how a field should be indexed. */
public static enum Index {

  /** Do not index the field value. This field can thus not be searched,
   * but one can still access its contents provided it is
   * {@link Field.Store stored}. */
  NO {
    @Override
    public boolean isIndexed() { return false; }
    @Override
    public boolean isAnalyzed() { return false; }
    @Override
    public boolean omitNorms() { return true; }
  },

  /** Index the tokens produced by running the field's
   * value through an Analyzer. This is useful for
   * common text. */
  ANALYZED {
    @Override
    public boolean isIndexed() { return true; }
    @Override
    public boolean isAnalyzed() { return true; }
    @Override
    public boolean omitNorms() { return false; }
  },

  /** Index the field's value without using an Analyzer, so it can be searched.
   * As no analyzer is used the value will be stored as a single term. This is
   * useful for unique Ids like product numbers.
   */
  NOT_ANALYZED {
    @Override
    public boolean isIndexed() { return true; }
    @Override
    public boolean isAnalyzed() { return false; }
    @Override
    public boolean omitNorms() { return false; }
  },

  /** Expert: Index the field's value without an Analyzer,
   * and also disable the indexing of norms. Note that you
   * can also separately enable/disable norms by calling
   * {@link Field#setOmitNorms}. No norms means that
   * index-time field and document boosting and field
   * length normalization are disabled. The benefit is
   * less memory usage as norms take up one byte of RAM
   * per indexed field for every document in the index,
   * during searching. Note that once you index a given
   * field <i>with</i> norms disabled, enabling norms will
   * have no effect. In other words, for this to have the
   * above described effect on a field, one instance of
   * that field must be indexed with NOT_ANALYZED_NO_NORMS
   * at some point. */
  NOT_ANALYZED_NO_NORMS {
    @Override
    public boolean isIndexed() { return true; }
    @Override
    public boolean isAnalyzed() { return false; }
    @Override
    public boolean omitNorms() { return true; }
  },

  /** Expert: Index the tokens produced by running the
   * field's value through an Analyzer, and also
   * separately disable the storing of norms. See
   * {@link #NOT_ANALYZED_NO_NORMS} for what norms are
   * and why you may want to disable them. */
  ANALYZED_NO_NORMS {
    @Override
    public boolean isIndexed() { return true; }
    @Override
    public boolean isAnalyzed() { return true; }
    @Override
    public boolean omitNorms() { return true; }
  };

  /** Get the best representation of the index given the flags. */
  public static Index toIndex(boolean indexed, boolean analyzed) {
    return toIndex(indexed, analyzed, false);
  }

  /** Expert: Get the best representation of the index given the flags. */
  public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) {

    // If it is not indexed nothing else matters
    if (!indexed) {
      return Index.NO;
    }

    // typical, non-expert
    if (!omitNorms) {
      if (analyzed) {
        return Index.ANALYZED;
      }
      return Index.NOT_ANALYZED;
    }

    // Expert: Norms omitted
    if (analyzed) {
      return Index.ANALYZED_NO_NORMS;
    }
    return Index.NOT_ANALYZED_NO_NORMS;
  }

  public abstract boolean isIndexed();
  public abstract boolean isAnalyzed();
  public abstract boolean omitNorms();
}

/** Specifies whether and how a field should have term vectors. */
public static enum TermVector {

  /** Do not store term vectors.
   */
  NO {
    @Override
    public boolean isStored() { return false; }
    @Override
    public boolean withPositions() { return false; }
    @Override
    public boolean withOffsets() { return false; }
  },

  /** Store the term vectors of each document. A term vector is a list
   * of the document's terms and their number of occurrences in that document. */
  YES {
    @Override
    public boolean isStored() { return true; }
    @Override
    public boolean withPositions() { return false; }
    @Override
    public boolean withOffsets() { return false; }
  },

  /**
   * Store the term vector + token position information
   *
   * @see #YES
   */
  WITH_POSITIONS {
    @Override
    public boolean isStored() { return true; }
    @Override
    public boolean withPositions() { return true; }
    @Override
    public boolean withOffsets() { return false; }
  },

  /**
   * Store the term vector + Token offset information
   *
   * @see #YES
   */
  WITH_OFFSETS {
    @Override
    public boolean isStored() { return true; }
    @Override
    public boolean withPositions() { return false; }
    @Override
    public boolean withOffsets() { return true; }
  },

  /**
   * Store the term vector + Token position and offset information
   *
   * @see #YES
   * @see #WITH_POSITIONS
   * @see #WITH_OFFSETS
   */
  WITH_POSITIONS_OFFSETS {
    @Override
    public boolean isStored() { return true; }
    @Override
    public boolean withPositions() { return true; }
    @Override
    public boolean withOffsets() { return true; }
  };

  /** Get the best representation of a TermVector given the flags. */
  public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) {

    // If it is not stored, nothing else matters.
    if (!stored) {
      return TermVector.NO;
    }

    if (withOffsets) {
      if (withPositions) {
        return Field.TermVector.WITH_POSITIONS_OFFSETS;
      }
      return Field.TermVector.WITH_OFFSETS;
    }

    if (withPositions) {
      return Field.TermVector.WITH_POSITIONS;
    }
    return Field.TermVector.YES;
  }

  public abstract boolean isStored();
  public abstract boolean withPositions();
  public abstract boolean withOffsets();
}

/** The value of the field as a String, or null. If null, the Reader value or
 * binary value is used. Exactly one of stringValue(),
 * readerValue(), and getBinaryValue() must be set. */
public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }

/** The value of the field as a Reader, or null. If null, the String value or
 * binary value is used. Exactly one of stringValue(),
 * readerValue(), and getBinaryValue() must be set. */
public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }

/** The TokenStream for this field to be used when indexing, or null. If null, the Reader value
 * or String value is analyzed to produce the indexed tokens. */
public TokenStream tokenStreamValue() { return tokenStream; }


/** <p>Expert: change the value of this field. This can
 * be used during indexing to re-use a single Field
 * instance to improve indexing speed by avoiding GC cost
 * of new'ing and reclaiming Field instances. Typically
 * a single {@link Document} instance is re-used as
 * well. This helps most on small documents.</p>
 *
 * <p>Each Field instance should only be used once
 * within a single {@link Document} instance. See <a
 * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
 * for details.</p> */
public void setValue(String value) {
  if (isBinary) {
    throw new IllegalArgumentException("cannot set a String value on a binary field");
  }
  fieldsData = value;
}

/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(Reader value) {
  if (isBinary) {
    throw new IllegalArgumentException("cannot set a Reader value on a binary field");
  }
  if (isStored) {
    throw new IllegalArgumentException("cannot set a Reader value on a stored field");
  }
  fieldsData = value;
}

/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value) {
  if (!isBinary) {
    throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
  }
  fieldsData = value;
  binaryLength = value.length;
  binaryOffset = 0;
}

/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value, int offset, int length) {
  if (!isBinary) {
    throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
  }
  fieldsData = value;
  binaryLength = length;
  binaryOffset = offset;
}

/** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
 * May be combined with stored values from stringValue() or getBinaryValue() */
public void setTokenStream(TokenStream tokenStream) {
  this.isIndexed = true;
  this.isTokenized = true;
  this.tokenStream = tokenStream;
}

/**
 * Create a field by specifying its name, value and how it will
 * be saved in the index. Term vectors will not be stored in the index.
 *
 * @param name The name of the field
 * @param value The string to process
 * @param store Whether <code>value</code> should be stored in the index
 * @param index Whether the field should be indexed, and if so, if it should
 * be tokenized before indexing
 * @throws NullPointerException if name or value is <code>null</code>
 * @throws IllegalArgumentException if the field is neither stored nor indexed
 */
public Field(String name, String value, Store store, Index index) {
  this(name, value, store, index, TermVector.NO);
}

/**
 * Create a field by specifying its name, value and how it will
 * be saved in the index.
 *
 * @param name The name of the field
 * @param value The string to process
 * @param store Whether <code>value</code> should be stored in the index
 * @param index Whether the field should be indexed, and if so, if it should
 * be tokenized before indexing
 * @param termVector Whether term vector should be stored
 * @throws NullPointerException if name or value is <code>null</code>
 * @throws IllegalArgumentException in any of the following situations:
 * <ul>
 *  <li>the field is neither stored nor indexed</li>
 *  <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
 * </ul>
 */
public Field(String name, String value, Store store, Index index, TermVector termVector) {
  if (name == null)
    throw new NullPointerException("name cannot be null");
  if (value == null)
    throw new NullPointerException("value cannot be null");
  if (name.length() == 0 && value.length() == 0)
    throw new IllegalArgumentException("name and value cannot both be empty");
  if (index == Index.NO && store == Store.NO)
    throw new IllegalArgumentException("it doesn't make sense to have a field that "
      + "is neither indexed nor stored");
  if (index == Index.NO && termVector != TermVector.NO)
    throw new IllegalArgumentException("cannot store term vector information "
      + "for a field that is not indexed");
protected float boost = 1.0f;

public Field(String name, FieldType type) {
  this.name = name;

  this.fieldsData = value;

  this.isStored = store.isStored();

  this.isIndexed = index.isIndexed();
  this.isTokenized = index.isAnalyzed();
  this.omitNorms = index.omitNorms();
  if (index == Index.NO) {
    // note: now this reads even weirder than before
    this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    this.type = type;
  }

  this.isBinary = false;

  setStoreTermVector(termVector);
}

/**
 * Create a tokenized and indexed field that is not stored. Term vectors will
 * not be stored. The Reader is read only when the Document is added to the index,
 * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
 * has been called.
 *
 * @param name The name of the field
 * @param reader The reader with the content
 * @throws NullPointerException if name or reader is <code>null</code>
 */
public Field(String name, Reader reader) {
  this(name, reader, TermVector.NO);
}

/**
 * Create a tokenized and indexed field that is not stored, optionally with
 * storing term vectors. The Reader is read only when the Document is added to the index,
 * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
 * has been called.
 *
 * @param name The name of the field
 * @param reader The reader with the content
 * @param termVector Whether term vector should be stored
 * @throws NullPointerException if name or reader is <code>null</code>
 */
public Field(String name, Reader reader, TermVector termVector) {
  if (name == null)
public Field(String name, FieldType type, Reader reader) {
  if (name == null) {
    throw new NullPointerException("name cannot be null");
  if (reader == null)
  }
  if (reader == null) {
    throw new NullPointerException("reader cannot be null");
  }

  this.name = name;
  this.fieldsData = reader;

  this.isStored = false;

  this.isIndexed = true;
  this.isTokenized = true;

  this.isBinary = false;

  setStoreTermVector(termVector);
  this.type = type;
}

/**
 * Create a tokenized and indexed field that is not stored. Term vectors will
 * not be stored. This is useful for pre-analyzed fields.
 * The TokenStream is read only when the Document is added to the index,
 * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
 * has been called.
 *
 * @param name The name of the field
 * @param tokenStream The TokenStream with the content
 * @throws NullPointerException if name or tokenStream is <code>null</code>
 */
public Field(String name, TokenStream tokenStream) {
  this(name, tokenStream, TermVector.NO);
}

/**
 * Create a tokenized and indexed field that is not stored, optionally with
 * storing term vectors. This is useful for pre-analyzed fields.
 * The TokenStream is read only when the Document is added to the index,
 * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
 * has been called.
 *
 * @param name The name of the field
 * @param tokenStream The TokenStream with the content
 * @param termVector Whether term vector should be stored
 * @throws NullPointerException if name or tokenStream is <code>null</code>
 */
public Field(String name, TokenStream tokenStream, TermVector termVector) {
  if (name == null)
public Field(String name, FieldType type, TokenStream tokenStream) {
  if (name == null) {
    throw new NullPointerException("name cannot be null");
  if (tokenStream == null)
  }
  if (tokenStream == null) {
    throw new NullPointerException("tokenStream cannot be null");
  }

  this.name = name;
  this.fieldsData = null;
  this.tokenStream = tokenStream;

  this.isStored = false;

  this.isIndexed = true;
  this.isTokenized = true;

  this.isBinary = false;

  setStoreTermVector(termVector);
  this.type = type;
}


/**
 * Create a stored field with binary value. Optionally the value may be compressed.
 *
 * @param name The name of the field
 * @param value The binary value
 */
public Field(String name, byte[] value) {
  this(name, value, 0, value.length);
public Field(String name, FieldType type, byte[] value) {
  this(name, type, value, 0, value.length);
}

/**
 * Create a stored field with binary value. Optionally the value may be compressed.
 *
 * @param name The name of the field
 * @param value The binary value
 * @param offset Starting offset in value where this Field's bytes are
 * @param length Number of bytes to use for this Field, starting at offset
 */
public Field(String name, byte[] value, int offset, int length) {

  if (name == null)
    throw new IllegalArgumentException("name cannot be null");
  if (value == null)
    throw new IllegalArgumentException("value cannot be null");

public Field(String name, FieldType type, byte[] value, int offset, int length) {
  this.fieldsData = new BytesRef(value, offset, length);
  this.type = type;
  this.name = name;
}

public Field(String name, FieldType type, BytesRef bytes) {
  this.fieldsData = bytes;
  this.type = type;
  this.name = name;
}

public Field(String name, FieldType type, String value) {
  if (name == null) {
    throw new IllegalArgumentException("name cannot be null");
  }
  if (value == null) {
    throw new IllegalArgumentException("value cannot be null");
  }
  if (!type.stored() && !type.indexed()) {
    throw new IllegalArgumentException("it doesn't make sense to have a field that "
      + "is neither indexed nor stored");
  }
  if (!type.indexed() && !type.tokenized() && (type.storeTermVectors())) {
    throw new IllegalArgumentException("cannot store term vector information "
      + "for a field that is not indexed");
  }

  this.type = type;
  this.name = name;
  this.fieldsData = value;
}

/**
 * The value of the field as a String, or null. If null, the Reader value or
 * binary value is used. Exactly one of stringValue(), readerValue(), and
 * getBinaryValue() must be set.
 */
public String stringValue() {
  return fieldsData instanceof String ? (String) fieldsData : null;
}

/**
 * The value of the field as a Reader, or null. If null, the String value or
 * binary value is used. Exactly one of stringValue(), readerValue(), and
 * getBinaryValue() must be set.
 */
public Reader readerValue() {
  return fieldsData instanceof Reader ? (Reader) fieldsData : null;
}

/**
 * The TokenStream for this field to be used when indexing, or null. If null,
 * the Reader value or String value is analyzed to produce the indexed tokens.
 */
public TokenStream tokenStreamValue() {
  return tokenStream;
}

/**
 * <p>
 * Expert: change the value of this field. This can be used during indexing to
 * re-use a single Field instance to improve indexing speed by avoiding GC
 * cost of new'ing and reclaiming Field instances. Typically a single
 * {@link Document} instance is re-used as well. This helps most on small
 * documents.
 * </p>
 *
 * <p>
 * Each Field instance should only be used once within a single
 * {@link Document} instance. See <a
 * href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed"
 * >ImproveIndexingSpeed</a> for details.
 * </p>
 */
public void setValue(String value) {
  if (isBinary()) {
    throw new IllegalArgumentException(
        "cannot set a String value on a binary field");
  }
  fieldsData = value;
}

isStored = true;
isIndexed = false;
isTokenized = false;
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
omitNorms = true;
/**
 * Expert: change the value of this field. See <a
 * href="#setValue(java.lang.String)">setValue(String)</a>.
 */
public void setValue(Reader value) {
  if (isBinary()) {
    throw new IllegalArgumentException(
        "cannot set a Reader value on a binary field");
  }
  if (stored()) {
    throw new IllegalArgumentException(
        "cannot set a Reader value on a stored field");
  }
  fieldsData = value;
}

isBinary = true;
/**
 * Expert: change the value of this field. See <a
 * href="#setValue(java.lang.String)">setValue(String)</a>.
 */
public void setValue(byte[] value) {
  if (!isBinary()) {
    throw new IllegalArgumentException(
        "cannot set a byte[] value on a non-binary field");
  }
  fieldsData = new BytesRef(value);
}

/**
 * Expert: change the value of this field. See <a
 * href="#setValue(java.lang.String)">setValue(String)</a>.
 */
/*
public void setValue(byte[] value, int offset, int length) {
  if (!isBinary) {
    throw new IllegalArgumentException(
        "cannot set a byte[] value on a non-binary field");
  }
  fieldsData = value;
  binaryLength = length;
  binaryOffset = offset;
}
*/

setStoreTermVector(TermVector.NO);
/**
 * Expert: sets the token stream to be used for indexing and causes
 * isIndexed() and isTokenized() to return true. May be combined with stored
 * values from stringValue() or getBinaryValue()
 */
public void setTokenStream(TokenStream tokenStream) {
  if (!indexed() || !tokenized()) {
    throw new IllegalArgumentException(
        "cannot set token stream on non indexed and tokenized field");
  }
  this.tokenStream = tokenStream;
}

public String name() {
  return name;
}

public float boost() {
  return boost;
}

/** Sets the boost factor for hits on this field. This value will be
 * multiplied into the score of all hits on this field of this
 * document.
 *
 * <p>The boost is used to compute the norm factor for the field. By
 * default, in the {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method,
 * the boost value is multiplied by the length normalization factor and then
 * rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
 * index. One should attempt to ensure that this product does not overflow
 * the range of that encoding.
 *
 * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
 * @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)
 */
public void setBoost(float boost) {
  this.boost = boost;
}

public boolean numeric() {
  return false;
}

public Number numericValue() {
  return null;
}

public NumericField.DataType numericDataType() {
  return null;
}

public BytesRef binaryValue() {
  if (!isBinary()) {
    return null;
  } else {
    return (BytesRef) fieldsData;
  }
}

/** methods from inner FieldType */

public boolean isBinary() {
  return fieldsData instanceof BytesRef;
}

public boolean stored() {
  return type.stored();
}

public boolean indexed() {
  return type.indexed();
}

public boolean tokenized() {
  return type.tokenized();
}

public boolean omitNorms() {
  return type.omitNorms();
}

public IndexOptions indexOptions() {
  return type.indexOptions();
}

public boolean storeTermVectors() {
  return type.storeTermVectors();
}

public boolean storeTermVectorOffsets() {
  return type.storeTermVectorOffsets();
}

public boolean storeTermVectorPositions() {
  return type.storeTermVectorPositions();
}

/** Prints a Field for human consumption. */
@Override
public String toString() {
  StringBuilder result = new StringBuilder();
  result.append(type.toString());
  result.append('<');
  result.append(name);
  result.append(':');

  if (fieldsData != null && type.lazy() == false) {
    result.append(fieldsData);
  }

  result.append('>');
  return result.toString();
}

public void setDocValues(PerDocFieldValues docValues) {
  this.docValues = docValues;
}

@Override
public PerDocFieldValues docValues() {
  return null;
}

@Override
public ValueType docValuesType() {
  return null;
}

/** Returns FieldType for this field. */
public FieldType getFieldType() {
  return type;
}
}

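With the constructors above, indexing options move off the Store/Index/TermVector enums and onto a FieldType supplied at construction time. As a rough sketch of the new style (the idType settings below mirror the old NOT_ANALYZED_NO_NORMS plus DOCS_ONLY combination; the field name and value are illustrative, and imports from org.apache.lucene.document and org.apache.lucene.index.FieldInfo are assumed):

// Sketch only: building a Field from an explicit FieldType.
FieldType idType = new FieldType();
idType.setIndexed(true);
idType.setStored(true);
idType.setTokenized(false);     // index the value as a single term
idType.setOmitNorms(true);      // no norms for this keyword-style field
idType.setIndexOptions(IndexOptions.DOCS_ONLY);
idType.freeze();                // shared types should not change afterwards

Document doc = new Document();
doc.add(new Field("id", idType, "lucene-2308"));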
@@ -0,0 +1,186 @@

package org.apache.lucene.document;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.FieldInfo.IndexOptions;

public class FieldType {

  private boolean indexed;
  private boolean stored;
  private boolean tokenized;
  private boolean storeTermVectors;
  private boolean storeTermVectorOffsets;
  private boolean storeTermVectorPositions;
  private boolean omitNorms;
  private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
  private boolean lazy;
  private boolean frozen;

  public FieldType(FieldType ref) {
    this.indexed = ref.indexed();
    this.stored = ref.stored();
    this.tokenized = ref.tokenized();
    this.storeTermVectors = ref.storeTermVectors();
    this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
    this.storeTermVectorPositions = ref.storeTermVectorPositions();
    this.omitNorms = ref.omitNorms();
    this.indexOptions = ref.indexOptions();
    this.lazy = ref.lazy();
  }

  public FieldType() {
  }

  private void checkIfFrozen() {
    if (frozen) {
      throw new IllegalStateException();
    }
  }

  public void freeze() {
    this.frozen = true;
  }

  public boolean indexed() {
    return this.indexed;
  }

  public void setIndexed(boolean value) {
    checkIfFrozen();
    this.indexed = value;
  }

  public boolean stored() {
    return this.stored;
  }

  public void setStored(boolean value) {
    checkIfFrozen();
    this.stored = value;
  }

  public boolean tokenized() {
    return this.tokenized;
  }

  public void setTokenized(boolean value) {
    checkIfFrozen();
    this.tokenized = value;
  }

  public boolean storeTermVectors() {
    return this.storeTermVectors;
  }

  public void setStoreTermVectors(boolean value) {
    checkIfFrozen();
    this.storeTermVectors = value;
  }

  public boolean storeTermVectorOffsets() {
    return this.storeTermVectorOffsets;
  }

  public void setStoreTermVectorOffsets(boolean value) {
    checkIfFrozen();
    this.storeTermVectorOffsets = value;
  }

  public boolean storeTermVectorPositions() {
    return this.storeTermVectorPositions;
  }

  public void setStoreTermVectorPositions(boolean value) {
    checkIfFrozen();
    this.storeTermVectorPositions = value;
  }

  public boolean omitNorms() {
    return this.omitNorms;
  }

  public void setOmitNorms(boolean value) {
    checkIfFrozen();
    this.omitNorms = value;
  }

  public IndexOptions indexOptions() {
    return this.indexOptions;
  }

  public void setIndexOptions(IndexOptions value) {
    checkIfFrozen();
    this.indexOptions = value;
  }

  public boolean lazy() {
    return this.lazy;
  }

  public void setLazy(boolean value) {
    checkIfFrozen();
    this.lazy = value;
  }

  /** Prints a FieldType for human consumption. */
  @Override
  public final String toString() {
    StringBuilder result = new StringBuilder();
    if (stored()) {
      result.append("stored");
    }
    if (indexed()) {
      if (result.length() > 0)
        result.append(",");
      result.append("indexed");
    }
    if (tokenized()) {
      if (result.length() > 0)
        result.append(",");
      result.append("tokenized");
    }
    if (storeTermVectors()) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVector");
    }
    if (storeTermVectorOffsets()) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorOffsets");
    }
    if (storeTermVectorPositions()) {
      if (result.length() > 0)
        result.append(",");
      result.append("termVectorPosition");
    }
    if (omitNorms()) {
      result.append(",omitNorms");
    }
    if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
      result.append(",indexOptions=");
      result.append(indexOptions);
    }
    if (lazy()) {
      result.append(",lazy");
    }

    return result.toString();
  }
}

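Once frozen, a FieldType rejects every setter via the IllegalStateException in checkIfFrozen(), and the copy constructor deliberately does not copy the frozen flag, which enables a copy-then-customize pattern. A short sketch of how this behaves (variable names are illustrative):

// Sketch only: freeze() makes a FieldType effectively immutable.
FieldType base = new FieldType();
base.setIndexed(true);
base.setTokenized(true);
base.freeze();

// base.setStored(true);  // would now throw IllegalStateException

// To vary a frozen type, copy it first; the copy starts out unfrozen:
FieldType storedVariant = new FieldType(base);
storedVariant.setStored(true);
storedVariant.freeze();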
@@ -1,238 +0,0 @@

package org.apache.lucene.document;

/**
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.search.PhraseQuery; // for javadocs
import org.apache.lucene.search.spans.SpanQuery; // for javadocs

import java.io.Reader;

/**
 * Synonymous with {@link Field}.
 *
 * <p><b>WARNING</b>: This interface may change within minor versions, despite Lucene's backward compatibility requirements.
 * This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards
 * compatibility promises remain intact. For example, Lucene can still
 * read and write indices created within the same major version.
 * </p>
 *
 **/
public interface Fieldable {
  /** Sets the boost factor for hits on this field. This value will be
   * multiplied into the score of all hits on this field of this
   * document.
   *
   * <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
   * containing this field. If a document has multiple fields with the same
   * name, all such values are multiplied together. This product is then
   * used to compute the norm factor for the field. By
   * default, in the {@link
   * org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
   * by the length normalization factor
   * and then rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
   * index. One should attempt to ensure that this product does not overflow
   * the range of that encoding.
   *
   * @see org.apache.lucene.document.Document#setBoost(float)
   * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
   * @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)
   */
  void setBoost(float boost);

  /** Returns the boost factor for hits for this field.
   *
   * <p>The default value is 1.0.
   *
   * <p>Note: this value is not stored directly with the document in the index.
   * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
   * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
   * this field was indexed.
   *
   * @see #setBoost(float)
   */
  float getBoost();

  /** Returns the name of the field.
   * For example "date", "title", "body", ...
   */
  String name();

  /** The value of the field as a String, or null.
   * <p>
   * For indexing, if isStored()==true, the stringValue() will be used as the stored field value
   * unless isBinary()==true, in which case getBinaryValue() will be used.
   *
   * If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
   * If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
   * else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
   */
  public String stringValue();

  /** The value of the field as a Reader, which can be used at index time to generate indexed tokens.
   * @see #stringValue()
   */
  public Reader readerValue();

  /** The TokenStream for this field to be used when indexing, or null.
   * @see #stringValue()
   */
  public TokenStream tokenStreamValue();

  /** True if the value of the field is to be stored in the index for return
   with search hits. */
  boolean isStored();

  /** True if the value of the field is to be indexed, so that it may be
   searched on. */
  boolean isIndexed();

  /** True if the value of the field should be tokenized as text prior to
   indexing. Un-tokenized fields are indexed as a single word and may not be
   Reader-valued. */
  boolean isTokenized();

  /** True if the term or terms used to index this field are stored as a term
   * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
   * These methods do not provide access to the original content of the field,
   * only to terms used to index it. If the original content must be
   * preserved, use the <code>stored</code> attribute instead.
   *
   * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
   */
  boolean isTermVectorStored();

  /**
   * True if terms are stored as term vector together with their offsets
   * (start and end position in source text).
   */
  boolean isStoreOffsetWithTermVector();

  /**
   * True if terms are stored as term vector together with their token positions.
   */
  boolean isStorePositionWithTermVector();

  /** True if the value of the field is stored as binary */
  boolean isBinary();

  /** True if norms are omitted for this indexed field */
  boolean getOmitNorms();

  /** Expert:
   *
   * If set, omit normalization factors associated with this indexed field.
   * This effectively disables indexing boosts and length normalization for this field.
   */
  void setOmitNorms(boolean omitNorms);

  /**
   * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
   * its values via {@link #stringValue()} or {@link #getBinaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
   * retrieved the {@link Document} is still open.
   *
   * @return true if this field can be loaded lazily
   */
  boolean isLazy();

  /**
   * Returns offset into byte[] segment that is used as value, if Field is not binary
   * returned value is undefined
   * @return index of the first character in byte[] segment that represents this Field value
   */
  abstract int getBinaryOffset();

  /**
   * Returns length of byte[] segment that is used as value, if Field is not binary
   * returned value is undefined
   * @return length of byte[] segment that represents this Field value
   */
  abstract int getBinaryLength();

  /**
   * Return the raw byte[] for the binary field. Note that
   * you must also call {@link #getBinaryLength} and {@link
   * #getBinaryOffset} to know which range of bytes in this
   * returned array belong to the field.
   * @return reference to the Field value as byte[].
   */
  abstract byte[] getBinaryValue();

  /**
   * Return the raw byte[] for the binary field. Note that
   * you must also call {@link #getBinaryLength} and {@link
   * #getBinaryOffset} to know which range of bytes in this
   * returned array belong to the field.<p>
   * About reuse: if you pass in the result byte[] and it is
   * used, likely the underlying implementation will hold
   * onto this byte[] and return it in future calls to
   * {@link #getBinaryValue()}.
   * So if you subsequently re-use the same byte[] elsewhere
   * it will alter this Fieldable's value.
   * @param result User defined buffer that will be used if
   * possible. If this is null or not large enough, a new
   * buffer is allocated
   * @return reference to the Field value as byte[].
   */
  abstract byte[] getBinaryValue(byte[] result);

  /** @see #setIndexOptions */
  IndexOptions getIndexOptions();

  /** Expert:
   *
   * If set, omit term freq, and optionally positions and payloads from
   * postings for this field.
   *
   * <p><b>NOTE</b>: While this option reduces storage space
   * required in the index, it also means any query
   * requiring positional information, such as {@link
   * PhraseQuery} or {@link SpanQuery} subclasses will
   * fail with an exception.
   */
  void setIndexOptions(IndexOptions indexOptions);

  /**
   * Returns the {@link PerDocFieldValues}
   */
  public PerDocFieldValues getDocValues();

  /**
   * Sets the {@link PerDocFieldValues} for this field. If
   * {@link PerDocFieldValues} is set this field will store per-document values
   *
   * @see IndexDocValues
   */
  public void setDocValues(PerDocFieldValues docValues);

  /**
   * Returns <code>true</code> iff {@link PerDocFieldValues} are set on this
   * field.
   */
  public boolean hasDocValues();

  /**
   * Returns the {@link ValueType} of the set {@link PerDocFieldValues} or
   * <code>null</code> if not set.
   */
  public ValueType docValuesType();
}

@ -20,16 +20,13 @@ import java.io.Reader;
|
|||
import java.util.Comparator;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Field.Index;
|
||||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.Field.TermVector;
|
||||
import org.apache.lucene.index.values.PerDocFieldValues;
|
||||
import org.apache.lucene.index.values.ValueType;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* This class provides a {@link AbstractField} that enables storing of typed
|
||||
* This class provides a {@link Field} that enables storing of typed
|
||||
* per-document values for scoring, sorting or value retrieval. Here's an
|
||||
* example usage, adding an int value:
|
||||
*
|
||||
|
@ -54,16 +51,14 @@ import org.apache.lucene.util.BytesRef;
|
|||
* </pre>
|
||||
*
|
||||
* <p>
|
||||
* If doc values are stored in addition to an indexed ({@link Index}) or stored
|
||||
* ({@link Store}) value it's recommended to use the {@link IndexDocValuesField}'s
|
||||
* {@link #set(AbstractField)} API:
|
||||
* If doc values are stored in addition to an indexed ({@link FieldType#setIndexed(boolean)}) or stored
|
||||
* ({@link FieldType#setStored(boolean)}) value it's recommended to pass the appropriate {@link FieldType}
|
||||
* when creating the field:
|
||||
*
|
||||
* <pre>
|
||||
* IndexDocValuesField field = new IndexDocValuesField(name);
|
||||
* Field indexedField = new Field(name, stringValue, Stored.NO, Indexed.ANALYZED);
|
||||
* IndexDocValuesField field = new IndexDocValuesField(name, StringField.TYPE_STORED);
|
||||
* Document document = new Document();
|
||||
* document.add(indexedField);
|
||||
* field.set(indexedField);
|
||||
* document.add(field);
|
||||
* for(all documents) {
|
||||
* ...
|
||||
* field.setInt(value)
|
||||
|
@ -73,7 +68,8 @@ import org.apache.lucene.util.BytesRef;
|
|||
* </pre>
|
||||
*
|
||||
* */
|
||||
public class IndexDocValuesField extends AbstractField implements PerDocFieldValues {
|
||||
// TODO: maybe rename to DocValuesField?
|
||||
public class IndexDocValuesField extends Field implements PerDocFieldValues {
|
||||
|
||||
protected BytesRef bytes;
|
||||
protected double doubleValue;
|
||||
|
@ -85,21 +81,27 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
|
|||
* Creates a new {@link IndexDocValuesField} with the given name.
|
||||
*/
|
||||
public IndexDocValuesField(String name) {
|
||||
super(name, Store.NO, Index.NO, TermVector.NO);
|
||||
setDocValues(this);
|
||||
this(name, new FieldType());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link IndexDocValuesField} prototype
|
||||
*/
|
||||
IndexDocValuesField() {
|
||||
this("");
|
||||
public IndexDocValuesField(String name, FieldType type) {
|
||||
this(name, type, null);
|
||||
}
|
||||
|
||||
public IndexDocValuesField(String name, FieldType type, String value) {
|
||||
super(name, type);
|
||||
fieldsData = value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public PerDocFieldValues docValues() {
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the given <code>long</code> value and sets the field's {@link ValueType} to
|
||||
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
|
||||
* default type use {@link #setType(ValueType)}.
|
||||
* default type use {@link #setDocValuesType(ValueType)}.
|
||||
*/
|
||||
public void setInt(long value) {
|
||||
setInt(value, false);
|
||||
|
@ -124,7 +126,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
|
|||
/**
|
||||
* Sets the given <code>int</code> value and sets the field's {@link ValueType} to
|
||||
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
|
||||
* default type use {@link #setType(ValueType)}.
|
||||
* default type use {@link #setDocValuesType(ValueType)}.
|
||||
*/
|
||||
public void setInt(int value) {
|
||||
setInt(value, false);
|
||||
|
@ -149,7 +151,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
|
|||
/**
|
||||
* Sets the given <code>short</code> value and sets the field's {@link ValueType} to
|
||||
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
|
||||
* default type use {@link #setType(ValueType)}.
|
||||
* default type use {@link #setDocValuesType(ValueType)}.
|
||||
*/
|
||||
public void setInt(short value) {
|
||||
setInt(value, false);
|
||||
|
@ -174,11 +176,12 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
|
|||
/**
|
||||
* Sets the given <code>byte</code> value and sets the field's {@link ValueType} to
|
||||
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
|
||||
* default type use {@link #setType(ValueType)}.
|
||||
* default type use {@link #setDocValuesType(ValueType)}.
|
||||
*/
|
||||
public void setInt(byte value) {
|
||||
setInt(value, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the given <code>byte</code> value as a 8 bit signed integer.
|
||||
*
|
||||
|
@ -198,7 +201,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
|
|||
/**
|
||||
* Sets the given <code>float</code> value and sets the field's {@link ValueType}
|
||||
* to {@link ValueType#FLOAT_32} unless already set. If you want to
|
||||
* change the type use {@link #setType(ValueType)}.
|
||||
* change the type use {@link #setDocValuesType(ValueType)}.
|
||||
*/
|
||||
public void setFloat(float value) {
|
||||
if (type == null) {
|
||||
|
@ -210,7 +213,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
|
|||
/**
|
||||
* Sets the given <code>double</code> value and sets the field's {@link ValueType}
|
||||
* to {@link ValueType#FLOAT_64} unless already set. If you want to
|
||||
* change the default type use {@link #setType(ValueType)}.
|
||||
* change the default type use {@link #setDocValuesType(ValueType)}.
|
||||
*/
|
||||
public void setFloat(double value) {
|
||||
if (type == null) {
|
||||
|
@ -241,7 +244,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
|
|||
if (value == null) {
|
||||
throw new IllegalArgumentException("value must not be null");
|
||||
}
|
||||
setType(type);
|
||||
setDocValuesType(type);
|
||||
if (bytes == null) {
|
||||
bytes = new BytesRef(value);
|
||||
} else {
|
||||
|
@ -289,20 +292,13 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal

  /**
   * Sets the {@link ValueType} for this field.
   */
  public void setType(ValueType type) {
  public void setDocValuesType(ValueType type) {
    if (type == null) {
      throw new IllegalArgumentException("Type must not be null");
    }
    this.type = type;
  }

  /**
   * Returns the field's {@link ValueType}
   */
  public ValueType type() {
    return type;
  }

  /**
   * Returns always <code>null</code>
   */

@ -310,13 +306,6 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
    return null;
  }

  /**
   * Returns always <code>null</code>
   */
  public String stringValue() {
    return null;
  }

  /**
   * Returns always <code>null</code>
   */

@ -324,25 +313,14 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
    return null;
  }

  /**
   * Sets this {@link IndexDocValuesField} to the given {@link AbstractField} and
   * returns the given field. Any modifications to this instance will be visible
   * to the given field.
   */
  public <T extends AbstractField> T set(T field) {
    field.setDocValues(this);
    return field;
  @Override
  public ValueType docValuesType() {
    return type;
  }

  /**
   * Sets a new {@link PerDocFieldValues} instance on the given field with the
   * given type and returns it.
   *
   */
  public static <T extends AbstractField> T set(T field, ValueType type) {
    if (field instanceof IndexDocValuesField)
      return field;
    final IndexDocValuesField valField = new IndexDocValuesField();
  @Override
  public String toString() {
    final String value;
    switch (type) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:

@ -350,9 +328,43 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT:
      BytesRef ref = field.isBinary() ? new BytesRef(field.getBinaryValue(),
          field.getBinaryOffset(), field.getBinaryLength()) : new BytesRef(
          field.stringValue());
      value = "bytes:" + ref.utf8ToString();
      break;
    case VAR_INTS:
      value = "int:" + longValue;
      break;
    case FLOAT_32:
      value = "float32:" + doubleValue;
      break;
    case FLOAT_64:
      value = "float64:" + doubleValue;
      break;
    default:
      throw new IllegalArgumentException("unknown type: " + type);
    }
    return "<" + name() + ": IndexDocValuesField " + value + ">";
  }

  /**
   * Returns an IndexDocValuesField holding the value from
   * the provided string field, as the specified type. The
   * incoming field must have a string value. The name, {@link
   * FieldType} and string value are carried over from the
   * incoming Field.
   */
  public static IndexDocValuesField build(Field field, ValueType type) {
    if (field instanceof IndexDocValuesField) {
      return (IndexDocValuesField) field;
    }
    final IndexDocValuesField valField = new IndexDocValuesField(field.name(), field.getFieldType(), field.stringValue());
    switch (type) {
    case BYTES_FIXED_DEREF:
    case BYTES_FIXED_SORTED:
    case BYTES_FIXED_STRAIGHT:
    case BYTES_VAR_DEREF:
    case BYTES_VAR_SORTED:
    case BYTES_VAR_STRAIGHT:
      BytesRef ref = field.isBinary() ? field.binaryValue() : new BytesRef(field.stringValue());
      valField.setBytes(ref, type);
      break;
    case VAR_INTS:

@ -367,7 +379,6 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
    default:
      throw new IllegalArgumentException("unknown type: " + type);
    }
    return valField.set(field);
    return valField;
  }

}
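A short usage sketch for the new build() entry point above. This is illustrative only; the package locations of IndexDocValuesField, ValueType, and StringField are assumed from this trunk snapshot:

    // Hypothetical sketch: index and store a title, and also write it as doc values.
    Document doc = new Document();
    Field title = new Field("title", StringField.TYPE_STORED, "Lucene in Action");
    // build() carries over the name, FieldType and string value:
    doc.add(IndexDocValuesField.build(title, ValueType.BYTES_VAR_STRAIGHT));

Since build() returns the incoming instance unchanged when it is already an IndexDocValuesField, the call is safe to apply repeatedly.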
@ -22,22 +22,24 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.document.NumericField.DataType;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.FieldCache; // javadocs

/**
 * <p>This class provides a {@link Field} that enables indexing
 * of numeric values for efficient range filtering and
 * sorting. Here's an example usage, adding an int value:
 * <p>
 * This class provides a {@link Field} that enables indexing of numeric values
 * for efficient range filtering and sorting. Here's an example usage, adding an
 * int value:
 *
 * <pre>
 * document.add(new NumericField(name).setIntValue(value));
 * </pre>
 *
 * For optimal performance, re-use the
 * <code>NumericField</code> and {@link Document} instance for more than
 * one document:
 * For optimal performance, re-use the <code>NumericField</code> and
 * {@link Document} instance for more than one document:
 *
 * <pre>
 * NumericField field = new NumericField(name);

@ -74,7 +76,7 @@ import org.apache.lucene.search.FieldCache; // javadocs
 *
 * <p>By default, a <code>NumericField</code>'s value is not stored but
 * is indexed for range filtering and sorting. You can use
 * the {@link #NumericField(String,Field.Store,boolean)}
 * the {@link #NumericField(String, FieldType)}
 * constructor if you need to change these defaults.</p>
 *
 * <p>You may add the same field name as a <code>NumericField</code> to

@ -102,7 +104,7 @@ import org.apache.lucene.search.FieldCache; // javadocs
 * default value, 4, was selected for a reasonable tradeoff
 * of disk space consumption versus performance. You can
 * use the expert constructor {@link
 * #NumericField(String,int,Field.Store,boolean)} if you'd
 * #NumericField(String,int,FieldType)} if you'd
 * like to change the value. Note that you must also
 * specify a congruent value when creating {@link
 * NumericRangeQuery} or {@link NumericRangeFilter}.

@ -129,94 +131,136 @@ import org.apache.lucene.search.FieldCache; // javadocs
 *
 * @since 2.9
 */
public final class NumericField extends AbstractField {
public final class NumericField extends Field {

  /** Data type of the value in {@link NumericField}.
   * @since 3.2
   */
  public static enum DataType { INT, LONG, FLOAT, DOUBLE }

  public static final FieldType TYPE_UNSTORED = new FieldType();
  public static final FieldType TYPE_STORED = new FieldType();
  static {
    TYPE_UNSTORED.setIndexed(true);
    TYPE_UNSTORED.setTokenized(true);
    TYPE_UNSTORED.setOmitNorms(true);
    TYPE_UNSTORED.setIndexOptions(IndexOptions.DOCS_ONLY);
    TYPE_UNSTORED.freeze();

    TYPE_STORED.setIndexed(true);
    TYPE_STORED.setStored(true);
    TYPE_STORED.setTokenized(true);
    TYPE_STORED.setOmitNorms(true);
    TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
    TYPE_STORED.freeze();
  }

  //public static enum DataType { INT, LONG, FLOAT, DOUBLE }

  private DataType dataType;
  private transient NumericTokenStream numericTS;
  private DataType type;
  private final int precisionStep;

  /**
   * Creates a field for numeric values using the default <code>precisionStep</code>
   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
   * a numeric value, before indexing a document containing this field,
   * set a value using the various set<em>???</em>Value() methods.
   * This constructor creates an indexed, but not stored field.
   * @param name the field name
   * Creates a field for numeric values using the default
   * <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
   * The instance is not yet initialized with a numeric value, before indexing a
   * document containing this field, set a value using the various set
   * <em>???</em>Value() methods. This constructor creates an indexed, but not
   * stored field.
   *
   * @param name
   *          the field name
   */
  public NumericField(String name) {
    this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
    this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.TYPE_UNSTORED);
  }

  /**
   * Creates a field for numeric values using the default <code>precisionStep</code>
   * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
   * a numeric value, before indexing a document containing this field,
   * set a value using the various set<em>???</em>Value() methods.
   * @param name the field name
   * @param store if the field should be stored, {@link Document#getFieldable}
   *        then returns {@code NumericField} instances on search results.
   * @param index if the field should be indexed using {@link NumericTokenStream}
   * Creates a field for numeric values using the default
   * <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
   * The instance is not yet initialized with a numeric value, before indexing a
   * document containing this field, set a value using the various set
   * <em>???</em>Value() methods.
   *
   * @param name
   *          the field name
   * @param type
   *          if the default field settings should be altered, e.g. stored,
   *          {@link Document#getField} then returns {@code NumericField}
   *          instances on search results, or indexed using
   *          {@link NumericTokenStream}
   */
  public NumericField(String name, Field.Store store, boolean index) {
    this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
  public NumericField(String name, FieldType type) {
    this(name, NumericUtils.PRECISION_STEP_DEFAULT, type);
  }

  /**
   * Creates a field for numeric values with the specified
   * <code>precisionStep</code>. The instance is not yet initialized with
   * a numeric value, before indexing a document containing this field,
   * set a value using the various set<em>???</em>Value() methods.
   * This constructor creates an indexed, but not stored field.
   * @param name the field name
   * @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
   * <code>precisionStep</code>. The instance is not yet initialized with a
   * numeric value, before indexing a document containing this field, set a
   * value using the various set<em>???</em>Value() methods. This constructor
   * creates an indexed, but not stored field.
   *
   * @param name
   *          the field name
   * @param precisionStep
   *          the used <a
   *          href="../search/NumericRangeQuery.html#precisionStepDesc"
   *          >precision step</a>
   */
  public NumericField(String name, int precisionStep) {
    this(name, precisionStep, Field.Store.NO, true);
    this(name, precisionStep, NumericField.TYPE_UNSTORED);
  }

  /**
   * Creates a field for numeric values with the specified
   * <code>precisionStep</code>. The instance is not yet initialized with
   * a numeric value, before indexing a document containing this field,
   * set a value using the various set<em>???</em>Value() methods.
   * @param name the field name
   * @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
   * @param store if the field should be stored, {@link Document#getFieldable}
   *        then returns {@code NumericField} instances on search results.
   * @param index if the field should be indexed using {@link NumericTokenStream}
   * <code>precisionStep</code>. The instance is not yet initialized with a
   * numeric value, before indexing a document containing this field, set a
   * value using the various set<em>???</em>Value() methods.
   *
   * @param name
   *          the field name
   * @param precisionStep
   *          the used <a
   *          href="../search/NumericRangeQuery.html#precisionStepDesc"
   *          >precision step</a>
   * @param type
   *          if the default field settings should be altered, e.g. stored,
   *          {@link Document#getField} then returns {@code NumericField}
   *          instances on search results, or indexed using
   *          {@link NumericTokenStream}
   */
  public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
    super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
  public NumericField(String name, int precisionStep, FieldType type) {
    super(name, type);
    this.precisionStep = precisionStep;
    setIndexOptions(IndexOptions.DOCS_ONLY);
  }

  /** Returns a {@link NumericTokenStream} for indexing the numeric value. */
  public TokenStream tokenStreamValue() {
    if (!isIndexed())
      return null;
    if (!indexed()) return null;
    if (numericTS == null) {
      // lazy init the TokenStream as it is heavy to instantiate (attributes,...),
      // lazy init the TokenStream as it is heavy to instantiate
      // (attributes,...),
      // if not needed (stored field loading)
      numericTS = new NumericTokenStream(precisionStep);
      // initialize value in TokenStream
      if (fieldsData != null) {
        assert type != null;
        assert dataType != null;
        final Number val = (Number) fieldsData;
        switch (type) {
        switch (dataType) {
        case INT:
          numericTS.setIntValue(val.intValue()); break;
          numericTS.setIntValue(val.intValue());
          break;
        case LONG:
          numericTS.setLongValue(val.longValue()); break;
          numericTS.setLongValue(val.longValue());
          break;
        case FLOAT:
          numericTS.setFloatValue(val.floatValue()); break;
          numericTS.setFloatValue(val.floatValue());
          break;
        case DOUBLE:
          numericTS.setDoubleValue(val.doubleValue()); break;
          numericTS.setDoubleValue(val.doubleValue());
          break;
        default:
          assert false : "Should never get here";
        }

@ -225,27 +269,28 @@ public final class NumericField extends AbstractField {
    return numericTS;
  }

  /** Returns always <code>null</code> for numeric fields */
  @Override
  public byte[] getBinaryValue(byte[] result){
    return null;
  }

  /** Returns always <code>null</code> for numeric fields */
  public Reader readerValue() {
    return null;
  }

  /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
   * on search results. It is recommended to use {@link Document#getFieldable} instead
   * that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
   * to return the stored value. */
  /**
   * Returns the numeric value as a string. It is recommended to
   * use {@link Document#getField} instead that returns {@code NumericField}
   * instances. You can then use {@link #numericValue} to return the stored
   * value.
   */
  @Override
  public String stringValue() {
    return (fieldsData == null) ? null : fieldsData.toString();
  }

  /** Returns the current numeric value as a subclass of {@link Number}, <code>null</code> if not yet initialized. */
  public Number getNumericValue() {
  /**
   * Returns the current numeric value as a subclass of {@link Number},
   * <code>null</code> if not yet initialized.
   */
  @Override
  public Number numericValue() {
    return (Number) fieldsData;
  }

@ -254,62 +299,78 @@ public final class NumericField extends AbstractField {
    return precisionStep;
  }

  /** Returns the data type of the current value, {@code null} if not yet set.
  /**
   * Returns the data type of the current value, {@code null} if not yet set.
   *
   * @since 3.2
   */
  public DataType getDataType() {
    return type;
  @Override
  public DataType numericDataType() {
    return dataType;
  }

  @Override
  public boolean numeric() {
    return true;
  }

  /**
   * Initializes the field with the supplied <code>long</code> value.
   * @param value the numeric value
   *
   * @param value
   *          the numeric value
   * @return this instance, because of this you can use it the following way:
   *         <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
   */
  public NumericField setLongValue(final long value) {
    if (numericTS != null) numericTS.setLongValue(value);
    fieldsData = Long.valueOf(value);
    type = DataType.LONG;
    dataType = DataType.LONG;
    return this;
  }

  /**
   * Initializes the field with the supplied <code>int</code> value.
   * @param value the numeric value
   *
   * @param value
   *          the numeric value
   * @return this instance, because of this you can use it the following way:
   *         <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
   */
  public NumericField setIntValue(final int value) {
    if (numericTS != null) numericTS.setIntValue(value);
    fieldsData = Integer.valueOf(value);
    type = DataType.INT;
    dataType = DataType.INT;
    return this;
  }

  /**
   * Initializes the field with the supplied <code>double</code> value.
   * @param value the numeric value
   *
   * @param value
   *          the numeric value
   * @return this instance, because of this you can use it the following way:
   *         <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
   */
  public NumericField setDoubleValue(final double value) {
    if (numericTS != null) numericTS.setDoubleValue(value);
    fieldsData = Double.valueOf(value);
    type = DataType.DOUBLE;
    dataType = DataType.DOUBLE;
    return this;
  }

  /**
   * Initializes the field with the supplied <code>float</code> value.
   * @param value the numeric value
   *
   * @param value
   *          the numeric value
   * @return this instance, because of this you can use it the following way:
   *         <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
   */
  public NumericField setFloatValue(final float value) {
    if (numericTS != null) numericTS.setFloatValue(value);
    fieldsData = Float.valueOf(value);
    type = DataType.FLOAT;
    dataType = DataType.FLOAT;
    return this;
  }
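A minimal sketch of the FieldType-based constructors that replace the old Field.Store/boolean pair (field names here are illustrative):

    Document doc = new Document();
    // Indexed-only, default precisionStep (TYPE_UNSTORED is implied):
    doc.add(new NumericField("price").setDoubleValue(19.95));
    // Indexed and stored, with an expert precisionStep of 8:
    doc.add(new NumericField("timestamp", 8, NumericField.TYPE_STORED)
                .setLongValue(System.currentTimeMillis()));

As the class javadoc notes, a NumericRangeQuery or NumericRangeFilter over "timestamp" must then be created with the same precisionStep of 8.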
@ -0,0 +1,51 @@
package org.apache.lucene.document;

import org.apache.lucene.index.FieldInfo.IndexOptions;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

public final class StringField extends Field {

  public static final FieldType TYPE_UNSTORED = new FieldType();
  public static final FieldType TYPE_STORED = new FieldType();
  static {
    TYPE_UNSTORED.setIndexed(true);
    TYPE_UNSTORED.setOmitNorms(true);
    TYPE_UNSTORED.setIndexOptions(IndexOptions.DOCS_ONLY);
    TYPE_UNSTORED.freeze();

    TYPE_STORED.setIndexed(true);
    TYPE_STORED.setStored(true);
    TYPE_STORED.setOmitNorms(true);
    TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
    TYPE_STORED.freeze();
  }

  public StringField(String name, String value) {
    super(name, TYPE_UNSTORED, value);
  }

  @Override
  public String stringValue() {
    return (fieldsData == null) ? null : fieldsData.toString();
  }

  public boolean isNumeric() {
    return false;
  }
}
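StringField is the single-token replacement for un-analyzed fields: indexed as one token, norms omitted, DOCS_ONLY. A short sketch; only the unstored convenience constructor exists above, so the stored variant goes through Field with the frozen TYPE_STORED (field names illustrative):

    Document doc = new Document();
    // Indexed as one un-analyzed token, norms omitted, not stored:
    doc.add(new StringField("isbn", "978-1933988177"));
    // Stored variant via the frozen TYPE_STORED FieldType:
    doc.add(new Field("filename", StringField.TYPE_STORED, "notes.txt"));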
@ -0,0 +1,54 @@
package org.apache.lucene.document;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;

public final class TextField extends Field {

  public static final FieldType TYPE_UNSTORED = new FieldType();
  public static final FieldType TYPE_STORED = new FieldType();
  static {
    TYPE_UNSTORED.setIndexed(true);
    TYPE_UNSTORED.setTokenized(true);
    TYPE_UNSTORED.freeze();

    TYPE_STORED.setIndexed(true);
    TYPE_STORED.setStored(true);
    TYPE_STORED.setTokenized(true);
    TYPE_STORED.freeze();
  }

  public TextField(String name, Reader reader) {
    super(name, TextField.TYPE_UNSTORED, reader);
  }

  public TextField(String name, String value) {
    super(name, TextField.TYPE_UNSTORED, value);
  }

  public TextField(String name, TokenStream stream) {
    super(name, TextField.TYPE_UNSTORED, stream);
  }

  public boolean isNumeric() {
    return false;
  }
}
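TextField covers analyzed full-text content and accepts a String, Reader, or pre-analyzed TokenStream; a brief sketch (imports elided, file name illustrative):

    Document doc = new Document();
    // Tokenized, indexed, not stored:
    doc.add(new TextField("body", "the quick brown fox"));
    // Large content can be streamed from a Reader instead of a String:
    doc.add(new TextField("contents",
        new BufferedReader(new InputStreamReader(
            new FileInputStream("big.txt"), "UTF-8"))));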
@ -22,16 +22,16 @@
<body>
<p>The logical representation of a {@link org.apache.lucene.document.Document} for indexing and searching.</p>
<p>The document package provides the user level logical representation of content to be indexed and searched. The
package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.document.Fieldable}s.</p>
<h2>Document and Fieldable</h2>
<p>A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.document.Fieldable}s. A
{@link org.apache.lucene.document.Fieldable} is a logical representation of a user's content that needs to be indexed or stored.
{@link org.apache.lucene.document.Fieldable}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized,
stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.document.Fieldable}
package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.index.IndexableField}s.</p>
<h2>Document and IndexableField</h2>
<p>A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.index.IndexableField}s. A
{@link org.apache.lucene.index.IndexableField} is a logical representation of a user's content that needs to be indexed or stored.
{@link org.apache.lucene.index.IndexableField}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized,
stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.index.IndexableField}
for specifics on these properties.
</p>
<p>Note: it is common to refer to {@link org.apache.lucene.document.Document}s having {@link org.apache.lucene.document.Field}s, even though technically they have
{@link org.apache.lucene.document.Fieldable}s.</p>
{@link org.apache.lucene.index.IndexableField}s.</p>
<h2>Working with Documents</h2>
<p>First and foremost, a {@link org.apache.lucene.document.Document} is something created by the user application. It is your job
to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.)

@ -45,7 +45,7 @@ package also provides utilities for working with {@link org.apache.lucene.docume
to simplify indexing of numeric values (and also dates) for fast range queries with {@link org.apache.lucene.search.NumericRangeQuery}
(using a special sortable string representation of numeric values).</p>
<p>The {@link org.apache.lucene.document.FieldSelector} class provides a mechanism to tell Lucene how to load Documents from
storage. If no FieldSelector is used, all Fieldables on a Document will be loaded. As an example of the FieldSelector usage, consider
storage. If no FieldSelector is used, all IndexableFields on a Document will be loaded. As an example of the FieldSelector usage, consider
the common use case of
displaying search results on a web page and then having users click through to see the full document. In this scenario, it is often
the case that there are many small fields and one or two large fields (containing the contents of the original file). Before the FieldSelector,
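Since a Document is now simply a collection of IndexableFields, and this commit passes Documents around as Iterable<? extends IndexableField>, the per-field properties described above can be inspected directly. A small sketch (the doc variable is assumed to be a populated Document):

    for (IndexableField field : doc) {
      System.out.println(field.name()
          + " indexed=" + field.indexed()
          + " tokenized=" + field.tokenized()
          + " stored=" + field.stored());
    }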
@ -17,6 +17,16 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.document.FieldType; // for javadocs
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;

@ -28,21 +38,11 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.AbstractField; // for javadocs
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.BlockTreeTermsReader;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.ValuesEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

@ -189,7 +189,7 @@ public class CheckIndex {

    /** True if at least one of the fields in this segment
     *  has position data
     *  @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
     *  @see FieldType#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
    public boolean hasProx;

    /** Map that includes certain
@ -29,8 +29,6 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.Lock;

@ -559,12 +557,11 @@ class DirectoryReader extends IndexReader implements Cloneable {
    return maxDoc;
  }

  // inherit javadoc
  @Override
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
    ensureOpen();
    int i = readerIndex(n);                          // find segment num
    return subReaders[i].document(n - starts[i], fieldSelector);    // dispatch to segment reader
    int i = readerIndex(docID);                      // find segment num
    subReaders[i].document(docID - starts[i], visitor);             // dispatch to segment reader
  }

  @Override
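The StoredFieldVisitor passed to document() replaces FieldSelector-style selective loading. As a hypothetical sketch only, assuming the callback signatures used elsewhere in this commit and that non-overridden numeric callbacks default to doing nothing, a visitor that records just the byte sizes of string and binary fields (the rough analogue of the old FieldSelectorResult.SIZE) could look like:

    // Sketch; imports (java.util.Map, java.util.HashMap, java.io.IOException,
    // org.apache.lucene.store.IndexInput) elided.
    class SizeOnlyVisitor extends StoredFieldVisitor {
      final Map<String,Integer> sizes = new HashMap<String,Integer>();

      @Override
      public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException {
        sizes.put(fieldInfo.name, numBytes);
        in.seek(in.getFilePointer() + numBytes);  // skip the value bytes
        return false;                             // keep visiting later fields
      }

      @Override
      public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
        sizes.put(fieldInfo.name, numUTF8Bytes);
        in.seek(in.getFilePointer() + numUTF8Bytes);
        return false;
      }
    }

reader.document(docID, visitor) then fills sizes without materializing field values. Note the contract visible in FieldsReader further below: a visitor must either consume or skip the bytes it is handed, and returning true stops the iteration early.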
@ -18,11 +18,10 @@ package org.apache.lucene.index;
 */

import java.io.IOException;
import org.apache.lucene.document.Fieldable;

abstract class DocFieldConsumerPerField {
  /** Processes all occurrences of a single field */
  abstract void processFields(Fieldable[] fields, int count) throws IOException;
  abstract void processFields(IndexableField[] fields, int count) throws IOException;
  abstract void abort();
  abstract FieldInfo getFieldInfo();
}
@ -18,7 +18,6 @@ package org.apache.lucene.index;
 */

import java.io.IOException;
import org.apache.lucene.document.Fieldable;

final class DocFieldConsumersPerField extends DocFieldConsumerPerField {

@ -35,7 +34,7 @@ final class DocFieldConsumersPerField extends DocFieldConsumerPerField {
  }

  @Override
  public void processFields(Fieldable[] fields, int count) throws IOException {
  public void processFields(IndexableField[] fields, int count) throws IOException {
    one.processFields(fields, count);
    two.processFields(fields, count);
  }
@ -22,15 +22,13 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.DocumentsWriterPerThread.DocState;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;

@ -199,22 +197,16 @@ final class DocFieldProcessor extends DocConsumer {
    consumer.startDocument();
    fieldsWriter.startDocument();

    final Document doc = docState.doc;

    fieldCount = 0;

    final int thisFieldGen = fieldGen++;

    final List<Fieldable> docFields = doc.getFields();
    final int numDocFields = docFields.size();

    // Absorb any new fields first seen in this document.
    // Also absorb any changes to fields we had already
    // seen before (eg suddenly turning on norms or
    // vectors, etc.):

    for(int i=0;i<numDocFields;i++) {
      Fieldable field = docFields.get(i);
    for(IndexableField field : docState.doc) {
      final String fieldName = field.name();

      // Make sure we have a PerField allocated

@ -231,21 +223,22 @@ final class DocFieldProcessor extends DocConsumer {
        // needs to be more "pluggable" such that if I want
        // to have a new "thing" my Fields can do, I can
        // easily add it
        FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
                                      field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                                      field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
        FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.indexed(), field.storeTermVectors(),
                                      field.storeTermVectorPositions(), field.storeTermVectorOffsets(),
                                      field.omitNorms(), false, field.indexOptions(), field.docValuesType());

        fp = new DocFieldProcessorPerField(this, fi);
        fp.next = fieldHash[hashPos];
        fieldHash[hashPos] = fp;
        totalFieldCount++;

        if (totalFieldCount >= fieldHash.length/2)
        if (totalFieldCount >= fieldHash.length/2) {
          rehash();
        }
      } else {
        fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
                            field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
                            field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
        fieldInfos.addOrUpdate(fp.fieldInfo.name, field.indexed(), field.storeTermVectors(),
                            field.storeTermVectorPositions(), field.storeTermVectorOffsets(),
                            field.omitNorms(), false, field.indexOptions(), field.docValuesType());
      }

      if (thisFieldGen != fp.lastGen) {

@ -266,12 +259,12 @@ final class DocFieldProcessor extends DocConsumer {

      fp.addField(field);

      if (field.isStored()) {
      if (field.stored()) {
        fieldsWriter.addField(field, fp.fieldInfo);
      }
      if (field.hasDocValues()) {
        final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo);
        docValuesConsumer.add(docState.docID, field.getDocValues());
      final PerDocFieldValues docValues = field.docValues();
      if (docValues != null) {
        docValuesConsumer(docState, fp.fieldInfo).add(docState.docID, docValues);
      }
    }

@ -339,5 +332,4 @@ final class DocFieldProcessor extends DocConsumer {
    docValues.put(fieldInfo.name, docValuesConsumer);
    return docValuesConsumer;
  }

}
@ -17,7 +17,6 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;


@ -34,17 +33,17 @@ final class DocFieldProcessorPerField {
  int lastGen = -1;

  int fieldCount;
  Fieldable[] fields = new Fieldable[1];
  IndexableField[] fields = new IndexableField[1];

  public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) {
    this.consumer = docFieldProcessor.consumer.addField(fieldInfo);
    this.fieldInfo = fieldInfo;
  }

  public void addField(Fieldable field) {
  public void addField(IndexableField field) {
    if (fieldCount == fields.length) {
      int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
      Fieldable[] newArray = new Fieldable[newSize];
      IndexableField[] newArray = new IndexableField[newSize];
      System.arraycopy(fields, 0, newArray, 0, fieldCount);
      fields = newArray;
    }
@ -19,7 +19,6 @@ package org.apache.lucene.index;

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

@ -61,27 +60,32 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
  }

  @Override
  public void processFields(final Fieldable[] fields,
  public void processFields(final IndexableField[] fields,
                            final int count) throws IOException {

    fieldState.reset(docState.doc.getBoost());
    fieldState.reset();

    final boolean doInvert = consumer.start(fields, count);

    for(int i=0;i<count;i++) {

      final Fieldable field = fields[i];
      final IndexableField field = fields[i];

      // TODO FI: this should be "genericized" to querying
      // consumer if it wants to see this particular field
      // tokenized.
      if (field.isIndexed() && doInvert) {
      if (field.indexed() && doInvert) {

        if (i > 0)
          fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);

        if (!field.isTokenized()) {       // un-tokenized field
          String stringValue = field.stringValue();
        // TODO (LUCENE-2309): this analysis logic should be
        // outside of indexer -- field should simply give us
        // a TokenStream, even for multi-valued fields

        if (!field.tokenized()) {         // un-tokenized field
          final String stringValue = field.stringValue();
          assert stringValue != null;
          final int valueLength = stringValue.length();
          parent.singleToken.reinit(stringValue, 0, valueLength);
          fieldState.attributeSource = parent.singleToken;

@ -103,17 +107,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
          final TokenStream stream;
          final TokenStream streamValue = field.tokenStreamValue();

          if (streamValue != null)
          if (streamValue != null) {
            stream = streamValue;
          else {
          } else {
            // the field does not have a TokenStream,
            // so we have to obtain one from the analyzer
            final Reader reader;        // find or make Reader
            final Reader readerValue = field.readerValue();

            if (readerValue != null)
            if (readerValue != null) {
              reader = readerValue;
            else {
            } else {
              String stringValue = field.stringValue();
              if (stringValue == null) {
                throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");

@ -189,7 +193,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
        }

        fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
        fieldState.boost *= field.getBoost();
        fieldState.boost *= field.boost();
      }

      // LUCENE-2387: don't hang onto the field, so GC can
@ -0,0 +1,142 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Set;
import java.util.HashSet;

import org.apache.lucene.document.BinaryField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.IndexInput;

/** A {@link StoredFieldVisitor} that creates a {@link
 *  Document} containing all stored fields, or only specific
 *  requested fields provided to {@link #DocumentStoredFieldVisitor(Set)}.
 *  This is used by {@link IndexReader#document(int)} to load a
 *  document.
 *
 * @lucene.experimental */

public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
  private final Document doc = new Document();
  private final Set<String> fieldsToAdd;

  /** Load only fields named in the provided <code>Set&lt;String&gt;</code>. */
  public DocumentStoredFieldVisitor(Set<String> fieldsToAdd) {
    this.fieldsToAdd = fieldsToAdd;
  }

  /** Load only fields with the provided names. */
  public DocumentStoredFieldVisitor(String... fields) {
    fieldsToAdd = new HashSet<String>(fields.length);
    for(String field : fields) {
      fieldsToAdd.add(field);
    }
  }

  /** Load all stored fields. */
  public DocumentStoredFieldVisitor() {
    this.fieldsToAdd = null;
  }

  @Override
  public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException {
    if (accept(fieldInfo)) {
      final byte[] b = new byte[numBytes];
      in.readBytes(b, 0, b.length);
      doc.add(new BinaryField(fieldInfo.name, b));
    } else {
      in.seek(in.getFilePointer() + numBytes);
    }
    return false;
  }

  @Override
  public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
    if (accept(fieldInfo)) {
      final byte[] b = new byte[numUTF8Bytes];
      in.readBytes(b, 0, b.length);
      FieldType ft = new FieldType(TextField.TYPE_STORED);
      ft.setStoreTermVectors(fieldInfo.storeTermVector);
      ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector);
      ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector);
      ft.setOmitNorms(fieldInfo.omitNorms);
      ft.setIndexOptions(fieldInfo.indexOptions);
      doc.add(new Field(fieldInfo.name,
                        ft,
                        new String(b, "UTF-8")));
    } else {
      in.seek(in.getFilePointer() + numUTF8Bytes);
    }
    return false;
  }

  @Override
  public boolean intField(FieldInfo fieldInfo, int value) {
    if (accept(fieldInfo)) {
      FieldType ft = new FieldType(NumericField.TYPE_STORED);
      ft.setIndexed(fieldInfo.isIndexed);
      doc.add(new NumericField(fieldInfo.name, ft).setIntValue(value));
    }
    return false;
  }

  @Override
  public boolean longField(FieldInfo fieldInfo, long value) {
    if (accept(fieldInfo)) {
      FieldType ft = new FieldType(NumericField.TYPE_STORED);
      ft.setIndexed(fieldInfo.isIndexed);
      doc.add(new NumericField(fieldInfo.name, ft).setLongValue(value));
    }
    return false;
  }

  @Override
  public boolean floatField(FieldInfo fieldInfo, float value) {
    if (accept(fieldInfo)) {
      FieldType ft = new FieldType(NumericField.TYPE_STORED);
      ft.setIndexed(fieldInfo.isIndexed);
      doc.add(new NumericField(fieldInfo.name, ft).setFloatValue(value));
    }
    return false;
  }

  @Override
  public boolean doubleField(FieldInfo fieldInfo, double value) {
    if (accept(fieldInfo)) {
      FieldType ft = new FieldType(NumericField.TYPE_STORED);
      ft.setIndexed(fieldInfo.isIndexed);
      doc.add(new NumericField(fieldInfo.name, ft).setDoubleValue(value));
    }
    return false;
  }

  private boolean accept(FieldInfo fieldInfo) {
    return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name);
  }

  public Document getDocument() {
    return doc;
  }
}
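A usage sketch for the visitor defined above, restricted to two stored fields (field names illustrative, reader/docID assumed from context):

    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("title", "date");
    reader.document(docID, visitor);
    Document doc = visitor.getDocument();
    String title = doc.get("title");

Unrequested fields are skipped at the byte level via in.seek, so the large-field cost described in the package docs is avoided without lazy fields.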
@ -27,7 +27,6 @@ import java.util.Queue;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;

@ -320,7 +319,7 @@ final class DocumentsWriter {
    return maybeMerge;
  }

  boolean updateDocuments(final Iterable<Document> docs, final Analyzer analyzer,
  boolean updateDocuments(final Iterable<? extends Iterable<? extends IndexableField>> docs, final Analyzer analyzer,
                          final Term delTerm) throws CorruptIndexException, IOException {
    boolean maybeMerge = preUpdate();


@ -351,7 +350,7 @@ final class DocumentsWriter {
    return postUpdate(flushingDWPT, maybeMerge);
  }

  boolean updateDocument(final Document doc, final Analyzer analyzer,
  boolean updateDocument(final Iterable<? extends IndexableField> doc, final Analyzer analyzer,
                         final Term delTerm) throws CorruptIndexException, IOException {

    boolean maybeMerge = preUpdate();
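These generic signatures are what let a "document" be any Iterable of IndexableFields rather than a concrete Document. A hedged sketch of block indexing through the matching public writer methods, assuming the addDocuments/updateDocuments variants from LUCENE-3112 are exposed on IndexWriter in this trunk:

    List<Document> block = new ArrayList<Document>();
    block.add(parentDoc);   // hypothetical documents built elsewhere
    block.add(childDoc);
    // The block is deleted and re-added atomically and stays contiguous:
    writer.updateDocuments(new Term("blockId", "42"), block);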
@ -26,7 +26,6 @@ import java.text.NumberFormat;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;

@ -90,7 +89,7 @@ public class DocumentsWriterPerThread {
    PrintStream infoStream;
    SimilarityProvider similarityProvider;
    int docID;
    Document doc;
    Iterable<? extends IndexableField> doc;
    String maxTermPrefix;

    DocState(DocumentsWriterPerThread docWriter) {

@ -213,7 +212,7 @@ public class DocumentsWriterPerThread {
    return retval;
  }

  public void updateDocument(Document doc, Analyzer analyzer, Term delTerm) throws IOException {
  public void updateDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer, Term delTerm) throws IOException {
    assert writer.testPoint("DocumentsWriterPerThread addDocument start");
    assert deleteQueue != null;
    docState.doc = doc;

@ -263,7 +262,7 @@ public class DocumentsWriterPerThread {
    finishDocument(delTerm);
  }

  public int updateDocuments(Iterable<Document> docs, Analyzer analyzer, Term delTerm) throws IOException {
  public int updateDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer, Term delTerm) throws IOException {
    assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
    assert deleteQueue != null;
    docState.analyzer = analyzer;

@ -280,7 +279,7 @@ public class DocumentsWriterPerThread {
    }
    int docCount = 0;
    try {
      for(Document doc : docs) {
      for(Iterable<? extends IndexableField> doc : docs) {
        docState.doc = doc;
        docState.docID = numDocsInRAM;
        docCount++;
@ -30,9 +30,9 @@ public final class FieldInfo {


  // true if term vector for this field should be stored
  boolean storeTermVector;
  boolean storeOffsetWithTermVector;
  boolean storePositionWithTermVector;
  public boolean storeTermVector;
  public boolean storeOffsetWithTermVector;
  public boolean storePositionWithTermVector;

  public boolean omitNorms; // omit norms associated with indexed fields
  public IndexOptions indexOptions;
@ -39,8 +39,8 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.CodecUtil;

/** Access to the Fieldable Info file that describes document fields and whether or
 *  not they are indexed. Each segment has a separate Fieldable Info file. Objects
/** Access to the Field Info file that describes document fields and whether or
 *  not they are indexed. Each segment has a separate Field Info file. Objects
 *  of this class are thread-safe for multiple readers, but only one thread can
 *  be adding documents at a time, with no other reader or writer threads
 *  accessing this object.

@ -381,7 +381,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
  /**
   * Calls 5 parameter add with false for all TermVector parameters.
   *
   * @param name The name of the Fieldable
   * @param name The name of the IndexableField
   * @param isIndexed true if the field is indexed
   * @see #addOrUpdate(String, boolean, boolean, boolean, boolean)
   */
@ -50,14 +50,14 @@ public final class FieldInvertState {
   * Re-initialize the state, using this boost value.
   * @param docBoost boost value to use.
   */
  void reset(float docBoost) {
  void reset() {
    position = 0;
    length = 0;
    numOverlap = 0;
    offset = 0;
    maxTermFrequency = 0;
    uniqueTermCount = 0;
    boost = docBoost;
    boost = 1.0f;
    attributeSource = null;
  }

@ -17,16 +17,9 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import java.io.IOException;

import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;

@ -34,9 +27,6 @@ import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.IOUtils;

import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;

/**
 * Class responsible for access to stored document fields.

@ -49,6 +39,7 @@ public final class FieldsReader implements Cloneable, Closeable {
  private final static int FORMAT_SIZE = 4;

  private final FieldInfos fieldInfos;
  private CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>();

  // The main fieldStream, used only for cloning.
  private final IndexInput cloneableFieldsStream;

@ -68,7 +59,6 @@ public final class FieldsReader implements Cloneable, Closeable {
  // file.  This will be 0 if we have our own private file.
  private int docStoreOffset;

  private CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>();
  private boolean isOriginal = false;

  /** Returns a cloned FieldsReader that shares open
@ -200,50 +190,52 @@ public final class FieldsReader implements Cloneable, Closeable {
    indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
  }

  public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
    seekIndex(n);
    long position = indexStream.readLong();
    fieldsStream.seek(position);
    fieldsStream.seek(indexStream.readLong());

    Document doc = new Document();
    int numFields = fieldsStream.readVInt();
    out: for (int i = 0; i < numFields; i++) {
    final int numFields = fieldsStream.readVInt();
    for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
      int fieldNumber = fieldsStream.readVInt();
      FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
      FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
      FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);

      int bits = fieldsStream.readByte() & 0xFF;
      assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
      assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);

      boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
      boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
      final boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
      final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;

      switch (acceptField) {
        case LOAD:
          addField(doc, fi, binary, tokenize, numeric);
      final boolean doStop;
      if (binary) {
        final int numBytes = fieldsStream.readVInt();
        doStop = visitor.binaryField(fieldInfo, fieldsStream, numBytes);
      } else if (numeric != 0) {
        switch(numeric) {
        case FieldsWriter.FIELD_IS_NUMERIC_INT:
          doStop = visitor.intField(fieldInfo, fieldsStream.readInt());
          break;
        case LOAD_AND_BREAK:
          addField(doc, fi, binary, tokenize, numeric);
          break out; //Get out of this loop
        case LAZY_LOAD:
          addFieldLazy(doc, fi, binary, tokenize, true, numeric);
        case FieldsWriter.FIELD_IS_NUMERIC_LONG:
          doStop = visitor.longField(fieldInfo, fieldsStream.readLong());
          break;
        case LATENT:
          addFieldLazy(doc, fi, binary, tokenize, false, numeric);
        case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
          doStop = visitor.floatField(fieldInfo, Float.intBitsToFloat(fieldsStream.readInt()));
          break;
        case SIZE:
          skipFieldBytes(addFieldSize(doc, fi, binary, numeric));
        case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
          doStop = visitor.doubleField(fieldInfo, Double.longBitsToDouble(fieldsStream.readLong()));
          break;
        case SIZE_AND_BREAK:
          addFieldSize(doc, fi, binary, numeric);
          break out; //Get out of this loop
        default:
          skipField(numeric);
          throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
        }
      } else {
        // Text:
        final int numUTF8Bytes = fieldsStream.readVInt();
        doStop = visitor.stringField(fieldInfo, fieldsStream, numUTF8Bytes);
      }

    return doc;
      if (doStop) {
        return;
      }
    }
  }

  /** Returns the length in bytes of each raw document in a
@ -300,225 +292,4 @@ public final class FieldsReader implements Cloneable, Closeable {
  private void skipFieldBytes(int toRead) throws IOException {
    fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
  }

  private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
    assert numeric != 0;
    switch(numeric) {
      case FieldsWriter.FIELD_IS_NUMERIC_INT:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
      case FieldsWriter.FIELD_IS_NUMERIC_LONG:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
      case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
      case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
        return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
      default:
        throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
    }
  }

  private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
    final AbstractField f;
    if (binary) {
      int toRead = fieldsStream.readVInt();
      long pointer = fieldsStream.getFilePointer();
      f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult);
      //Need to move the pointer ahead by toRead positions
      fieldsStream.seek(pointer + toRead);
    } else if (numeric != 0) {
      f = loadNumericField(fi, numeric);
    } else {
      Field.Store store = Field.Store.YES;
      Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
      Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);

      int length = fieldsStream.readVInt();
      long pointer = fieldsStream.getFilePointer();
      //Skip ahead of where we are by the length of what is stored
      fieldsStream.seek(pointer+length);
      f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
    }

    f.setOmitNorms(fi.omitNorms);
    f.setIndexOptions(fi.indexOptions);
    doc.add(f);
  }

  private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
    final AbstractField f;

    if (binary) {
      int toRead = fieldsStream.readVInt();
      final byte[] b = new byte[toRead];
      fieldsStream.readBytes(b, 0, b.length);
      f = new Field(fi.name, b);
    } else if (numeric != 0) {
      f = loadNumericField(fi, numeric);
    } else {
      Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
      Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
      f = new Field(fi.name,     // name
                    fieldsStream.readString(), // read value
                    Field.Store.YES,
                    index,
                    termVector);
    }

    f.setIndexOptions(fi.indexOptions);
    f.setOmitNorms(fi.omitNorms);
    doc.add(f);
  }

  // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
  // Read just the size -- caller must skip the field content to continue reading fields
  // Return the size in bytes or chars, depending on field type
  private int addFieldSize(Document doc, FieldInfo fi, boolean binary, int numeric) throws IOException {
    final int bytesize, size;
    switch(numeric) {
      case 0:
        size = fieldsStream.readVInt();
        bytesize = binary ? size : 2*size;
        break;
      case FieldsWriter.FIELD_IS_NUMERIC_INT:
      case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
        size = bytesize = 4;
        break;
      case FieldsWriter.FIELD_IS_NUMERIC_LONG:
      case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
        size = bytesize = 8;
        break;
      default:
        throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
    }
    byte[] sizebytes = new byte[4];
    sizebytes[0] = (byte) (bytesize>>>24);
    sizebytes[1] = (byte) (bytesize>>>16);
    sizebytes[2] = (byte) (bytesize>>> 8);
    sizebytes[3] = (byte)  bytesize;
    doc.add(new Field(fi.name, sizebytes));
    return size;
  }

  /**
   * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
   * loaded.
   */
  private class LazyField extends AbstractField implements Fieldable {
    private int toRead;
    private long pointer;
    private final boolean cacheResult;

    public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
      super(name, store, Field.Index.NO, Field.TermVector.NO);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      this.cacheResult = cacheResult;
      if (isBinary)
        binaryLength = toRead;
      lazy = true;
    }

    public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
      super(name, store, index, termVector);
      this.toRead = toRead;
      this.pointer = pointer;
      this.isBinary = isBinary;
      this.cacheResult = cacheResult;
      if (isBinary)
        binaryLength = toRead;
      lazy = true;
    }

    private IndexInput getFieldStream() {
      IndexInput localFieldsStream = fieldsStreamTL.get();
      if (localFieldsStream == null) {
        localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
        fieldsStreamTL.set(localFieldsStream);
      }
      return localFieldsStream;
    }

    /** The value of the field as a Reader, or null. If null, the String value,
     * binary value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    public Reader readerValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a TokenStream, or null. If null, the Reader value,
     * String value, or binary value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
    public TokenStream tokenStreamValue() {
      ensureOpen();
      return null;
    }

    /** The value of the field as a String, or null. If null, the Reader value,
     * binary value, or TokenStream value is used. Exactly one of stringValue(),
     * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
|
||||
public String stringValue() {
|
||||
ensureOpen();
|
||||
if (isBinary)
|
||||
return null;
|
||||
else {
|
||||
if (fieldsData == null) {
|
||||
String result = null;
|
||||
IndexInput localFieldsStream = getFieldStream();
|
||||
try {
|
||||
localFieldsStream.seek(pointer);
|
||||
byte[] bytes = new byte[toRead];
|
||||
localFieldsStream.readBytes(bytes, 0, toRead);
|
||||
result = new String(bytes, "UTF-8");
|
||||
} catch (IOException e) {
|
||||
throw new FieldReaderException(e);
|
||||
}
|
||||
if (cacheResult == true){
|
||||
fieldsData = result;
|
||||
}
|
||||
return result;
|
||||
} else {
|
||||
return (String) fieldsData;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getBinaryValue(byte[] result) {
|
||||
ensureOpen();
|
||||
|
||||
if (isBinary) {
|
||||
if (fieldsData == null) {
|
||||
// Allocate new buffer if result is null or too small
|
||||
final byte[] b;
|
||||
if (result == null || result.length < toRead)
|
||||
b = new byte[toRead];
|
||||
else
|
||||
b = result;
|
||||
|
||||
IndexInput localFieldsStream = getFieldStream();
|
||||
|
||||
// Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
|
||||
// since they are already handling this exception when getting the document
|
||||
try {
|
||||
localFieldsStream.seek(pointer);
|
||||
localFieldsStream.readBytes(b, 0, toRead);
|
||||
} catch (IOException e) {
|
||||
throw new FieldReaderException(e);
|
||||
}
|
||||
|
||||
binaryOffset = 0;
|
||||
binaryLength = toRead;
|
||||
if (cacheResult == true){
|
||||
fieldsData = b;
|
||||
}
|
||||
return b;
|
||||
} else {
|
||||
return (byte[]) fieldsData;
|
||||
}
|
||||
} else
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,19 +17,16 @@ package org.apache.lucene.index;
 */

import java.io.IOException;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;

final class FieldsWriter {
  static final int FIELD_IS_TOKENIZED = 1 << 0;
  // NOTE: bit 0 is free here! You can steal it!
  static final int FIELD_IS_BINARY = 1 << 1;

  // the old bit 1 << 2 was compressed, is now left out

@ -138,15 +135,17 @@ final class FieldsWriter {
    }
  }

  final void writeField(int fieldNumber, Fieldable field) throws IOException {
  final void writeField(int fieldNumber, IndexableField field) throws IOException {
    fieldsStream.writeVInt(fieldNumber);
    int bits = 0;
    if (field.isTokenized())
      bits |= FIELD_IS_TOKENIZED;
    if (field.isBinary())
      bits |= FIELD_IS_BINARY;
    if (field instanceof NumericField) {
      switch (((NumericField) field).getDataType()) {
    final BytesRef bytes;
    final String string;
    // TODO: maybe a field should serialize itself?
    // this way we don't bake into indexer all these
    // specific encodings for different fields? and apps
    // can customize...
    if (field.numeric()) {
      switch (field.numericDataType()) {
        case INT:
          bits |= FIELD_IS_NUMERIC_INT; break;
        case LONG:

@ -158,23 +157,31 @@ final class FieldsWriter {
        default:
          assert false : "Should never get here";
      }
      string = null;
      bytes = null;
    } else {
      bytes = field.binaryValue();
      if (bytes != null) {
        bits |= FIELD_IS_BINARY;
        string = null;
      } else {
        string = field.stringValue();
      }
    }

    fieldsStream.writeByte((byte) bits);

    if (field.isBinary()) {
      final byte[] data;
      final int len;
      final int offset;
      data = field.getBinaryValue();
      len = field.getBinaryLength();
      offset = field.getBinaryOffset();

      fieldsStream.writeVInt(len);
      fieldsStream.writeBytes(data, offset, len);
    } else if (field instanceof NumericField) {
      final NumericField nf = (NumericField) field;
      final Number n = nf.getNumericValue();
      switch (nf.getDataType()) {
    if (bytes != null) {
      fieldsStream.writeVInt(bytes.length);
      fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length);
    } else if (string != null) {
      fieldsStream.writeString(field.stringValue());
    } else {
      final Number n = field.numericValue();
      if (n == null) {
        throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
      }
      switch (field.numericDataType()) {
        case INT:
          fieldsStream.writeInt(n.intValue()); break;
        case LONG:

@ -186,8 +193,6 @@ final class FieldsWriter {
        default:
          assert false : "Should never get here";
      }
    } else {
      fieldsStream.writeString(field.stringValue());
    }
  }

@ -207,21 +212,21 @@ final class FieldsWriter {
    assert fieldsStream.getFilePointer() == position;
  }

  final void addDocument(Document doc, FieldInfos fieldInfos) throws IOException {
  final void addDocument(Iterable<? extends IndexableField> doc, FieldInfos fieldInfos) throws IOException {
    indexStream.writeLong(fieldsStream.getFilePointer());

    int storedCount = 0;
    List<Fieldable> fields = doc.getFields();
    for (Fieldable field : fields) {
      if (field.isStored())
    for (IndexableField field : doc) {
      if (field.stored()) {
        storedCount++;
      }
    }
    fieldsStream.writeVInt(storedCount);

    for (Fieldable field : fields) {
      if (field.isStored())
    for (IndexableField field : doc) {
      if (field.stored()) {
        writeField(fieldInfos.fieldNumber(field.name()), field);
      }
    }
  }
}
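For orientation, each stored field that writeField emits above is laid out as vInt(fieldNumber), one flags byte, then the value. The following reading-side sketch mirrors that layout; it is illustrative only. FIELD_IS_BINARY's value appears in the hunk above, but the numeric flag values are assumptions, since their definitions are not shown here; the authoritative decoder is FieldsReader.

import java.io.IOException;
import org.apache.lucene.store.IndexInput;

class StoredFieldLayoutSketch {
  // Bit 1 (binary) is taken from the hunk above; the numeric flag values are assumed.
  static final int FIELD_IS_BINARY         = 1 << 1;
  static final int FIELD_IS_NUMERIC_INT    = 1 << 3; // assumed value
  static final int FIELD_IS_NUMERIC_LONG   = 1 << 4; // assumed value
  static final int FIELD_IS_NUMERIC_FLOAT  = 1 << 5; // assumed value
  static final int FIELD_IS_NUMERIC_DOUBLE = 1 << 6; // assumed value

  // Reads back one field in the order writeField emits it.
  static void readOneField(IndexInput in) throws IOException {
    int fieldNumber = in.readVInt();  // matches fieldsStream.writeVInt(fieldNumber)
    byte bits = in.readByte();        // matches fieldsStream.writeByte((byte) bits)
    if ((bits & FIELD_IS_BINARY) != 0) {
      int len = in.readVInt();
      byte[] value = new byte[len];
      in.readBytes(value, 0, len);                          // raw bytes, exactly as written
    } else if ((bits & FIELD_IS_NUMERIC_INT) != 0) {
      int value = in.readInt();
    } else if ((bits & FIELD_IS_NUMERIC_LONG) != 0) {
      long value = in.readLong();
    } else if ((bits & FIELD_IS_NUMERIC_FLOAT) != 0) {
      float value = Float.intBitsToFloat(in.readInt());     // same encoding as loadNumericField above
    } else if ((bits & FIELD_IS_NUMERIC_DOUBLE) != 0) {
      double value = Double.longBitsToDouble(in.readLong());
    } else {
      String value = in.readString();                       // plain string field
    }
  }
}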
@ -17,9 +17,8 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@ -351,9 +350,9 @@ public class FilterIndexReader extends IndexReader {
  }

  @Override
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
    ensureOpen();
    return in.document(n, fieldSelector);
    in.document(docID, visitor);
  }

  @Override
@ -22,7 +22,6 @@ import java.util.Comparator;
import java.util.Map;

import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;

@ -82,15 +81,17 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
  }

  @Override
  boolean start(Fieldable[] fields, int count) {
    for(int i=0;i<count;i++)
      if (fields[i].isIndexed())
  boolean start(IndexableField[] fields, int count) {
    for(int i=0;i<count;i++) {
      if (fields[i].indexed()) {
        return true;
      }
    }
    return false;
  }

  @Override
  void start(Fieldable f) {
  void start(IndexableField f) {
    if (fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
      payloadAttribute = fieldState.attributeSource.getAttribute(PayloadAttribute.class);
    } else {
@ -17,29 +17,28 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil; // for javadocs

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Closeable;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

/** IndexReader is an abstract class, providing an interface for accessing an
 index.  Search of an index is done entirely through this abstract interface,
 so that any subclass which implements it is searchable.

@ -859,7 +858,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
   * @return array of term frequency vectors. May be null if no term vectors have been
   *         stored for the specified document.
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
          throws IOException;

@ -877,7 +875,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
   * @return term frequency vector May be null if field does not exist in the specified
   *         document or term vector was not stored.
   * @throws IOException if index cannot be accessed
   * @see org.apache.lucene.document.Field.TermVector
   */
  abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
          throws IOException;

@ -946,34 +943,17 @@ public abstract class IndexReader implements Cloneable,Closeable {
    return maxDoc() - numDocs();
  }

  /**
   * Returns the stored fields of the <code>n</code><sup>th</sup>
   * <code>Document</code> in this index.
   * <p>
   * <b>NOTE:</b> for performance reasons, this method does not check if the
   * requested document is deleted, and therefore asking for a deleted document
   * may yield unspecified results. Usually this is not required, however you
   * can test if the doc is deleted by checking the {@link
   * Bits} returned from {@link MultiFields#getLiveDocs}.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public Document document(int n) throws CorruptIndexException, IOException {
    ensureOpen();
    return document(n, null);
  }
  /** Expert: visits the fields of a stored document, for
   *  custom processing/loading of each field.  If you
   *  simply want to load all fields, use {@link
   *  #document(int)}.  If you want to load a subset, use
   *  {@link DocumentStoredFieldVisitor}. */
  public abstract void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException;

  /**
   * Get the {@link org.apache.lucene.document.Document} at the <code>n</code>
   * <sup>th</sup> position. The {@link FieldSelector} may be used to determine
   * what {@link org.apache.lucene.document.Field}s to load and how they should
   * be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying
   * <code>FieldsReader</code>) is closed before the lazy
   * {@link org.apache.lucene.document.Field} is loaded an exception may be
   * thrown. If you want the value of a lazy
   * {@link org.apache.lucene.document.Field} to be available after closing you
   * must explicitly load it or fetch the Document again with a new loader.
   * Returns the stored fields of the <code>n</code><sup>th</sup>
   * <code>Document</code> in this index.  This is just
   * sugar for using {@link DocumentStoredFieldVisitor}.
   * <p>
   * <b>NOTE:</b> for performance reasons, this method does not check if the
   * requested document is deleted, and therefore asking for a deleted document

@ -981,21 +961,23 @@ public abstract class IndexReader implements Cloneable,Closeable {
   * can test if the doc is deleted by checking the {@link
   * Bits} returned from {@link MultiFields#getLiveDocs}.
   *
   * @param n Get the document at the <code>n</code><sup>th</sup> position
   * @param fieldSelector The {@link FieldSelector} to use to determine what
   *        Fields should be loaded on the Document. May be null, in which case
   *        all Fields will be loaded.
   * @return The stored fields of the
   *         {@link org.apache.lucene.document.Document} at the nth position
   * <b>NOTE:</b> only the content of a field is returned,
   * if that field was stored during indexing.  Metadata
   * like boost, omitNorm, IndexOptions, tokenized, etc.,
   * are not preserved.
   *
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   * @see org.apache.lucene.document.Fieldable
   * @see org.apache.lucene.document.FieldSelector
   * @see org.apache.lucene.document.SetBasedFieldSelector
   * @see org.apache.lucene.document.LoadFirstFieldSelector
   */
  // TODO (1.5): When we convert to JDK 1.5 make this Set<String>
  public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
  // TODO: we need a separate StoredField, so that the
  // Document returned here contains that class not
  // IndexableField
  public Document document(int docID) throws CorruptIndexException, IOException {
    ensureOpen();
    final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    document(docID, visitor);
    return visitor.getDocument();
  }

  /** Returns true if any documents have been deleted */
  public abstract boolean hasDeletions();

@ -1017,8 +999,8 @@ public abstract class IndexReader implements Cloneable,Closeable {
  public abstract byte[] norms(String field) throws IOException;

  /** Expert: Resets the normalization factor for the named field of the named
   * document.  By default, The norm represents the product of the field's {@link
   * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its
   * document.  By default, the norm represents the product of the field's {@link
   * org.apache.lucene.document.Field#setBoost(float) boost} and its
   * length normalization}.  Thus, to preserve the length normalization
   * values when resetting this, one should base the new value upon the old.
   *
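Taken together, the IndexReader hunks above replace the FieldSelector API with a visitor: document(int) is now just sugar over DocumentStoredFieldVisitor. A minimal usage sketch follows; the import path for DocumentStoredFieldVisitor is assumed from this commit's package layout.

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentStoredFieldVisitor; // import path assumed
import org.apache.lucene.index.IndexReader;

class LoadStoredFieldsSketch {
  // Both calls load the same stored fields; the second form lets you
  // substitute a custom StoredFieldVisitor that loads only a subset.
  static Document load(IndexReader reader, int docID) throws IOException {
    Document viaSugar = reader.document(docID);   // convenience path

    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
    reader.document(docID, visitor);              // expert path
    return visitor.getDocument();                 // equivalent to viaSugar
  }
}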
@ -36,7 +36,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;

@ -74,10 +73,10 @@ import org.apache.lucene.util.TwoPhaseCommit;
  new index if there is not already an index at the provided path
  and otherwise open the existing index.</p>

  <p>In either case, documents are added with {@link #addDocument(Document)
  <p>In either case, documents are added with {@link #addDocument(Iterable)
  addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
  #deleteDocuments(Query)}. A document can be updated with {@link
  #updateDocument(Term, Document) updateDocument} (which just deletes
  #updateDocument(Term, Iterable) updateDocument} (which just deletes
  and then adds the entire document). When finished adding, deleting
  and updating documents, {@link #close() close} should be called.</p>

@ -1281,7 +1280,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc) throws CorruptIndexException, IOException {
  public void addDocument(Iterable<? extends IndexableField> doc) throws CorruptIndexException, IOException {
    addDocument(doc, analyzer);
  }

@ -1289,7 +1288,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   * Adds a document to this index, using the provided analyzer instead of the
   * value of {@link #getAnalyzer()}.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * <p>See {@link #addDocument(Iterable)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *

@ -1300,7 +1299,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
  public void addDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer) throws CorruptIndexException, IOException {
    updateDocument(null, doc, analyzer);
  }

@ -1318,7 +1317,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   * compression), in which case you may need to fully
   * re-index your documents at that time.
   *
   * <p>See {@link #addDocument(Document)} for details on
   * <p>See {@link #addDocument(Iterable)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.</p>
   *

@ -1338,7 +1337,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   *
   * @lucene.experimental
   */
  public void addDocuments(Iterable<Document> docs) throws CorruptIndexException, IOException {
  public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws CorruptIndexException, IOException {
    addDocuments(docs, analyzer);
  }

@ -1353,7 +1352,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   *
   * @lucene.experimental
   */
  public void addDocuments(Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
  public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    updateDocuments(null, docs, analyzer);
  }

@ -1370,7 +1369,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Iterable<Document> docs) throws CorruptIndexException, IOException {
  public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs) throws CorruptIndexException, IOException {
    updateDocuments(delTerm, docs, analyzer);
  }

@ -1388,7 +1387,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   *
   * @lucene.experimental
   */
  public void updateDocuments(Term delTerm, Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
  public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
    ensureOpen();
    try {
      boolean success = false;

@ -1511,7 +1510,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
  public void updateDocument(Term term, Iterable<? extends IndexableField> doc) throws CorruptIndexException, IOException {
    ensureOpen();
    updateDocument(term, doc, getAnalyzer());
  }

@ -1534,7 +1533,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   * @throws CorruptIndexException if the index is corrupt
   * @throws IOException if there is a low-level IO error
   */
  public void updateDocument(Term term, Document doc, Analyzer analyzer)
  public void updateDocument(Term term, Iterable<? extends IndexableField> doc, Analyzer analyzer)
      throws CorruptIndexException, IOException {
    ensureOpen();
    try {

@ -3034,7 +3033,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
  DocumentsWriter getDocsWriter() {
    boolean test = false;
    assert test = true;
    return test?docWriter: null;
    return test ? docWriter : null;
  }

  /** Expert: Return the number of documents currently
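With these signature changes, IndexWriter no longer requires a Document: any Iterable<? extends IndexableField> is accepted, and Document is simply one such Iterable. A hedged sketch follows; the Version constant, analyzer, and RAMDirectory choice are illustrative assumptions, and the field constructors mirror the demo code changed elsewhere in this commit.

import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

class AddDocumentSketch {
  public static void main(String[] args) throws Exception {
    IndexWriter writer = new IndexWriter(new RAMDirectory(),
        new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));

    // Document implements Iterable<IndexableField> (the
    // PersistentSnapshotDeletionPolicy hunk below iterates one directly),
    // so it satisfies the new addDocument(Iterable) signature.
    Document doc = new Document();
    doc.add(new Field("path", StringField.TYPE_STORED, "/tmp/example.txt"));
    doc.add(new TextField("contents", new StringReader("hello world")));
    writer.addDocument(doc);
    writer.close();
  }
}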
@ -0,0 +1,104 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.Reader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.BytesRef;

// TODO: how to handle versioning here...?

// TODO: we need to break out separate StoredField...

/** Represents a single field for indexing.  IndexWriter
 *  consumes Iterable<IndexableField> as a document.
 *
 *  @lucene.experimental */

public interface IndexableField {

  // TODO: add attrs to this API?

  /* Field name */
  public String name();

  // NOTE: if doc/field impl has the notion of "doc level boost"
  // it must be multiplied in w/ this field's boost

  /** Field boost (you must pre-multiply in any doc boost). */
  public float boost();

  /* True if the field's value should be stored */
  public boolean stored();

  /* Non-null if this field has a binary value */
  public BytesRef binaryValue();

  /* Non-null if this field has a string value */
  public String stringValue();

  /* Non-null if this field has a Reader value */
  public Reader readerValue();

  /* Non-null if this field has a pre-tokenized ({@link TokenStream}) value */
  public TokenStream tokenStreamValue();

  // Numeric field:

  /* True if this field is numeric */
  public boolean numeric();

  /* Numeric {@link NumericField.DataType}; only used if
   * the field is numeric */
  public NumericField.DataType numericDataType();

  /* Numeric value; only used if the field is numeric */
  public Number numericValue();

  /* True if this field should be indexed (inverted) */
  public boolean indexed();

  /* True if this field's value should be analyzed */
  public boolean tokenized();

  /* True if norms should not be indexed */
  public boolean omitNorms();

  /* {@link IndexOptions}, describing what should be
   * recorded into the inverted index */
  public IndexOptions indexOptions();

  /* True if term vectors should be indexed */
  public boolean storeTermVectors();

  /* True if term vector offsets should be indexed */
  public boolean storeTermVectorOffsets();

  /* True if term vector positions should be indexed */
  public boolean storeTermVectorPositions();

  /* Non-null if doc values should be indexed */
  public PerDocFieldValues docValues();

  /* DocValues type; only used if docValues is non-null */
  public ValueType docValuesType();
}
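Most applications will not implement IndexableField directly; the new Field/FieldType classes in org.apache.lucene.document do so on their behalf. As a small sketch of configuring field metadata by hand, mirroring the FieldType usage in the PersistentSnapshotDeletionPolicy hunk later in this diff:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

class FieldTypeSketch {
  static Document makeDoc() {
    // A stored-only field: not indexed, not tokenized, no term vectors,
    // so stored() is the only IndexableField flag that returns true.
    FieldType storedOnly = new FieldType();
    storedOnly.setStored(true);

    Document doc = new Document();
    doc.add(new Field("id", storedOnly, "42"));
    return doc;
  }
}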
@ -19,24 +19,22 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.document.Fieldable;

abstract class InvertedDocConsumerPerField {

  // Called once per field, and is given all Fieldable
  // Called once per field, and is given all IndexableField
  // occurrences for this field in the document.  Return
  // true if you wish to see inverted tokens for these
  // fields:
  abstract boolean start(Fieldable[] fields, int count) throws IOException;
  abstract boolean start(IndexableField[] fields, int count) throws IOException;

  // Called before a field instance is being processed
  abstract void start(Fieldable field);
  abstract void start(IndexableField field);

  // Called once per inverted token
  abstract void add() throws IOException;

  // Called once per field per document, after all Fieldable
  // occurrences are inverted
  // Called once per field per document, after all IndexableFields
  // are inverted
  abstract void finish() throws IOException;

  // Called on hitting an aborting exception
@ -22,8 +22,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@ -258,12 +256,11 @@ public class MultiReader extends IndexReader implements Cloneable {
    return maxDoc;
  }

  // inherit javadoc
  @Override
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
    ensureOpen();
    int i = readerIndex(n);                           // find segment num
    return subReaders[i].document(n - starts[i], fieldSelector);  // dispatch to segment reader
    int i = readerIndex(docID);                           // find segment num
    subReaders[i].document(docID - starts[i], visitor);   // dispatch to segment reader
  }

  @Override
@ -17,10 +17,6 @@ package org.apache.lucene.index;
 * limitations under the License.
 */

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.util.Bits;

@ -351,31 +347,13 @@ public class ParallelReader extends IndexReader {
    hasDeletions = false;
  }

  // append fields from storedFieldReaders
  @Override
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
    ensureOpen();
    Document result = new Document();
    for (final IndexReader reader: storedFieldReaders) {

      boolean include = (fieldSelector==null);
      if (!include) {
        Collection<String> fields = readerToFields.get(reader);
        for (final String field : fields)
          if (fieldSelector.accept(field) != FieldSelectorResult.NO_LOAD) {
            include = true;
            break;
      reader.document(docID, visitor);
    }
  }
      if (include) {
        List<Fieldable> fields = reader.document(n, fieldSelector).getFields();
        for (Fieldable field : fields) {
          result.add(field);
        }
      }
    }
    return result;
  }

  // get all vectors
  @Override
@ -25,9 +25,7 @@ import java.util.Map.Entry;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;

@ -71,12 +69,11 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
    // index is allowed to have exactly one document or 0.
    if (numDocs == 1) {
      Document doc = r.document(r.maxDoc() - 1);
      Field sid = doc.getField(SNAPSHOTS_ID);
      if (sid == null) {
      if (doc.getField(SNAPSHOTS_ID) == null) {
        throw new IllegalStateException("directory is not a valid snapshots store!");
      }
      doc.removeField(SNAPSHOTS_ID);
      for (Fieldable f : doc.getFields()) {
      for (IndexableField f : doc) {
        snapshots.put(f.name(), f.stringValue());
      }
    } else if (numDocs != 0) {

@ -189,12 +186,14 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
  private void persistSnapshotInfos(String id, String segment) throws IOException {
    writer.deleteAll();
    Document d = new Document();
    d.add(new Field(SNAPSHOTS_ID, "", Store.YES, Index.NO));
    FieldType ft = new FieldType();
    ft.setStored(true);
    d.add(new Field(SNAPSHOTS_ID, ft, ""));
    for (Entry<String, String> e : super.getSnapshots().entrySet()) {
      d.add(new Field(e.getKey(), e.getValue(), Store.YES, Index.NO));
      d.add(new Field(e.getKey(), ft, e.getValue()));
    }
    if (id != null) {
      d.add(new Field(id, segment, Store.YES, Index.NO));
      d.add(new Field(id, ft, segment));
    }
    writer.addDocument(d);
    writer.commit();
@ -335,6 +335,10 @@ final class SegmentMerger {
          // skip deleted docs
          continue;
        }
        // TODO: this could be more efficient using
        // FieldVisitor instead of loading/writing entire
        // doc; ie we just have to renumber the field number
        // on the fly?
        // NOTE: it's very important to first assign to doc then pass it to
        // termVectorsWriter.addAllDocVectors; see LUCENE-1282
        Document doc = reader.reader.document(j);
@ -27,13 +27,11 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

@ -455,10 +453,9 @@ public class SegmentReader extends IndexReader implements Cloneable {
    return core.fieldInfos;
  }

  @Override
  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
  public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
    ensureOpen();
    return getFieldsReader().doc(n, fieldSelector);
    getFieldsReader().visitDocument(docID, visitor);
  }

  @Override
@ -0,0 +1,87 @@
package org.apache.lucene.index;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.store.IndexInput;

/**
 * Expert: provides a low-level means of accessing the stored field
 * values in an index.  See {@link IndexReader#document(int,
 * StoredFieldVisitor)}.
 *
 * See {@link DocumentStoredFieldVisitor}, which is a
 * <code>StoredFieldVisitor</code> that builds the
 * {@link Document} containing all stored fields.  This is
 * used by {@link IndexReader#document(int)}.
 *
 * @lucene.experimental */

public class StoredFieldVisitor {
  /** Process a binary field.  Note that if you want to
   *  skip the field you must seek the IndexInput
   *  (e.g., call <code>in.seek(in.getFilePointer() + numBytes)</code>).
   *
   *  <p>Return true to stop loading fields. */
  public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException {
    in.seek(in.getFilePointer() + numBytes);
    return false;
  }

  /** Process a string field by reading numUTF8Bytes.
   *  Note that if you want to skip the field you must
   *  seek the IndexInput as if you had read numUTF8Bytes
   *  (e.g., call <code>in.seek(in.getFilePointer() + numUTF8Bytes)</code>).
   *
   *  <p>Return true to stop loading fields. */
  public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
    in.seek(in.getFilePointer() + numUTF8Bytes);
    return false;
  }

  /** Process an int numeric field.
   *
   *  <p>Return true to stop loading fields. */
  public boolean intField(FieldInfo fieldInfo, int value) throws IOException {
    return false;
  }

  /** Process a long numeric field.
   *
   *  <p>Return true to stop loading fields. */
  public boolean longField(FieldInfo fieldInfo, long value) throws IOException {
    return false;
  }

  /** Process a float numeric field.
   *
   *  <p>Return true to stop loading fields. */
  public boolean floatField(FieldInfo fieldInfo, float value) throws IOException {
    return false;
  }

  /** Process a double numeric field.
   *
   *  <p>Return true to stop loading fields. */
  public boolean doubleField(FieldInfo fieldInfo, double value) throws IOException {
    return false;
  }
}
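Because every base-class method above skips (or ignores) its field and returns false, a subclass only needs to override the callbacks it cares about. A minimal sketch that loads a single string field and stops early (the field name "title" is illustrative):

import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.IndexInput;

class TitleOnlyVisitor extends StoredFieldVisitor {
  String title;

  @Override
  public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
    if ("title".equals(fieldInfo.name)) {
      byte[] bytes = new byte[numUTF8Bytes];
      in.readBytes(bytes, 0, numUTF8Bytes);
      title = new String(bytes, "UTF-8");
      return true;   // stop visiting: we have what we need
    }
    return super.stringField(fieldInfo, in, numUTF8Bytes);  // base class skips the bytes
  }
}

Passing an instance to IndexReader.document(docID, visitor) then leaves visitor.title set, without materializing a full Document.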
@ -19,7 +19,6 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

@ -41,12 +40,12 @@ final class StoredFieldsWriter {
  }

  private int numStoredFields;
  private Fieldable[] storedFields;
  private IndexableField[] storedFields;
  private int[] fieldNumbers;

  public void reset() {
    numStoredFields = 0;
    storedFields = new Fieldable[1];
    storedFields = new IndexableField[1];
    fieldNumbers = new int[1];
  }

@ -123,10 +122,10 @@ final class StoredFieldsWriter {
    assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end");
  }

  public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException {
  public void addField(IndexableField field, FieldInfo fieldInfo) throws IOException {
    if (numStoredFields == storedFields.length) {
      int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
      Fieldable[] newArray = new Fieldable[newSize];
      IndexableField[] newArray = new IndexableField[newSize];
      System.arraycopy(storedFields, 0, newArray, 0, numStoredFields);
      storedFields = newArray;
    }
@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef;
 */
public interface TermFreqVector {
  /**
   * The {@link org.apache.lucene.document.Fieldable} name.
   * The {@link org.apache.lucene.index.IndexableField} name.
   * @return The name of the field this vector is associated with.
   *
   */
@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;

import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;

@ -55,17 +54,17 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
  }

  @Override
  boolean start(Fieldable[] fields, int count) {
  boolean start(IndexableField[] fields, int count) {
    doVectors = false;
    doVectorPositions = false;
    doVectorOffsets = false;

    for(int i=0;i<count;i++) {
      Fieldable field = fields[i];
      if (field.isIndexed() && field.isTermVectorStored()) {
      IndexableField field = fields[i];
      if (field.indexed() && field.storeTermVectors()) {
        doVectors = true;
        doVectorPositions |= field.isStorePositionWithTermVector();
        doVectorOffsets |= field.isStoreOffsetWithTermVector();
        doVectorPositions |= field.storeTermVectorPositions();
        doVectorOffsets |= field.storeTermVectorOffsets();
      }
    }

@ -188,7 +187,7 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
  }

  @Override
  void start(Fieldable f) {
  void start(IndexableField f) {
    if (doVectorOffsets) {
      offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
    } else {
@ -24,13 +24,11 @@ package org.apache.lucene.index;

import java.io.IOException;

import org.apache.lucene.document.Fieldable;

abstract class TermsHashConsumerPerField {
  abstract boolean start(Fieldable[] fields, int count) throws IOException;
  abstract boolean start(IndexableField[] fields, int count) throws IOException;
  abstract void finish() throws IOException;
  abstract void skippingLongTerm() throws IOException;
  abstract void start(Fieldable field);
  abstract void start(IndexableField field);
  abstract void newTerm(int termID) throws IOException;
  abstract void addTerm(int termID) throws IOException;
  abstract int getStreamCount();
@ -22,7 +22,6 @@ import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

@ -116,7 +115,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
  private boolean doNextCall;

  @Override
  void start(Fieldable f) {
  void start(IndexableField f) {
    termAtt = fieldState.attributeSource.getAttribute(TermToBytesRefAttribute.class);
    termBytesRef = termAtt.getBytesRef();
    consumer.start(f);

@ -126,11 +125,12 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
  }

  @Override
  boolean start(Fieldable[] fields, int count) throws IOException {
  boolean start(IndexableField[] fields, int count) throws IOException {
    doCall = consumer.start(fields, count);
    bytesHash.reinit();
    if (nextPerField != null)
    if (nextPerField != null) {
      doNextCall = nextPerField.start(fields, count);
    }
    return doCall || doNextCall;
  }

@ -21,7 +21,6 @@ import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValuesArray;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
@ -19,14 +19,12 @@ package org.apache.lucene.index.values;
import java.util.Comparator;

import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.util.BytesRef;

/**
 * Per document and field values consumed by {@link DocValuesConsumer}.
 * @see IndexDocValuesField
 * @see Fieldable#setDocValues(PerDocFieldValues)
 *
 * @lucene.experimental
 */

@ -91,11 +89,10 @@ public interface PerDocFieldValues {
  /**
   * Sets the {@link ValueType}
   */
  public void setType(ValueType type);
  public void setDocValuesType(ValueType type);

  /**
   * Returns the {@link ValueType}
   */
  public ValueType type();
  public ValueType docValuesType();
}