LUCENE-2308: cutover to FieldType

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1162347 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2011-08-27 13:27:01 +00:00
parent 100ce41c79
commit 4dad0ba89f
390 changed files with 6271 additions and 5183 deletions

View File

@ -255,6 +255,11 @@ Changes in Runtime Behavior
* LUCENE-3146: IndexReader.setNorm throws IllegalStateException if the field
does not store norms. (Shai Erera, Mike McCandless)
* LUCENE-3309: Stored fields no longer record whether they were
tokenized or not. In general you should not rely on stored fields
to record any "metadata" from indexing (tokenized, omitNorms,
IndexOptions, boost, etc.) (Mike McCandless)
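A minimal sketch of the new contract, using only APIs from this commit (writer and reader setup elided):

FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setOmitNorms(true);                 // index-time-only metadata
doc.add(new Field("body", ft, "some text"));
writer.addDocument(doc);
// At search time the stored field carries only the value; flags such as
// tokenized, omitNorms, IndexOptions and boost are not recorded with it.
Document stored = reader.document(docID);
String value = stored.get("body");     // "some text", nothing more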
API Changes

View File

@ -4,6 +4,14 @@ For more information on past and future Lucene versions, please see:
http://s.apache.org/luceneversions
======================= Trunk (not yet released) =======================
Changes in Runtime Behavior
* LUCENE-3309: Fast vector highlighter now inserts the
MultiValuedSeparator for NOT_ANALYZED fields (in addition to
ANALYZED fields). To ensure your offsets are correct you should
provide an analyzer that returns 1 from the offsetGap method.
(Mike McCandless)
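A hedged illustration of the change, using the SimpleFragmentsBuilder API exercised later in this commit (reader and FieldFragList construction elided; F and ffl are as in SimpleFragmentsBuilderTest below):

SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
sfb.setMultiValuedSeparator( '/' );
// before: the separator was skipped for NOT_ANALYZED multi-valued fields:
//   "abc<b>defg</b>hijkl"
// after: the separator is inserted between every value:
//   "abc/<b>defg</b>/hijkl/"
String fragment = sfb.createFragment( reader, 0, F, ffl );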
Build

View File

@ -22,7 +22,8 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
@ -173,8 +174,7 @@ public class IndexFiles {
// field that is indexed (i.e. searchable), but don't tokenize
// the field into separate words and don't index term frequency
// or positional information:
Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
Field pathField = new Field("path", StringField.TYPE_STORED, file.getPath());
doc.add(pathField);
// Add the last modified date of the file a field named "modified".
@ -192,7 +192,7 @@ public class IndexFiles {
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in UTF-8 encoding.
// If that's not the case searching for special characters will fail.
doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
// New index, so we just add the document (no old document can be there):
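For reference, a hedged sketch of a hand-built FieldType equivalent to the removed NOT_ANALYZED_NO_NORMS plus DOCS_ONLY combination, using setters that appear elsewhere in this commit; StringField.TYPE_STORED presumably bundles the same settings:

FieldType pathType = new FieldType();
pathType.setIndexed(true);
pathType.setStored(true);
pathType.setTokenized(false);                     // NOT_ANALYZED
pathType.setOmitNorms(true);                      // NO_NORMS
pathType.setIndexOptions(IndexOptions.DOCS_ONLY); // no freqs or positions
Field pathField = new Field("path", pathType, file.getPath());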

View File

@ -17,10 +17,26 @@
package org.apache.lucene.demo.xmlparser;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Enumeration;
import java.util.Properties;
import java.util.StringTokenizer;
import javax.servlet.RequestDispatcher;
import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -33,20 +49,6 @@ import org.apache.lucene.util.Version;
import org.apache.lucene.xmlparser.CorePlusExtensionsParser;
import org.apache.lucene.xmlparser.QueryTemplateManager;
import javax.servlet.RequestDispatcher;
import javax.servlet.ServletConfig;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Enumeration;
import java.util.Properties;
import java.util.StringTokenizer;
public class FormBasedXmlQueryDemo extends HttpServlet {
private QueryTemplateManager queryTemplateManager;
@ -124,20 +126,18 @@ public class FormBasedXmlQueryDemo extends HttpServlet {
InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
BufferedReader br = new BufferedReader(new InputStreamReader(dataIn));
String line = br.readLine();
final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
textNoNorms.setOmitNorms(true);
while (line != null) {
line = line.trim();
if (line.length() > 0) {
//parse row and create a document
StringTokenizer st = new StringTokenizer(line, "\t");
Document doc = new Document();
doc.add(new Field("location", st.nextToken(), Field.Store.YES,
Field.Index.ANALYZED_NO_NORMS));
doc.add(new Field("salary", st.nextToken(), Field.Store.YES,
Field.Index.ANALYZED_NO_NORMS));
doc.add(new Field("type", st.nextToken(), Field.Store.YES,
Field.Index.ANALYZED_NO_NORMS));
doc.add(new Field("description", st.nextToken(), Field.Store.YES,
Field.Index.ANALYZED));
doc.add(new Field("location", textNoNorms, st.nextToken()));
doc.add(new Field("salary", textNoNorms, st.nextToken()));
doc.add(new Field("type", textNoNorms, st.nextToken()));
doc.add(new Field("description", textNoNorms, st.nextToken()));
writer.addDocument(doc);
}
line = br.readLine();

View File

@ -60,10 +60,11 @@ public class TokenSources {
* @param analyzer The analyzer to use for creating the TokenStream if the
* vector doesn't exist
* @return The {@link org.apache.lucene.analysis.TokenStream} for the
* {@link org.apache.lucene.document.Fieldable} on the
* {@link org.apache.lucene.index.IndexableField} on the
* {@link org.apache.lucene.document.Document}
* @throws IOException if there was an error loading
*/
public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
String field, Document doc, Analyzer analyzer) throws IOException {
TokenStream ts = null;
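A hedged usage sketch of the signature above (the "contents" field name is illustrative): the stored term vector is used when present, otherwise the stored text is re-analyzed with the supplied analyzer:

Document doc = reader.document(docId);
TokenStream ts = TokenSources.getAnyTokenStream(reader, docId, "contents", doc, analyzer);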

View File

@ -21,15 +21,18 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
import org.apache.lucene.store.IndexInput;
public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
@ -107,10 +110,27 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
return fragments.toArray( new String[fragments.size()] );
}
protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
protected Field[] getFields( IndexReader reader, int docId, final String fieldName) throws IOException {
// according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
Document doc = reader.document( docId, new MapFieldSelector(fieldName) );
return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
final List<Field> fields = new ArrayList<Field>();
reader.document(docId, new StoredFieldVisitor() {
@Override
public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
if (fieldInfo.name.equals(fieldName)) {
final byte[] b = new byte[numUTF8Bytes];
in.readBytes(b, 0, b.length);
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.storeTermVector);
ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector);
ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector);
fields.add(new Field(fieldInfo.name, ft, new String(b, "UTF-8")));
} else {
in.seek(in.getFilePointer() + numUTF8Bytes);
}
return false;
}
});
return fields.toArray(new Field[fields.size()]);
}
protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
@ -142,8 +162,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
int startOffset, int endOffset ){
while( buffer.length() < endOffset && index[0] < values.length ){
buffer.append( values[index[0]].stringValue() );
if( values[index[0]].isTokenized() )
buffer.append( multiValuedSeparator );
buffer.append( multiValuedSeparator );
index[0]++;
}
int eo = buffer.length() < endOffset ? buffer.length() : endOffset;

View File

@ -46,8 +46,12 @@ public class FieldTermStack {
// Directory dir = new RAMDirectory();
// IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
// Document doc = new Document();
// doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
// doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
// FieldType ft = new FieldType(TextField.TYPE_STORED);
// ft.setStoreTermVectors(true);
// ft.setStoreTermVectorOffsets(true);
// ft.setStoreTermVectorPositions(true);
// doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
// doc.add( new Field( "f", ft, "b a b a f" ) );
// writer.addDocument( doc );
// writer.close();
@ -67,7 +71,7 @@ public class FieldTermStack {
*/
public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
this.fieldName = fieldName;
TermFreqVector tfv = reader.getTermFreqVector( docId, fieldName );
if( tfv == null ) return; // just return to make null snippets
TermPositionVector tpv = null;

View File

@ -25,7 +25,7 @@ This is another highlighter implementation.
<li>support N-gram fields</li>
<li>support phrase-unit highlighting with slops</li>
<li>need Java 1.5</li>
<li>highlight fields need to be TermVector.WITH_POSITIONS_OFFSETS</li>
<li>highlight fields need to be stored with Positions and Offsets</li>
<li>take into account query boost to score fragments</li>
<li>support colored highlight tags</li>
<li>pluggable FragListBuilder</li>
@ -95,7 +95,7 @@ will be generated:</p>
<h3>Step 2.</h3>
<p>In Step 2, Fast Vector Highlighter generates {@link org.apache.lucene.search.vectorhighlight.FieldTermStack}. Fast Vector Highlighter uses {@link org.apache.lucene.index.TermFreqVector} data
(must be stored {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS_OFFSETS})
(must be stored {@link org.apache.lucene.document.FieldType#setStoreTermVectorOffsets(boolean)} and {@link org.apache.lucene.document.FieldType#setStoreTermVectorPositions(boolean)})
to generate it. <code>FieldTermStack</code> keeps the terms in the user query.
Therefore, in this sample case, Fast Vector Highlighter generates the following <code>FieldTermStack</code>:</p>
<pre>

View File

@ -28,9 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -61,8 +60,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamConcurrent(),
TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, customType, new TokenStreamConcurrent()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
@ -105,8 +107,12 @@ public class HighlighterPhraseTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamConcurrent(),
TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, customType, new TokenStreamConcurrent()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
@ -176,8 +182,12 @@ public class HighlighterPhraseTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamSparse(),
TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, customType, new TokenStreamSparse()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
@ -219,8 +229,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
document.add(new Field(FIELD, TEXT, Store.YES, Index.ANALYZED,
TermVector.WITH_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, customType, TEXT));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
@ -260,8 +273,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamSparse(),
TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectors(true);
document.add(new Field(FIELD, customType, new TokenStreamSparse()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
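The three term-vector setters above repeat throughout these tests; a hedged refactoring sketch with a hypothetical helper, built on the FieldType copy constructor from this commit:

private static FieldType withTermVectors(FieldType base, boolean positions) {
  FieldType ft = new FieldType(base);   // copy base, then enable vectors
  ft.setStoreTermVectors(true);
  ft.setStoreTermVectorOffsets(true);
  if (positions) {
    ft.setStoreTermVectorPositions(true);
  }
  return ft;
}

document.add(new Field(FIELD, withTermVectors(TextField.TYPE_UNSTORED, true), new TokenStreamSparse()));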

View File

@ -38,8 +38,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -1629,7 +1628,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
private Document doc( String f, String v ){
Document doc = new Document();
doc.add( new Field( f, v, Store.YES, Index.ANALYZED ) );
doc.add( new Field( f, TextField.TYPE_STORED, v));
return doc;
}
@ -1690,7 +1689,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
* QueryFragmentScorer(query));
*
* for (int i = 0; i < hits.totalHits; i++) { String text =
* searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
* searcher.doc2(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
* tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
* String highlightedText = highlighter.getBestFragment(tokenStream,text);
* System.out.println(highlightedText); } }
@ -1744,21 +1743,21 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
addDoc(writer, text);
}
Document doc = new Document();
NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
nfield.setIntValue(1);
doc.add(nfield);
writer.addDocument(doc, analyzer);
nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
nfield.setIntValue(3);
doc = new Document();
doc.add(nfield);
writer.addDocument(doc, analyzer);
nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
nfield.setIntValue(5);
doc = new Document();
doc.add(nfield);
writer.addDocument(doc, analyzer);
nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
nfield.setIntValue(7);
doc = new Document();
doc.add(nfield);
@ -1779,7 +1778,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
private void addDoc(IndexWriter writer, String text) throws IOException {
Document d = new Document();
Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED);
Field f = new Field(FIELD_NAME, TextField.TYPE_STORED, text);
d.add(f);
writer.addDocument(d);

View File

@ -28,7 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
@ -107,8 +108,10 @@ public class TokenSourcesTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamOverlap(),
TermVector.WITH_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
@ -153,8 +156,11 @@ public class TokenSourcesTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamOverlap(),
TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
@ -199,8 +205,10 @@ public class TokenSourcesTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamOverlap(),
TermVector.WITH_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();
@ -246,8 +254,10 @@ public class TokenSourcesTest extends LuceneTestCase {
newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
try {
final Document document = new Document();
document.add(new Field(FIELD, new TokenStreamOverlap(),
TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
indexWriter.addDocument(document);
} finally {
indexWriter.close();

View File

@ -34,14 +34,13 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
@ -359,8 +358,13 @@ public abstract class AbstractTestCase extends LuceneTestCase {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
Document doc = new Document();
for( String value: values )
doc.add( new Field( F, value, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
for( String value: values ) {
doc.add( new Field( F, customType, value ) );
}
writer.addDocument( doc );
writer.close();
if (reader != null) reader.close();
@ -372,8 +376,14 @@ public abstract class AbstractTestCase extends LuceneTestCase {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, analyzerK).setOpenMode(OpenMode.CREATE));
Document doc = new Document();
for( String value: values )
doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
for( String value: values ) {
doc.add( new Field( F, customType, value ));
//doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
}
writer.addDocument( doc );
writer.close();
if (reader != null) reader.close();

View File

@ -19,9 +19,8 @@ package org.apache.lucene.search.vectorhighlight;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -139,7 +138,12 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, analyzerW).setOpenMode(OpenMode.CREATE));
Document doc = new Document();
doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
doc.add( new Field( F, customType, "aaa" ) );
//doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
writer.addDocument( doc );
writer.close();
if (reader != null) reader.close();
@ -155,9 +159,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
SimpleFragListBuilder sflb = new SimpleFragListBuilder();
FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
// '/' separator doesn't affect the snippet because of NOT_ANALYZED field
sfb.setMultiValuedSeparator( '/' );
assertEquals( "abc<b>defg</b>hijkl", sfb.createFragment( reader, 0, F, ffl ) );
assertEquals( "abc/<b>defg</b>/hijkl/", sfb.createFragment( reader, 0, F, ffl ) );
}
public void testMVSeparator() throws Exception {

View File

@ -68,7 +68,6 @@ public class InstantiatedDocument {
return document;
}
@Override
public String toString() {
return document.toString();

View File

@ -27,8 +27,8 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
@ -190,16 +190,16 @@ public class InstantiatedIndex
InstantiatedDocument document = new InstantiatedDocument();
// copy stored fields from source reader
Document sourceDocument = sourceIndexReader.document(i);
for (Fieldable field : sourceDocument.getFields()) {
for (IndexableField field : sourceDocument) {
if (fields == null || fields.contains(field.name())) {
document.getDocument().add(field);
}
}
document.setDocumentNumber(i);
documentsByNumber[i] = document;
for (Fieldable field : document.getDocument().getFields()) {
for (IndexableField field : document.getDocument()) {
if (fields == null || fields.contains(field.name())) {
if (field.isTermVectorStored()) {
if (field.storeTermVectors()) {
if (document.getVectorSpace() == null) {
document.setVectorSpace(new HashMap<String, List<InstantiatedTermDocumentInformation>>());
}
@ -290,8 +290,8 @@ public class InstantiatedIndex
if (document == null) {
continue; // deleted
}
for (Fieldable field : document.getDocument().getFields()) {
if (field.isTermVectorStored() && field.isStoreOffsetWithTermVector()) {
for (IndexableField field : document.getDocument()) {
if (field.storeTermVectors() && field.storeTermVectorOffsets()) {
TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
if (termPositionVector != null) {
for (int i = 0; i < termPositionVector.getTerms().length; i++) {

View File

@ -30,7 +30,6 @@ import java.util.Set;
import java.util.Comparator;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.*;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.Directory;
@ -251,42 +250,6 @@ public class InstantiatedIndexReader extends IndexReader {
return fieldSet;
}
/**
* Return the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup>
* position.
<p>
* <b>Warning!</b>
* The resulting document is the actual stored document instance
* and not a deserialized clone as returned by an IndexReader
* over a {@link org.apache.lucene.store.Directory}.
* I.e., if you need to touch the document, clone it first!
* <p>
* This can also be seen as a feature for live changes of stored values,
* but be careful! Adding a field with a name unknown to the index
* or to a field that previously had no stored values will make
* {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
* out of sync, causing problems for instance when merging the
* instantiated index to another index.
<p>
* This implementation ignores the field selector! All stored fields are always returned!
* <p>
*
* @param n document number
* @param fieldSelector ignored
* @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*
* @see org.apache.lucene.document.Fieldable
* @see org.apache.lucene.document.FieldSelector
* @see org.apache.lucene.document.SetBasedFieldSelector
* @see org.apache.lucene.document.LoadFirstFieldSelector
*/
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
return document(n);
}
/**
* Returns the stored fields of the <code>n</code><sup>th</sup>
* <code>Document</code> in this index.
@ -313,6 +276,11 @@ public class InstantiatedIndexReader extends IndexReader {
return getIndex().getDocumentsByNumber()[n].getDocument();
}
@Override
public void document(int docID, StoredFieldVisitor visitor) throws IOException {
throw new UnsupportedOperationException();
}
/**
* never ever touch these values. it is the true values, unless norms have
* been touched.

View File

@ -37,9 +37,9 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.search.IndexSearcher;
@ -238,7 +238,7 @@ public class InstantiatedIndexWriter implements Closeable {
if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName;
final FieldInvertState invertState = new FieldInvertState();
invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost);
invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).computeNorm(invertState);
} else {
@ -469,7 +469,7 @@ public class InstantiatedIndexWriter implements Closeable {
// normalize settings per field name in document
Map<String /* field name */, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
for (Fieldable field : document.getDocument().getFields()) {
for (IndexableField field : document.getDocument()) {
FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
if (fieldSetting == null) {
fieldSetting = new FieldSetting();
@ -479,52 +479,52 @@ public class InstantiatedIndexWriter implements Closeable {
}
// todo: fixme: multiple fields with the same name does not mean field boost += more boost.
fieldSetting.boost *= field.getBoost();
fieldSetting.boost *= field.boost();
//fieldSettings.dimensions++;
// once fieldSettings, always fieldSettings.
if (field.getOmitNorms()) {
if (field.omitNorms()) {
fieldSetting.omitNorms = true;
}
if (field.isIndexed() ) {
if (field.indexed() ) {
fieldSetting.indexed = true;
}
if (field.isTokenized()) {
if (field.tokenized()) {
fieldSetting.tokenized = true;
}
if (field.isStored()) {
if (field.stored()) {
fieldSetting.stored = true;
}
if (field.isBinary()) {
if (field.binaryValue() != null) {
fieldSetting.isBinary = true;
}
if (field.isTermVectorStored()) {
if (field.storeTermVectors()) {
fieldSetting.storeTermVector = true;
}
if (field.isStorePositionWithTermVector()) {
if (field.storeTermVectorPositions()) {
fieldSetting.storePositionWithTermVector = true;
}
if (field.isStoreOffsetWithTermVector()) {
if (field.storeTermVectorOffsets()) {
fieldSetting.storeOffsetWithTermVector = true;
}
}
Map<Fieldable, LinkedList<Token>> tokensByField = new LinkedHashMap<Fieldable, LinkedList<Token>>(20);
Map<IndexableField, LinkedList<Token>> tokensByField = new LinkedHashMap<IndexableField, LinkedList<Token>>(20);
// tokenize indexed fields.
for (Iterator<Fieldable> it = document.getDocument().getFields().iterator(); it.hasNext();) {
for (Iterator<IndexableField> it = document.getDocument().iterator(); it.hasNext();) {
Fieldable field = it.next();
IndexableField field = it.next();
FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
if (field.isIndexed()) {
if (field.indexed()) {
LinkedList<Token> tokens = new LinkedList<Token>();
tokensByField.put(field, tokens);
if (field.isTokenized()) {
if (field.tokenized()) {
final TokenStream tokenStream;
// todo readerValue(), binaryValue()
if (field.tokenStreamValue() != null) {
@ -564,8 +564,8 @@ public class InstantiatedIndexWriter implements Closeable {
}
}
if (!field.isStored()) {
it.remove();
if (!field.stored()) {
//it.remove();
}
}
@ -574,7 +574,7 @@ public class InstantiatedIndexWriter implements Closeable {
termDocumentInformationFactoryByDocument.put(document, termDocumentInformationFactoryByTermTextAndFieldSetting);
// build term vector, term positions and term offsets
for (Map.Entry<Fieldable, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
for (Map.Entry<IndexableField, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
Map<String, TermDocumentInformationFactory> termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
@ -610,7 +610,7 @@ public class InstantiatedIndexWriter implements Closeable {
termDocumentInformationFactory.payloads.add(null);
}
if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
if (eField_Tokens.getKey().storeTermVectorOffsets()) {
termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset()));
lastOffset = fieldSetting.offset + token.endOffset();
@ -619,7 +619,7 @@ public class InstantiatedIndexWriter implements Closeable {
}
if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
if (eField_Tokens.getKey().storeTermVectorOffsets()) {
fieldSetting.offset = lastOffset + 1;
}
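For reference, the Fieldable-to-IndexableField accessor renames applied in this file:

getBoost()                      -> boost()
getOmitNorms()                  -> omitNorms()
isIndexed()                     -> indexed()
isTokenized()                   -> tokenized()
isStored()                      -> stored()
isBinary()                      -> binaryValue() != null
isTermVectorStored()            -> storeTermVectors()
isStorePositionWithTermVector() -> storeTermVectorPositions()
isStoreOffsetWithTermVector()   -> storeTermVectorOffsets()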

View File

@ -29,6 +29,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiNorms;
@ -204,19 +206,44 @@ public class TestIndicesEquals extends LuceneTestCase {
private void assembleDocument(Document document, int i) {
document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_STORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorOffsets(true);
customType.setStoreTermVectorPositions(true);
//document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
document.add(new Field("a", customType, i + " Do you really want to go and live in that house all winter?"));
if (i > 0) {
document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
//document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
document.add(new Field("b0", customType, i + " All work and no play makes Jack a dull boy"));
//document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
FieldType customType2 = new FieldType(TextField.TYPE_STORED);
customType2.setTokenized(false);
customType2.setOmitNorms(true);
document.add(new Field("b1", customType2, i + " All work and no play makes Jack a dull boy"));
//document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED);
customType3.setTokenized(false);
document.add(new Field("b1", customType3, i + " All work and no play makes Jack a dull boy"));
//document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
FieldType customType4 = new FieldType(TextField.TYPE_STORED);
customType4.setIndexed(false);
customType4.setTokenized(false);
document.add(new Field("b1", customType4, i + " All work and no play makes Jack a dull boy"));
if (i > 1) {
document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
//document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
document.add(new Field("c", customType, i + " Redrum redrum"));
if (i > 2) {
document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
//document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
document.add(new Field("d", customType, i + " Hello Danny, come and play with us... forever and ever. and ever."));
if (i > 3) {
Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
f.setOmitNorms(true);
//Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
//f.setOmitNorms(true);
FieldType customType5 = new FieldType(TextField.TYPE_UNSTORED);
customType5.setOmitNorms(true);
Field f = new Field("e", customType5, i + " Heres Johnny!");
document.add(f);
if (i > 4) {
final List<Token> tokens = new ArrayList<Token>(2);
@ -247,7 +274,8 @@ public class TestIndicesEquals extends LuceneTestCase {
}
};
document.add(new Field("f", ts));
//document.add(new Field("f", ts));
document.add(new TextField("f", ts));
}
}
}

View File

@ -19,7 +19,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.LuceneTestCase;
@ -43,7 +43,7 @@ public class TestRealTime extends LuceneTestCase {
Collector collector;
doc = new Document();
doc.add(new Field("f", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new StringField("f", "a"));
writer.addDocument(doc);
writer.commit();
@ -52,7 +52,7 @@ public class TestRealTime extends LuceneTestCase {
assertEquals(1, collector.hits);
doc = new Document();
doc.add(new Field("f", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
doc.add(new StringField("f", "a"));
writer.addDocument(doc);
writer.commit();

View File

@ -25,7 +25,7 @@ import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
/**
* @since 2009-mar-30 13:15:49
@ -66,7 +66,7 @@ public class TestUnoptimizedReaderOnConstructor extends LuceneTestCase {
private void addDocument(IndexWriter iw, String text) throws IOException {
Document doc = new Document();
doc.add(new Field("field", text, Field.Store.NO, Field.Index.ANALYZED));
doc.add(new TextField("field", text));
iw.addDocument(doc);
}
}

View File

@ -35,23 +35,24 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FieldsEnum;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermState;
import org.apache.lucene.index.TermVectorMapper;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -60,8 +61,8 @@ import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.RAMDirectory; // for javadocs
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants; // for javadocs
/**
@ -240,11 +241,8 @@ public class MemoryIndex {
/**
* Convenience method; Tokenizes the given field text and adds the resulting
* terms to the index; Equivalent to adding an indexed non-keyword Lucene
* {@link org.apache.lucene.document.Field} that is
* {@link org.apache.lucene.document.Field.Index#ANALYZED tokenized},
* {@link org.apache.lucene.document.Field.Store#NO not stored},
* {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions} (or
* {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions and offsets}),
* {@link org.apache.lucene.document.Field} that is tokenized, not stored,
* and whose term vectors are stored with positions (or with positions and offsets).
*
* @param fieldName
* a name to be associated with the text
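A hedged end-to-end sketch of the convenience method described above (analyzer construction elided):

MemoryIndex index = new MemoryIndex();
index.addField("content", "readings about salmon and other fish", analyzer);
float score = index.search(new TermQuery(new Term("content", "fish")));
// a score > 0.0f means the single in-memory document matched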
@ -1237,18 +1235,11 @@ public class MemoryIndex {
}
@Override
public Document document(int n) {
public void document(int docID, StoredFieldVisitor visitor) {
if (DEBUG) System.err.println("MemoryIndexReader.document");
return new Document(); // there are no stored fields
// no-op: there are no stored fields
}
//When we convert to JDK 1.5 make this Set<String>
@Override
public Document document(int n, FieldSelector fieldSelector) throws IOException {
if (DEBUG) System.err.println("MemoryIndexReader.document");
return new Document(); // there are no stored fields
}
@Override
public boolean hasDeletions() {
if (DEBUG) System.err.println("MemoryIndexReader.hasDeletions");

View File

@ -31,6 +31,7 @@ import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
@ -108,8 +109,8 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
IndexWriter writer = new IndexWriter(ramdir,
new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
Document doc = new Document();
Field field1 = newField("foo", fooField.toString(), Field.Store.NO, Field.Index.ANALYZED);
Field field2 = newField("term", termField.toString(), Field.Store.NO, Field.Index.ANALYZED);
Field field1 = newField("foo", fooField.toString(), TextField.TYPE_UNSTORED);
Field field2 = newField("term", termField.toString(), TextField.TYPE_UNSTORED);
doc.add(field1);
doc.add(field2);
writer.addDocument(doc);

View File

@ -1,5 +1,6 @@
package org.apache.lucene.document;
/**
* Copyright 2004 The Apache Software Foundation
*
@ -18,7 +19,7 @@ package org.apache.lucene.document;
/**
* Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
* what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
* what Fields get loaded on a {@link Document} by {@link FieldSelectorVisitor}
*
**/
public interface FieldSelector {

View File

@ -20,39 +20,41 @@ package org.apache.lucene.document;
* Provides information about what should be done with this Field
*
**/
import org.apache.lucene.index.IndexableField; // for javadocs
public enum FieldSelectorResult {
/**
* Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered.
* {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null.
* {@link Document#getField(String)} should not return null.
*<p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
* {@link Document#add(IndexableField)} should be called by the Reader.
*/
LOAD,
/**
* Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until
* invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
* return a valid instance of a {@link Fieldable}.
* invoked. {@link Document#getField(String)} is safe to use and should
* return a valid instance of an {@link IndexableField}.
*<p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
* {@link Document#add(IndexableField)} should be called by the Reader.
*/
LAZY_LOAD,
/**
* Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null.
* {@link Document#add(Fieldable)} is not called.
* Do not load the {@link Field}. {@link Document#getField(String)} should return null.
* {@link Document#add(IndexableField)} is not called.
* <p/>
* {@link Document#add(Fieldable)} should not be called by the Reader.
* {@link Document#add(IndexableField)} should not be called by the Reader.
*/
NO_LOAD,
/**
* Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the
* Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should
* Document may not have its complete set of Fields. {@link Document#getField(String)} should
* be valid for this {@link Field}
* <p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
* {@link Document#add(IndexableField)} should be called by the Reader.
*/
LOAD_AND_BREAK,
@ -67,10 +69,10 @@ public enum FieldSelectorResult {
/**
* Lazily load this {@link Field}, but do not cache the result. This means the {@link Field} is valid, but it may not actually contain its data until
* invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should
* return a valid instance of a {@link Fieldable}.
* invoked. {@link Document#getField(String)} is safe to use and should
* return a valid instance of an {@link IndexableField}.
*<p/>
* {@link Document#add(Fieldable)} should be called by the Reader.
* {@link Document#add(IndexableField)} should be called by the Reader.
*/
LATENT
}
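A hedged sketch against the FieldSelector interface kept by this commit: a selector that loads only a hypothetical "title" field and then stops:

FieldSelector titleOnly = new FieldSelector() {
  public FieldSelectorResult accept(String fieldName) {
    return "title".equals(fieldName)
        ? FieldSelectorResult.LOAD_AND_BREAK
        : FieldSelectorResult.NO_LOAD;
  }
};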

View File

@ -0,0 +1,319 @@
package org.apache.lucene.document;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.NumericField.DataType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldReaderException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BytesRef;
/** Create this, passing a legacy {@link FieldSelector} to it, then
* pass this class to {@link IndexReader#document(int,
* StoredFieldVisitor)}, then call {@link #getDocument} to
* retrieve the loaded document.
* <p><b>NOTE</b>: If you use Lazy fields, you should not
* access the returned document after the reader has been
* closed!
*/
public class FieldSelectorVisitor extends StoredFieldVisitor {
private final FieldSelector selector;
private final Document doc;
public FieldSelectorVisitor(FieldSelector selector) {
this.selector = selector;
doc = new Document();
}
public Document getDocument() {
return doc;
}
@Override
public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException {
final FieldSelectorResult accept = selector.accept(fieldInfo.name);
switch (accept) {
case LOAD:
case LOAD_AND_BREAK:
final byte[] b = new byte[numBytes];
in.readBytes(b, 0, b.length);
doc.add(new BinaryField(fieldInfo.name, b));
return accept != FieldSelectorResult.LOAD;
case LAZY_LOAD:
case LATENT:
addFieldLazy(in, fieldInfo, true, accept == FieldSelectorResult.LAZY_LOAD, numBytes);
return false;
case SIZE:
case SIZE_AND_BREAK:
in.seek(in.getFilePointer() + numBytes);
addFieldSize(fieldInfo, numBytes);
return accept != FieldSelectorResult.SIZE;
default:
// skip
in.seek(in.getFilePointer() + numBytes);
return false;
}
}
@Override
public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
final FieldSelectorResult accept = selector.accept(fieldInfo.name);
switch (accept) {
case LOAD:
case LOAD_AND_BREAK:
final byte[] b = new byte[numUTF8Bytes];
in.readBytes(b, 0, b.length);
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.storeTermVector);
ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector);
ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector);
doc.add(new Field(fieldInfo.name, ft, new String(b, "UTF-8")));
return accept != FieldSelectorResult.LOAD;
case LAZY_LOAD:
case LATENT:
addFieldLazy(in, fieldInfo, false, accept == FieldSelectorResult.LAZY_LOAD, numUTF8Bytes);
return false;
case SIZE:
case SIZE_AND_BREAK:
in.seek(in.getFilePointer() + numUTF8Bytes);
addFieldSize(fieldInfo, 2*numUTF8Bytes);
return accept != FieldSelectorResult.SIZE;
default:
// skip
in.seek(in.getFilePointer() + numUTF8Bytes);
return false;
}
}
@Override
public boolean intField(FieldInfo fieldInfo, int value) throws IOException {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
ft.setOmitNorms(fieldInfo.omitNorms);
ft.setIndexOptions(fieldInfo.indexOptions);
return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setIntValue(value));
}
@Override
public boolean longField(FieldInfo fieldInfo, long value) throws IOException {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
ft.setOmitNorms(fieldInfo.omitNorms);
ft.setIndexOptions(fieldInfo.indexOptions);
return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setLongValue(value));
}
@Override
public boolean floatField(FieldInfo fieldInfo, float value) throws IOException {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
ft.setOmitNorms(fieldInfo.omitNorms);
ft.setIndexOptions(fieldInfo.indexOptions);
return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setFloatValue(value));
}
@Override
public boolean doubleField(FieldInfo fieldInfo, double value) throws IOException {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
ft.setOmitNorms(fieldInfo.omitNorms);
ft.setIndexOptions(fieldInfo.indexOptions);
return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setDoubleValue(value));
}
private boolean addNumericField(FieldInfo fieldInfo, NumericField f) {
doc.add(f);
final FieldSelectorResult accept = selector.accept(fieldInfo.name);
switch (accept) {
case LOAD:
return false;
case LOAD_AND_BREAK:
return true;
case LAZY_LOAD:
case LATENT:
return false;
case SIZE:
return false;
case SIZE_AND_BREAK:
return true;
default:
return false;
}
}
private void addFieldLazy(IndexInput in, FieldInfo fi, boolean binary, boolean cacheResult, int numBytes) throws IOException {
final IndexableField f;
final long pointer = in.getFilePointer();
// Need to move the pointer ahead by numBytes positions
in.seek(pointer+numBytes);
FieldType ft = new FieldType();
ft.setStored(true);
ft.setOmitNorms(fi.omitNorms);
ft.setIndexOptions(fi.indexOptions);
ft.setLazy(true);
if (binary) {
f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult);
} else {
ft.setStoreTermVectors(fi.storeTermVector);
ft.setStoreTermVectorOffsets(fi.storeOffsetWithTermVector);
ft.setStoreTermVectorPositions(fi.storePositionWithTermVector);
f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult);
}
doc.add(f);
}
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
private void addFieldSize(FieldInfo fi, int numBytes) throws IOException {
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (numBytes>>>24);
sizebytes[1] = (byte) (numBytes>>>16);
sizebytes[2] = (byte) (numBytes>>> 8);
sizebytes[3] = (byte) numBytes ;
doc.add(new BinaryField(fi.name, sizebytes));
}
/**
* A Lazy field implementation that defers loading of fields until asked for, instead of when the Document is
* loaded.
*/
private static class LazyField extends Field {
private int toRead;
private long pointer;
private final boolean cacheResult;
private final IndexInput in;
private boolean isBinary;
public LazyField(IndexInput in, String name, FieldType ft, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
super(name, ft);
this.in = in;
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
this.cacheResult = cacheResult;
}
@Override
public Number numericValue() {
return null;
}
@Override
public DataType numericDataType() {
return null;
}
private IndexInput localFieldsStream;
private IndexInput getFieldStream() {
if (localFieldsStream == null) {
localFieldsStream = (IndexInput) in.clone();
}
return localFieldsStream;
}
/** The value of the field as a Reader, or null. If null, the String value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
@Override
public Reader readerValue() {
return null;
}
/** The value of the field as a TokenStream, or null. If null, the Reader value,
* String value, or binary value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
@Override
public TokenStream tokenStreamValue() {
return null;
}
/** The value of the field as a String, or null. If null, the Reader value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), binaryValue(), and tokenStreamValue() must be set. */
@Override
synchronized public String stringValue() {
if (isBinary) {
return null;
} else {
if (fieldsData == null) {
String result = null;
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
result = new String(bytes, "UTF-8");
} catch (IOException e) {
throw new FieldReaderException(e);
}
if (cacheResult) {
fieldsData = result;
}
return result;
} else {
return (String) fieldsData;
}
}
}
@Override
synchronized public BytesRef binaryValue() {
if (isBinary) {
if (fieldsData == null) {
// Allocate a new buffer to hold the field's bytes
final byte[] b = new byte[toRead];
IndexInput localFieldsStream = getFieldStream();
// Wrap any IOException in a FieldReaderException; IndexReader.document throws
// IOException anyway, so callers are already handling it when loading a document
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
} catch (IOException e) {
throw new FieldReaderException(e);
}
final BytesRef result = new BytesRef(b);
result.length = toRead;
if (cacheResult) {
fieldsData = result;
}
return result;
} else {
return (BytesRef) fieldsData;
}
} else {
return null;
}
}
}
}
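// A minimal usage sketch (assumptions: a hypothetical "body" field and an
// already-open IndexReader "reader"; the selector/visitor types are the ones
// in this package). No bytes are read for a lazy field until its value is
// actually requested:
//
//   Set<String> eager = Collections.emptySet();
//   Set<String> lazy = Collections.singleton("body");
//   FieldSelector selector = new SetBasedFieldSelector(eager, lazy);
//   FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
//   reader.document(docID, visitor);
//   Document doc = visitor.getDocument();
//   String text = doc.getField("body").stringValue(); // the seek+read happens here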

View File

@ -26,4 +26,4 @@ public class LoadFirstFieldSelector implements FieldSelector {
public FieldSelectorResult accept(String fieldName) {
return FieldSelectorResult.LOAD_AND_BREAK;
}
}

View File

@ -1,6 +1,5 @@
package org.apache.lucene.document;
import java.util.Set;
/**
* Copyright 2004 The Apache Software Foundation
*
@ -17,10 +16,14 @@ import java.util.Set;
* limitations under the License.
*/
import java.util.Set;
/**
* Declare what fields to load normally and what fields to load lazily
*
**/
public class SetBasedFieldSelector implements FieldSelector {
private Set<String> fieldsToLoad;
@ -55,4 +58,4 @@ public class SetBasedFieldSelector implements FieldSelector {
}
return result;
}
}

View File

@ -35,7 +35,7 @@ import org.apache.lucene.util.ReaderUtil;
*
* If Similarity class is specified, uses its computeNorm method to set norms.
* If the -n command line argument is used, removes field norms, as if
* {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used.
* {@link org.apache.lucene.document.FieldType#setOmitNorms(boolean)} was used.
*
* <p>
* NOTE: This will overwrite any length normalization or field/document boosts.
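* <p>
* For example, a sketch of a typical command line invocation (assuming the
* usual argument order of index path, then a Similarity class name or -n,
* then one or more field names):
* <pre>
* java org.apache.lucene.index.FieldNormModifier /path/to/index -n field1
* </pre>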

View File

@ -0,0 +1,318 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.File;
import java.io.IOException;
import java.util.*;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.LoadFirstFieldSelector;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.junit.AfterClass;
import org.junit.BeforeClass;
public class TestContribFieldsReader extends LuceneTestCase {
private static Directory dir;
private static org.apache.lucene.document.Document testDoc = new org.apache.lucene.document.Document();
private static FieldInfos fieldInfos = null;
@BeforeClass
public static void beforeClass() throws Exception {
fieldInfos = new FieldInfos();
DocHelper.setupDoc(testDoc);
_TestUtil.add(testDoc, fieldInfos);
dir = newDirectory();
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy());
((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
IndexWriter writer = new IndexWriter(dir, conf);
writer.addDocument(testDoc);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
dir.close();
dir = null;
fieldInfos = null;
testDoc = null;
}
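// Bridges the legacy FieldSelector API onto the new visitor-based stored
// document loading: the visitor applies the selector per field and
// accumulates the resulting Document.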
private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
ir.document(docID, visitor);
return visitor.getDocument();
}
public void testLazyFields() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
IndexReader reader = IndexReader.open(dir);
Set<String> loadFieldNames = new HashSet<String>();
loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
Set<String> lazyFieldNames = new HashSet<String>();
//new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
Document doc = getDocument(reader, 0, fieldSelector);
assertTrue("doc is null and it shouldn't be", doc != null);
IndexableField field = doc.getField(DocHelper.LAZY_FIELD_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField"));
String value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
field = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
byte [] bytes = field.binaryValue().bytes;
assertTrue("bytes is null and it shouldn't be", bytes != null);
assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
assertTrue("calling binaryValue() twice should give same reference", field.binaryValue().bytes == field.binaryValue().bytes);
for (int i = 0; i < bytes.length; i++) {
assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
}
reader.close();
}
public void testLatentFields() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
IndexReader reader = IndexReader.open(dir);
Set<String> loadFieldNames = new HashSet<String>();
loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
Set<String> lazyFieldNames = new HashSet<String>();
//new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
// Use LATENT instead of LAZY
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) {
@Override
public FieldSelectorResult accept(String fieldName) {
final FieldSelectorResult result = super.accept(fieldName);
if (result == FieldSelectorResult.LAZY_LOAD) {
return FieldSelectorResult.LATENT;
} else {
return result;
}
}
};
Document doc = getDocument(reader, 0, fieldSelector);
assertTrue("doc is null and it shouldn't be", doc != null);
IndexableField field = doc.getField(DocHelper.LAZY_FIELD_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField"));
String value = field.stringValue();
assertTrue("value is null and it shouldn't be", value != null);
assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
field = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT));
assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField"));
assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
assertTrue("calling binaryValue() twice should give different references", field.binaryValue().bytes != field.binaryValue().bytes);
byte [] bytes = field.binaryValue().bytes;
assertTrue("bytes is null and it shouldn't be", bytes != null);
assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
for (int i = 0; i < bytes.length; i++) {
assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
}
reader.close();
}
public void testLoadFirst() throws Exception {
assertTrue(dir != null);
assertTrue(fieldInfos != null);
IndexReader reader = IndexReader.open(dir);
LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
Document doc = getDocument(reader, 0, fieldSelector);
assertTrue("doc is null and it shouldn't be", doc != null);
int count = 0;
List<IndexableField> l = doc.getFields();
for (final IndexableField indexableField : l) {
Field field = (Field) indexableField;
assertTrue("field is null and it shouldn't be", field != null);
String sv = field.stringValue();
assertTrue("sv is null and it shouldn't be", sv != null);
count++;
}
assertTrue(count + " does not equal: " + 1, count == 1);
reader.close();
}
/**
* Not really a test per se, but we should have some way of assessing whether this is worthwhile.
* <p/>
* Must be run against a File-based Directory.
*
* @throws Exception
*/
public void testLazyPerformance() throws Exception {
String userName = System.getProperty("user.name");
File file = _TestUtil.getTempDir("lazyDir" + userName);
Directory tmpDir = newFSDirectory(file);
assertTrue(tmpDir != null);
IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy());
((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false);
IndexWriter writer = new IndexWriter(tmpDir, conf);
writer.addDocument(testDoc);
writer.close();
assertTrue(fieldInfos != null);
long lazyTime = 0;
long regularTime = 0;
int length = 10;
Set<String> lazyFieldNames = new HashSet<String>();
lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.<String>emptySet(), lazyFieldNames);
for (int i = 0; i < length; i++) {
IndexReader reader = IndexReader.open(tmpDir);
Document doc;
doc = reader.document(0); // Load all of them
assertTrue("doc is null and it shouldn't be", doc != null);
IndexableField field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
assertTrue("field is null and it shouldn't be", field != null);
assertFalse("field is lazy", field.getClass().getSimpleName().equals("LazyField"));
String value;
long start;
long finish;
start = System.currentTimeMillis();
//On my machine this was always 0ms.
value = field.stringValue();
finish = System.currentTimeMillis();
assertTrue("value is null and it shouldn't be", value != null);
regularTime += (finish - start);
reader.close();
reader = null;
doc = null;
//Hmmm, are we still in cache???
System.gc();
reader = IndexReader.open(tmpDir);
doc = getDocument(reader, 0, fieldSelector);
field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
assertTrue("field is not lazy", field.getClass().getSimpleName().equals("LazyField"));
start = System.currentTimeMillis();
//On my machine this took around 50 - 70ms
value = field.stringValue();
finish = System.currentTimeMillis();
assertTrue("value is null and it shouldn't be", value != null);
lazyTime += (finish - start);
reader.close();
}
tmpDir.close();
if (VERBOSE) {
System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
}
}
public void testLoadSize() throws IOException {
IndexReader reader = IndexReader.open(dir);
Document doc;
doc = getDocument(reader, 0, new FieldSelector(){
public FieldSelectorResult accept(String fieldName) {
if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) ||
fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY))
return FieldSelectorResult.SIZE;
else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY))
return FieldSelectorResult.LOAD;
else
return FieldSelectorResult.NO_LOAD;
}
});
IndexableField f1 = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
IndexableField f3 = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
IndexableField fb = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
assertTrue(f1.binaryValue()!=null);
assertTrue(f3.binaryValue()==null);
assertTrue(fb.binaryValue()!=null);
assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.binaryValue().bytes);
assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue());
assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue().bytes);
reader.close();
}
private void assertSizeEquals(int size, byte[] sizebytes) {
assertEquals((byte) (size>>>24), sizebytes[0]);
assertEquals((byte) (size>>>16), sizebytes[1]);
assertEquals((byte) (size>>> 8), sizebytes[2]);
assertEquals((byte) size , sizebytes[3]);
}
}

View File

@ -0,0 +1,169 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.BinaryField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SetBasedFieldSelector;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
public class TestContribIndexReader extends LuceneTestCase {
private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
ir.document(docID, visitor);
return visitor.getDocument();
}
static void addDoc(IndexWriter writer, String value) throws IOException {
Document doc = new Document();
doc.add(newField("content", value, TextField.TYPE_UNSTORED));
writer.addDocument(doc);
}
static void addDocumentWithFields(IndexWriter writer) throws IOException {
Document doc = new Document();
FieldType customType3 = new FieldType();
customType3.setStored(true);
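// customType3 is stored-only (not indexed): used for the "unindexed" field below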
doc.add(newField("keyword", "test1", StringField.TYPE_STORED));
doc.add(newField("text", "test1", TextField.TYPE_STORED));
doc.add(newField("unindexed", "test1", customType3));
doc.add(new TextField("unstored","test1"));
writer.addDocument(doc);
}
static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException {
Document doc = new Document();
FieldType customType3 = new FieldType();
customType3.setStored(true);
doc.add(newField("keyword2", "test1", StringField.TYPE_STORED));
doc.add(newField("text2", "test1", TextField.TYPE_STORED));
doc.add(newField("unindexed2", "test1", customType3));
doc.add(new TextField("unstored2","test1"));
writer.addDocument(doc);
}
static void addDocumentWithTermVectorFields(IndexWriter writer) throws IOException {
Document doc = new Document();
FieldType customType5 = new FieldType(TextField.TYPE_STORED);
customType5.setStoreTermVectors(true);
FieldType customType6 = new FieldType(TextField.TYPE_STORED);
customType6.setStoreTermVectors(true);
customType6.setStoreTermVectorOffsets(true);
FieldType customType7 = new FieldType(TextField.TYPE_STORED);
customType7.setStoreTermVectors(true);
customType7.setStoreTermVectorPositions(true);
FieldType customType8 = new FieldType(TextField.TYPE_STORED);
customType8.setStoreTermVectors(true);
customType8.setStoreTermVectorOffsets(true);
customType8.setStoreTermVectorPositions(true);
doc.add(newField("tvnot","tvnot",TextField.TYPE_STORED));
doc.add(newField("termvector","termvector",customType5));
doc.add(newField("tvoffset","tvoffset", customType6));
doc.add(newField("tvposition","tvposition", customType7));
doc.add(newField("tvpositionoffset","tvpositionoffset", customType8));
writer.addDocument(doc);
}
public void testBinaryFields() throws IOException {
Directory dir = newDirectory();
byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
for (int i = 0; i < 10; i++) {
addDoc(writer, "document number " + (i + 1));
addDocumentWithFields(writer);
addDocumentWithDifferentFields(writer);
addDocumentWithTermVectorFields(writer);
}
writer.close();
writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
Document doc = new Document();
doc.add(new BinaryField("bin1", bin));
doc.add(new TextField("junk", "junk text"));
writer.addDocument(doc);
writer.close();
IndexReader reader = IndexReader.open(dir, false);
Document doc2 = reader.document(reader.maxDoc() - 1);
IndexableField[] fields = doc2.getFields("bin1");
assertNotNull(fields);
assertEquals(1, fields.length);
Field b1 = (Field) fields[0];
assertTrue(b1.isBinary());
BytesRef bytesRef = b1.binaryValue();
assertEquals(bin.length, bytesRef.length);
for (int i = 0; i < bin.length; i++) {
assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
}
Set<String> lazyFields = new HashSet<String>();
lazyFields.add("bin1");
FieldSelector sel = new SetBasedFieldSelector(new HashSet<String>(), lazyFields);
doc2 = getDocument(reader, reader.maxDoc() - 1, sel);
fields = doc2.getFields("bin1");
assertNotNull(fields);
assertEquals(1, fields.length);
IndexableField fb1 = fields[0];
assertTrue(fb1.binaryValue()!=null);
bytesRef = fb1.binaryValue();
assertEquals(bin.length, bytesRef.bytes.length);
assertEquals(bin.length, bytesRef.length);
for (int i = 0; i < bin.length; i++) {
assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
}
reader.close();
// force optimize
writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy()));
writer.optimize();
writer.close();
reader = IndexReader.open(dir, false);
doc2 = reader.document(reader.maxDoc() - 1);
fields = doc2.getFields("bin1");
assertNotNull(fields);
assertEquals(1, fields.length);
b1 = (Field) fields[0];
assertTrue(b1.isBinary());
bytesRef = b1.binaryValue();
assertEquals(bin.length, bytesRef.length);
for (int i = 0; i < bin.length; i++) {
assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]);
}
reader.close();
dir.close();
}
}

View File

@ -0,0 +1,149 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.MapFieldSelector;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
public class TestContribParallelReader extends LuceneTestCase {
private IndexSearcher parallel;
private IndexSearcher single;
private Directory dir, dir1, dir2;
@Override
public void setUp() throws Exception {
super.setUp();
single = single(random);
parallel = parallel(random);
}
@Override
public void tearDown() throws Exception {
single.getIndexReader().close();
single.close();
parallel.getIndexReader().close();
parallel.close();
dir.close();
dir1.close();
dir2.close();
super.tearDown();
}
// Fields 1-4 indexed together:
private IndexSearcher single(Random random) throws IOException {
dir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
Document d1 = new Document();
d1.add(newField("f1", "v1", TextField.TYPE_STORED));
d1.add(newField("f2", "v1", TextField.TYPE_STORED));
d1.add(newField("f3", "v1", TextField.TYPE_STORED));
d1.add(newField("f4", "v1", TextField.TYPE_STORED));
w.addDocument(d1);
Document d2 = new Document();
d2.add(newField("f1", "v2", TextField.TYPE_STORED));
d2.add(newField("f2", "v2", TextField.TYPE_STORED));
d2.add(newField("f3", "v2", TextField.TYPE_STORED));
d2.add(newField("f4", "v2", TextField.TYPE_STORED));
w.addDocument(d2);
w.close();
return new IndexSearcher(dir, false);
}
// Fields 1 & 2 in one index, 3 & 4 in other, with ParallelReader:
private IndexSearcher parallel(Random random) throws IOException {
dir1 = getDir1(random);
dir2 = getDir2(random);
ParallelReader pr = new ParallelReader();
pr.add(IndexReader.open(dir1, false));
pr.add(IndexReader.open(dir2, false));
return newSearcher(pr);
}
private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
ir.document(docID, visitor);
return visitor.getDocument();
}
public void testDocument() throws IOException {
Directory dir1 = getDir1(random);
Directory dir2 = getDir2(random);
ParallelReader pr = new ParallelReader();
pr.add(IndexReader.open(dir1, false));
pr.add(IndexReader.open(dir2, false));
Document doc11 = getDocument(pr, 0, new MapFieldSelector("f1"));
Document doc24 = getDocument(pr, 1, new MapFieldSelector(Arrays.asList("f4")));
Document doc223 = getDocument(pr, 1, new MapFieldSelector("f2", "f3"));
assertEquals(1, doc11.getFields().size());
assertEquals(1, doc24.getFields().size());
assertEquals(2, doc223.getFields().size());
assertEquals("v1", doc11.get("f1"));
assertEquals("v2", doc24.get("f4"));
assertEquals("v2", doc223.get("f2"));
assertEquals("v2", doc223.get("f3"));
pr.close();
dir1.close();
dir2.close();
}
private Directory getDir1(Random random) throws IOException {
Directory dir1 = newDirectory();
IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
Document d1 = new Document();
d1.add(newField("f1", "v1", TextField.TYPE_STORED));
d1.add(newField("f2", "v1", TextField.TYPE_STORED));
w1.addDocument(d1);
Document d2 = new Document();
d2.add(newField("f1", "v2", TextField.TYPE_STORED));
d2.add(newField("f2", "v2", TextField.TYPE_STORED));
w1.addDocument(d2);
w1.close();
return dir1;
}
private Directory getDir2(Random random) throws IOException {
Directory dir2 = newDirectory();
IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)));
Document d3 = new Document();
d3.add(newField("f3", "v1", TextField.TYPE_STORED));
d3.add(newField("f4", "v1", TextField.TYPE_STORED));
w2.addDocument(d3);
Document d4 = new Document();
d4.add(newField("f3", "v2", TextField.TYPE_STORED));
d4.add(newField("f4", "v2", TextField.TYPE_STORED));
w2.addDocument(d4);
w2.close();
return dir2;
}
}

View File

@ -22,7 +22,8 @@ import java.util.Arrays;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DefaultSimilarity;
@ -65,13 +66,15 @@ public class TestFieldNormModifier extends LuceneTestCase {
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
d.add(newField("field", "word", Field.Store.YES, Field.Index.ANALYZED));
d.add(newField("nonorm", "word", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
d.add(newField("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.ANALYZED));
d.add(newField("field", "word", TextField.TYPE_STORED));
d.add(newField("nonorm", "word", StringField.TYPE_STORED));
d.add(newField("untokfield", "20061212 20071212", TextField.TYPE_STORED));
for (int j = 1; j <= i; j++) {
d.add(newField("field", "crap", Field.Store.YES, Field.Index.ANALYZED));
d.add(newField("nonorm", "more words", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
d.add(newField("field", "crap", TextField.TYPE_STORED));
d.add(newField("nonorm", "more words", StringField.TYPE_STORED));
}
writer.addDocument(d);
}

View File

@ -21,6 +21,7 @@ import java.io.File;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -112,10 +113,10 @@ public class TestIndexSplitter extends LuceneTestCase {
Directory fsDir = newFSDirectory(indexPath);
IndexWriter indexWriter = new IndexWriter(fsDir, iwConfig);
Document doc = new Document();
doc.add(new Field("content", "doc 1", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
doc.add(new Field("content", StringField.TYPE_STORED, "doc 1"));
indexWriter.addDocument(doc);
doc = new Document();
doc.add(new Field("content", "doc 2", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
doc.add(new Field("content", StringField.TYPE_STORED, "doc 2"));
indexWriter.addDocument(doc);
indexWriter.close();
fsDir.close();

View File

@ -22,7 +22,11 @@ import java.util.List;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.FieldSelectorVisitor;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
@ -87,8 +91,7 @@ public class TestLazyBug extends LuceneTestCase {
doc.add(newField("f"+f,
data[f % data.length]
+ '#' + data[random.nextInt(data.length)],
Field.Store.NO,
Field.Index.ANALYZED));
TextField.TYPE_UNSTORED));
}
writer.addDocument(doc);
}
@ -102,12 +105,14 @@ public class TestLazyBug extends LuceneTestCase {
public void doTest(int[] docs) throws Exception {
IndexReader reader = IndexReader.open(directory, true);
for (int i = 0; i < docs.length; i++) {
Document d = reader.document(docs[i], SELECTOR);
final FieldSelectorVisitor visitor = new FieldSelectorVisitor(SELECTOR);
reader.document(docs[i], visitor);
Document d = visitor.getDocument();
d.get(MAGIC_FIELD);
List<Fieldable> fields = d.getFields();
for (Iterator<Fieldable> fi = fields.iterator(); fi.hasNext(); ) {
Fieldable f=null;
List<IndexableField> fields = d.getFields();
for (Iterator<IndexableField> fi = fields.iterator(); fi.hasNext(); ) {
IndexableField f=null;
try {
f = fi.next();
String fname = f.name();
@ -136,5 +141,4 @@ public class TestLazyBug extends LuceneTestCase {
public void testLazyBroken() throws Exception {
doTest(new int[] { NUM_DOCS/2, NUM_DOCS-1 });
}
}

View File

@ -18,10 +18,11 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
public class TestMultiPassIndexSplitter extends LuceneTestCase {
IndexReader input;
@ -36,8 +37,8 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase {
Document doc;
for (int i = 0; i < NUM_DOCS; i++) {
doc = new Document();
doc.add(newField("id", i + "", Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(newField("f", i + " " + i, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("id", i + "", StringField.TYPE_STORED));
doc.add(newField("f", i + " " + i, TextField.TYPE_STORED));
w.addDocument(doc);
}
w.close();

View File

@ -33,7 +33,8 @@ import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
@ -77,17 +78,12 @@ public class TestNRTManager extends LuceneTestCase {
// TODO: is there a pre-existing way to do this!!!
private Document cloneDoc(Document doc1) {
final Document doc2 = new Document();
for(Fieldable f : doc1.getFields()) {
for(IndexableField f : doc1) {
Field field1 = (Field) f;
Field field2 = new Field(field1.name(),
field1.stringValue(),
field1.isStored() ? Field.Store.YES : Field.Store.NO,
field1.isIndexed() ? (field1.isTokenized() ? Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO);
if (field1.getOmitNorms()) {
field2.setOmitNorms(true);
}
field2.setIndexOptions(field1.getIndexOptions());
((Field) f).getFieldType(),
field1.stringValue());
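// getFieldType() carries indexed/tokenized/omitNorms/IndexOptions along,
// replacing the per-flag copying that the old code did above.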
doc2.add(field2);
}
@ -240,7 +236,7 @@ public class TestNRTManager extends LuceneTestCase {
final String addedField;
if (random.nextBoolean()) {
addedField = "extra" + random.nextInt(10);
doc.add(new Field(addedField, "a random field", Field.Store.NO, Field.Index.ANALYZED));
doc.add(new TextField(addedField, "a random field"));
} else {
addedField = null;
}
@ -262,7 +258,7 @@ public class TestNRTManager extends LuceneTestCase {
packID = packCount.getAndIncrement() + "";
}
final Field packIDField = newField("packID", packID, Field.Store.YES, Field.Index.NOT_ANALYZED);
final Field packIDField = newField("packID", packID, StringField.TYPE_STORED);
final List<String> docIDs = new ArrayList<String>();
final SubDocs subDocs = new SubDocs(packID, docIDs);
final List<Document> docsList = new ArrayList<Document>();

View File

@ -23,8 +23,8 @@ import java.text.NumberFormat;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
@ -102,15 +102,15 @@ public class TestPKIndexSplitter extends LuceneTestCase {
StringBuilder sb = new StringBuilder();
Document doc = new Document();
String id = format.format(n);
doc.add(newField("id", id, Store.YES, Index.NOT_ANALYZED));
doc.add(newField("indexname", indexName, Store.YES, Index.NOT_ANALYZED));
doc.add(newField("id", id, StringField.TYPE_STORED));
doc.add(newField("indexname", indexName, StringField.TYPE_STORED));
sb.append("a");
sb.append(n);
doc.add(newField("field1", sb.toString(), Store.YES, Index.ANALYZED));
doc.add(newField("field1", sb.toString(), TextField.TYPE_STORED));
sb.append(" b");
sb.append(n);
for (int i = 1; i < numFields; i++) {
doc.add(newField("field" + (i + 1), sb.toString(), Store.YES, Index.ANALYZED));
doc.add(newField("field" + (i + 1), sb.toString(), TextField.TYPE_STORED));
}
return doc;
}

View File

@ -2,7 +2,8 @@ package org.apache.lucene.index;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -30,33 +31,42 @@ public class TestTermVectorAccessor extends LuceneTestCase {
Document doc;
doc = new Document();
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setStoreTermVectors(true);
customType.setStoreTermVectorPositions(true);
customType.setStoreTermVectorOffsets(true);
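// term vectors with both positions and offsets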
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType));
doc.add(newField("b", "a b c b d b e b f b g b h b", customType));
doc.add(newField("c", "a c b c d c e c f c g c h c", customType));
iw.addDocument(doc);
doc = new Document();
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
FieldType customType2 = new FieldType(TextField.TYPE_UNSTORED);
customType2.setStoreTermVectors(true);
customType2.setStoreTermVectorPositions(true);
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType2));
doc.add(newField("b", "a b c b d b e b f b g b h b", customType2));
doc.add(newField("c", "a c b c d c e c f c g c h c", customType2));
iw.addDocument(doc);
doc = new Document();
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED);
customType3.setStoreTermVectors(true);
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType3));
doc.add(newField("b", "a b c b d b e b f b g b h b", customType3));
doc.add(newField("c", "a c b c d c e c f c g c h c", customType3));
iw.addDocument(doc);
doc = new Document();
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField("a", "a b a c a d a e a f a g a h a", TextField.TYPE_UNSTORED));
doc.add(newField("b", "a b c b d b e b f b g b h b", TextField.TYPE_UNSTORED));
doc.add(newField("c", "a c b c d c e c f c g c h c", TextField.TYPE_UNSTORED));
iw.addDocument(doc);
doc = new Document();
doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES));
doc.add(newField("a", "a b a c a d a e a f a g a h a", customType));
doc.add(newField("b", "a b c b d b e b f b g b h b", TextField.TYPE_UNSTORED));
doc.add(newField("c", "a c b c d c e c f c g c h c", customType3));
iw.addDocument(doc);
iw.close();

View File

@ -22,9 +22,8 @@ import java.util.Random;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
@ -141,7 +140,11 @@ public class TestAppendingCodec extends LuceneTestCase {
((TieredMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false);
IndexWriter writer = new IndexWriter(dir, cfg);
Document doc = new Document();
doc.add(newField("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
FieldType storedTextType = new FieldType(TextField.TYPE_STORED);
storedTextType.setStoreTermVectors(true);
storedTextType.setStoreTermVectorPositions(true);
storedTextType.setStoreTermVectorOffsets(true);
doc.add(newField("f", text, storedTextType));
writer.addDocument(doc);
writer.commit();
writer.addDocument(doc);
@ -149,8 +152,8 @@ public class TestAppendingCodec extends LuceneTestCase {
writer.close();
IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider());
assertEquals(2, reader.numDocs());
doc = reader.document(0);
assertEquals(text, doc.get("f"));
Document doc2 = reader.document(0);
assertEquals(text, doc2.get("f"));
Fields fields = MultiFields.getFields(reader);
Terms terms = fields.terms("f");
assertNotNull(terms);

View File

@ -20,7 +20,7 @@ package org.apache.lucene.misc;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
@ -203,9 +203,9 @@ public class TestHighFreqTerms extends LuceneTestCase {
Document doc = new Document();
String content = getContent(i);
doc.add(newField(random, "FIELD_1", content, Field.Store.YES,Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "FIELD_1", content, TextField.TYPE_STORED));
//add a different field
doc.add(newField(random, "different_field", "diff", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "different_field", "diff", TextField.TYPE_STORED));
writer.addDocument(doc);
}
@ -213,7 +213,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
//highest freq terms for a specific field.
for (int i = 1; i <= 10; i++) {
Document doc = new Document();
doc.add(newField(random, "different_field", "diff", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "different_field", "diff", TextField.TYPE_STORED));
writer.addDocument(doc);
}
// add some docs where tf < df so we can see if sorting works
@ -224,7 +224,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
for (int i = 0; i < highTF; i++) {
content += "highTF ";
}
doc.add(newField(random, "FIELD_1", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "FIELD_1", content, TextField.TYPE_STORED));
writer.addDocument(doc);
// highTF medium df =5
int medium_df = 5;
@ -235,7 +235,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
for (int j = 0; j < tf; j++) {
newcontent += "highTFmedDF ";
}
newdoc.add(newField(random, "FIELD_1", newcontent, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
newdoc.add(newField(random, "FIELD_1", newcontent, TextField.TYPE_STORED));
writer.addDocument(newdoc);
}
// add a doc with high tf in field different_field
@ -245,7 +245,7 @@ public class TestHighFreqTerms extends LuceneTestCase {
for (int i = 0; i < targetTF; i++) {
content += "TF150 ";
}
doc.add(newField(random, "different_field", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
doc.add(newField(random, "different_field", content, TextField.TYPE_STORED));
writer.addDocument(doc);
writer.close();

View File

@ -21,11 +21,12 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.FieldNormModifier;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MultiNorms;
import org.apache.lucene.index.Term;
@ -70,16 +71,12 @@ public class TestLengthNormModifier extends LuceneTestCase {
for (int i = 0; i < NUM_DOCS; i++) {
Document d = new Document();
d.add(newField("field", "word",
Field.Store.YES, Field.Index.ANALYZED));
d.add(newField("nonorm", "word",
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
d.add(newField("field", "word", TextField.TYPE_STORED));
d.add(newField("nonorm", "word", StringField.TYPE_STORED));
for (int j = 1; j <= i; j++) {
d.add(newField("field", "crap",
Field.Store.YES, Field.Index.ANALYZED));
d.add(newField("nonorm", "more words",
Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
d.add(newField("field", "crap", TextField.TYPE_STORED));
d.add(newField("nonorm", "more words", StringField.TYPE_STORED));
}
writer.addDocument(d);
}

View File

@ -22,11 +22,11 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.*;
import java.util.Random;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.io.IOException;
@ -48,16 +48,16 @@ public class TestThreadSafe extends LuceneTestCase {
}
@Override
public void run() {
try {
for (int i=0; i<iter; i++) {
/*** future
// pick a random index reader... a shared one, or create your own
IndexReader ir;
***/
switch(rand.nextInt(1)) {
case 0: loadDoc(ir1); break;
}
}
@ -68,23 +68,28 @@ public class TestThreadSafe extends LuceneTestCase {
}
private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException {
final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector);
ir.document(docID, visitor);
return visitor.getDocument();
}
void loadDoc(IndexReader ir) throws IOException {
// beware of deleted docs in the future
Document doc = ir.document(rand.nextInt(ir.maxDoc()),
new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
switch(rand.nextInt(2)) {
case 0: return FieldSelectorResult.LAZY_LOAD;
case 1: return FieldSelectorResult.LOAD;
// TODO: add other options
default: return FieldSelectorResult.LOAD;
}
}
}
);
Document doc = getDocument(ir, rand.nextInt(ir.maxDoc()),
new FieldSelector() {
public FieldSelectorResult accept(String fieldName) {
switch(rand.nextInt(2)) {
case 0: return FieldSelectorResult.LAZY_LOAD;
case 1: return FieldSelectorResult.LOAD;
// TODO: add other options
default: return FieldSelectorResult.LOAD;
}
}
}
);
List<Fieldable> fields = doc.getFields();
for (final Fieldable f : fields ) {
for (final IndexableField f : doc ) {
validateField(f);
}
@ -93,7 +98,7 @@ public class TestThreadSafe extends LuceneTestCase {
}
void validateField(Fieldable f) {
void validateField(IndexableField f) {
String val = f.stringValue();
if (!val.startsWith("^") || !val.endsWith("$")) {
throw new RuntimeException("Invalid field:" + f.toString() + " val=" +val);
@ -104,7 +109,7 @@ public class TestThreadSafe extends LuceneTestCase {
void buildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen) throws IOException {
IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10));
for (int j=0; j<nDocs; j++) {
Document d = new Document();
int nFields = random.nextInt(maxFields);
@ -113,9 +118,7 @@ public class TestThreadSafe extends LuceneTestCase {
StringBuilder sb = new StringBuilder("^ ");
while (sb.length() < flen) sb.append(' ').append(words[random.nextInt(words.length)]);
sb.append(" $");
Field.Store store = Field.Store.YES; // make random later
Field.Index index = Field.Index.ANALYZED; // make random later
d.add(newField("f"+i, sb.toString(), store, index));
d.add(newField("f"+i, sb.toString(), TextField.TYPE_STORED));
}
iw.addDocument(d);
}

View File

@ -19,7 +19,8 @@ package org.apache.lucene.sandbox.queries;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
@ -74,9 +75,9 @@ public class DuplicateFilterTest extends LuceneTestCase {
private void addDoc(RandomIndexWriter writer, String url, String text, String date) throws IOException {
Document doc = new Document();
doc.add(newField(KEY_FIELD, url, Field.Store.YES, Field.Index.NOT_ANALYZED));
doc.add(newField("text", text, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("date", date, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField(KEY_FIELD, url, StringField.TYPE_STORED));
doc.add(newField("text", text, TextField.TYPE_STORED));
doc.add(newField("date", date, TextField.TYPE_STORED));
writer.addDocument(doc);
}

View File

@ -20,7 +20,7 @@ package org.apache.lucene.sandbox.queries;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
@ -68,8 +68,8 @@ public class FuzzyLikeThisQueryTest extends LuceneTestCase {
private void addDoc(RandomIndexWriter writer, String name, String id) throws IOException {
Document doc = new Document();
doc.add(newField("name", name, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("id", id, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("name", name, TextField.TYPE_STORED));
doc.add(newField("id", id, TextField.TYPE_STORED));
writer.addDocument(doc);
}

View File

@ -6,6 +6,7 @@ import java.util.Locale;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.*;
@ -57,7 +58,7 @@ public class TestSlowCollationMethods extends LuceneTestCase {
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
String value = _TestUtil.randomUnicodeString(random);
Field field = newField("field", value, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
Field field = newField("field", value, StringField.TYPE_STORED);
doc.add(field);
iw.addDocument(doc);
}

View File

@ -25,7 +25,7 @@ import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.index.TermsEnum;
@ -47,7 +47,7 @@ public class TestRegexQuery extends LuceneTestCase {
directory = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random, directory);
Document doc = new Document();
doc.add(newField(FN, "the quick brown fox jumps over the lazy dog", Field.Store.NO, Field.Index.ANALYZED));
doc.add(newField(FN, "the quick brown fox jumps over the lazy dog", TextField.TYPE_UNSTORED));
writer.addDocument(doc);
reader = writer.getReader();
writer.close();

View File

@ -21,7 +21,8 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@ -62,12 +63,10 @@ public class TestSpanRegexQuery extends LuceneTestCase {
// Field.Store.NO, Field.Index.ANALYZED));
// writer.addDocument(doc);
// doc = new Document();
doc.add(newField("field", "auto update", Field.Store.NO,
Field.Index.ANALYZED));
doc.add(newField("field", "auto update", TextField.TYPE_UNSTORED));
writer.addDocument(doc);
doc = new Document();
doc.add(newField("field", "first auto update", Field.Store.NO,
Field.Index.ANALYZED));
doc.add(newField("field", "first auto update", TextField.TYPE_UNSTORED));
writer.addDocument(doc);
writer.optimize();
writer.close();
@ -87,13 +86,13 @@ public class TestSpanRegexQuery extends LuceneTestCase {
LockObtainFailedException, IOException {
// creating a document to store
Document lDoc = new Document();
lDoc.add(newField("field", "a1 b1", Field.Store.NO,
Field.Index.ANALYZED_NO_NORMS));
FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
customType.setOmitNorms(true);
lDoc.add(newField("field", "a1 b1", customType));
// creating a document to store
Document lDoc2 = new Document();
lDoc2.add(newField("field", "a2 b2", Field.Store.NO,
Field.Index.ANALYZED_NO_NORMS));
lDoc2.add(newField("field", "a2 b2", customType));
// creating first index writer
IndexWriter writerA = new IndexWriter(indexStoreA, newIndexWriterConfig(

View File

@ -23,10 +23,10 @@ import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -93,26 +93,22 @@ public class TestCartesian extends LuceneTestCase {
private void addPoint(IndexWriter writer, String name, double lat, double lng) throws IOException{
Document doc = new Document();
doc.add(newField("name", name,Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("name", name, TextField.TYPE_STORED));
// convert the lat / long to lucene fields
doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lng));
doc.add(new NumericField(latField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lng));
// add a default meta field to make searching all documents easy
doc.add(newField("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("metafile", "doc", TextField.TYPE_STORED));
int ctpsize = ctps.size();
for (int i =0; i < ctpsize; i++){
CartesianTierPlotter ctp = ctps.get(i);
doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE,
Field.Store.YES,
true).setDoubleValue(ctp.getTierBoxId(lat,lng)));
doc.add(new NumericField(ctp.getTierFieldName(), Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(ctp.getTierBoxId(lat,lng)));
doc.add(newField(geoHashPrefix, GeoHashUtils.encode(lat,lng),
Field.Store.YES,
Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(newField(geoHashPrefix, GeoHashUtils.encode(lat,lng), StringField.TYPE_STORED));
}
writer.addDocument(doc);


@ -20,8 +20,8 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader.AtomicReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
@ -62,15 +62,15 @@ public class TestDistance extends LuceneTestCase {
private void addPoint(IndexWriter writer, String name, double lat, double lng) throws IOException{
Document doc = new Document();
doc.add(newField("name", name,Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("name", name, TextField.TYPE_STORED));
// convert the lat / long to lucene fields
doc.add(new NumericField(latField, Integer.MAX_VALUE, Field.Store.YES, true).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE,Field.Store.YES, true).setDoubleValue(lng));
doc.add(new NumericField(latField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lat));
doc.add(new NumericField(lngField, Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(lng));
// add a default meta field to make searching all documents easy
doc.add(newField("metafile", "doc",Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("metafile", "doc", TextField.TYPE_STORED));
writer.addDocument(doc);
}
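
Note: both spatial tests follow the same cutover pattern: analyzed text goes through TextField, keyword values through StringField, and trie values take a FieldType directly. A condensed sketch of the pattern (field names and values illustrative, assuming an IndexWriter named writer):

Document doc = new Document();
doc.add(new Field("name", TextField.TYPE_STORED, "some point"));  // analyzed + stored
doc.add(new Field("id", StringField.TYPE_STORED, "point-42"));    // single term, not analyzed
doc.add(new NumericField("lat", Integer.MAX_VALUE, NumericField.TYPE_STORED).setDoubleValue(38.89));
writer.addDocument(doc);
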


@ -1,11 +1,29 @@
package org.apache.lucene.xmlparser;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
@ -24,22 +42,6 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* <p/>
* http://www.apache.org/licenses/LICENSE-2.0
* <p/>
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestParser extends LuceneTestCase {
@ -63,9 +65,9 @@ public class TestParser extends LuceneTestCase {
int endOfDate = line.indexOf('\t');
String date = line.substring(0, endOfDate).trim();
String content = line.substring(endOfDate).trim();
org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
doc.add(newField("date", date, Field.Store.YES, Field.Index.ANALYZED));
doc.add(newField("contents", content, Field.Store.YES, Field.Index.ANALYZED));
Document doc = new Document();
doc.add(newField("date", date, TextField.TYPE_STORED));
doc.add(newField("contents", content, TextField.TYPE_STORED));
NumericField numericField = new NumericField("date2");
numericField.setIntValue(Integer.valueOf(date));
doc.add(numericField);
@ -217,7 +219,7 @@ public class TestParser extends LuceneTestCase {
System.out.println("=========" + qType + "============");
ScoreDoc[] scoreDocs = hits.scoreDocs;
for (int i = 0; i < Math.min(numDocs, hits.totalHits); i++) {
org.apache.lucene.document.Document ldoc = searcher.doc(scoreDocs[i].doc);
Document ldoc = searcher.doc(scoreDocs[i].doc);
System.out.println("[" + ldoc.get("date") + "]" + ldoc.get("contents"));
}
System.out.println();


@ -1,24 +1,5 @@
package org.apache.lucene.xmlparser;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import java.io.IOException;
import java.util.Locale;
import java.util.Properties;
import java.util.StringTokenizer;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -36,6 +17,26 @@ import java.util.StringTokenizer;
* limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.LuceneTestCase;
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;
import java.io.IOException;
import java.util.Locale;
import java.util.Properties;
import java.util.StringTokenizer;
/**
* This class illustrates how form input (such as from a web page or Swing gui) can be
* turned into Lucene queries using a choice of XSL templates for different styles of queries.
@ -125,7 +126,7 @@ public class TestQueryTemplateManager extends LuceneTestCase {
String name = st.nextToken().trim();
if (st.hasMoreTokens()) {
String value = st.nextToken().trim();
result.add(newField(name, value, Field.Store.YES, Field.Index.ANALYZED));
result.add(newField(name, value, TextField.TYPE_STORED));
}
}
return result;


@ -22,11 +22,10 @@ import java.io.IOException;
import java.io.Closeable;
import java.lang.reflect.Modifier;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.document.Fieldable;
/** An Analyzer builds TokenStreams, which analyze text. It thus represents a
* policy for extracting index terms from text.
* <p>
@ -111,16 +110,16 @@ public abstract class Analyzer implements Closeable {
}
/**
* Invoked before indexing a Fieldable instance if
* Invoked before indexing an IndexableField instance if
* terms have already been added to that field. This allows custom
* analyzers to place an automatic position increment gap between
* Fieldable instances using the same field name. The default value
* IndexableField instances using the same field name. The default
* position increment gap is 0. With a 0 position increment gap and
* the typical default token position increment of 1, all terms in a field,
* including across Fieldable instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across Fieldable instance boundaries.
* including across IndexableField instances, are in successive positions, allowing
* exact PhraseQuery matches, for instance, across IndexableField instance boundaries.
*
* @param fieldName Fieldable name being indexed.
* @param fieldName IndexableField name being indexed.
* @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getPositionIncrementGap(String fieldName) {
@ -138,11 +137,12 @@ public abstract class Analyzer implements Closeable {
* @param field the field just indexed
* @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
*/
public int getOffsetGap(Fieldable field) {
if (field.isTokenized())
public int getOffsetGap(IndexableField field) {
if (field.tokenized()) {
return 1;
else
} else {
return 0;
}
}
/** Frees persistent resources used by this Analyzer */
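
Note: a custom Analyzer can override either gap method; a minimal sketch (the MockAnalyzer delegate, the random field from LuceneTestCase, and the gap of 100 are illustrative choices, not part of this commit):

Analyzer gapAnalyzer = new Analyzer() {
  private final Analyzer delegate = new MockAnalyzer(random);

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return delegate.tokenStream(fieldName, reader);
  }

  @Override
  public int getPositionIncrementGap(String fieldName) {
    return 100;  // terms of successive values of one field sit 100 positions apart
  }

  @Override
  public int getOffsetGap(IndexableField field) {
    return field.tokenized() ? 1 : 0;  // same behavior as the default above
  }
};
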


@ -120,7 +120,7 @@ There are many post tokenization steps that can be done, including (but not limi
Applications usually do not invoke analysis &ndash; Lucene does it for them:
<ul>
<li>At indexing, as a consequence of
{@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document) addDocument(doc)},
{@link org.apache.lucene.index.IndexWriter#addDocument(Iterable) addDocument(doc)},
the Analyzer in effect for indexing is invoked for each indexed field of the added document.
</li>
<li>At search, a QueryParser may invoke the Analyzer during parsing. Note that for some queries, analysis does not
@ -170,7 +170,7 @@ the source code of any one of the many samples located in this package.
</p>
<h3>Field Section Boundaries</h3>
<p>
When {@link org.apache.lucene.document.Document#add(org.apache.lucene.document.Fieldable) document.add(field)}
When {@link org.apache.lucene.document.Document#add(org.apache.lucene.index.IndexableField) document.add(field)}
is called multiple times for the same field name, we could say that each such call creates a new
section for that field in that document.
In fact, a separate call to

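Note: a compact sketch of two sections of the same field, using the Field API from this commit (values illustrative):

Document doc = new Document();
doc.add(new Field("body", TextField.TYPE_UNSTORED, "first section"));
doc.add(new Field("body", TextField.TYPE_UNSTORED, "second section"));
// getPositionIncrementGap("body") is consulted between the two sections at indexing time
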

@ -1,312 +0,0 @@
package org.apache.lucene.document;
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.search.PhraseQuery; // for javadocs
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
/**
*
*
**/
public abstract class AbstractField implements Fieldable {
protected String name = "body";
protected boolean storeTermVector = false;
protected boolean storeOffsetWithTermVector = false;
protected boolean storePositionWithTermVector = false;
protected boolean omitNorms = false;
protected boolean isStored = false;
protected boolean isIndexed = true;
protected boolean isTokenized = true;
protected boolean isBinary = false;
protected boolean lazy = false;
protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
protected float boost = 1.0f;
// the data object for all different kind of field values
protected Object fieldsData = null;
// pre-analyzed tokenStream for indexed fields
protected TokenStream tokenStream;
// length/offset for all primitive types
protected int binaryLength;
protected int binaryOffset;
protected PerDocFieldValues docValues;
protected AbstractField()
{
}
protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
this.name = name;
this.isStored = store.isStored();
this.isIndexed = index.isIndexed();
this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms();
this.isBinary = false;
setStoreTermVector(termVector);
}
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* used to compute the norm factor for the field. By
* default, in the {@link
* org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
* by the length normalization factor and then
* rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.document.Document#setBoost(float)
* @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
* @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}
/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
* {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
}
/** Returns the name of the field.
* For example "date", "title", "body", ...
*/
public String name() { return name; }
protected void setStoreTermVector(Field.TermVector termVector) {
this.storeTermVector = termVector.isStored();
this.storePositionWithTermVector = termVector.withPositions();
this.storeOffsetWithTermVector = termVector.withOffsets();
}
/** True iff the value of the field is to be stored in the index for return
with search hits. It is an error for this to be true if a field is
Reader-valued. */
public final boolean isStored() { return isStored; }
/** True iff the value of the field is to be indexed, so that it may be
searched on. */
public final boolean isIndexed() { return isIndexed; }
/** True iff the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
public final boolean isTokenized() { return isTokenized; }
/** True iff the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
*/
public final boolean isTermVectorStored() { return storeTermVector; }
/**
* True iff terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
public boolean isStoreOffsetWithTermVector(){
return storeOffsetWithTermVector;
}
/**
* True iff terms are stored as term vector together with their token positions.
*/
public boolean isStorePositionWithTermVector(){
return storePositionWithTermVector;
}
/** True iff the value of the field is stored as binary */
public final boolean isBinary() {
return isBinary;
}
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.
* @return reference to the Field value as byte[].
*/
public byte[] getBinaryValue() {
return getBinaryValue(null);
}
public byte[] getBinaryValue(byte[] result){
if (isBinary || fieldsData instanceof byte[])
return (byte[]) fieldsData;
else
return null;
}
/**
* Returns the length of the byte[] segment that is used as the value; if the
* Field is not binary, the returned value is undefined
* @return length of byte[] segment that represents this Field value
*/
public int getBinaryLength() {
if (isBinary) {
return binaryLength;
} else if (fieldsData instanceof byte[])
return ((byte[]) fieldsData).length;
else
return 0;
}
/**
* Returns the offset into the byte[] segment that is used as the value; if the
* Field is not binary, the returned value is undefined
* @return index of the first character in byte[] segment that represents this Field value
*/
public int getBinaryOffset() {
return binaryOffset;
}
/** True if norms are omitted for this indexed field */
public boolean getOmitNorms() { return omitNorms; }
/** @see #setIndexOptions */
public IndexOptions getIndexOptions() { return indexOptions; }
/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; }
/** Expert:
*
* If set, omit term freq, and optionally also positions and payloads from
* postings for this field.
*
* <p><b>NOTE</b>: While this option reduces storage space
* required in the index, it also means any query
* requiring positional information, such as {@link
* PhraseQuery} or {@link SpanQuery} subclasses will
* silently fail to find results.
*/
public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; }
public boolean isLazy() {
return lazy;
}
/** Prints a Field for human consumption. */
@Override
public final String toString() {
StringBuilder result = new StringBuilder();
if (isStored) {
result.append("stored");
}
if (isIndexed) {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
if (isTokenized) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeOffsetWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storePositionWithTermVector) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (isBinary) {
if (result.length() > 0)
result.append(",");
result.append("binary");
}
if (omitNorms) {
result.append(",omitNorms");
}
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
result.append(",indexOptions=");
result.append(indexOptions);
}
if (lazy){
result.append(",lazy");
}
result.append('<');
result.append(name);
result.append(':');
if (fieldsData != null && lazy == false) {
result.append(fieldsData);
}
result.append('>');
return result.toString();
}
public PerDocFieldValues getDocValues() {
return docValues;
}
public void setDocValues(PerDocFieldValues docValues) {
this.docValues = docValues;
}
public boolean hasDocValues() {
return docValues != null && docValues.type() != null;
}
public ValueType docValuesType() {
return docValues == null? null : docValues.type();
}
}
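
Note: the per-field knobs that lived on AbstractField move onto FieldType; a sketch of the equivalent setup (setIndexOptions on FieldType is assumed here, mirroring the removed AbstractField setter):

FieldType ft = new FieldType(TextField.TYPE_UNSTORED);
ft.setOmitNorms(true);                            // was AbstractField.setOmitNorms(true)
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);  // was AbstractField.setIndexOptions(...)
ft.freeze();                                      // FieldType instances are shared, so lock them down
Field field = new Field("body", ft, "some text");
field.setBoost(1.5f);                             // the boost stays on the Field itself
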


@ -0,0 +1,49 @@
package org.apache.lucene.document;
import org.apache.lucene.util.BytesRef;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public final class BinaryField extends Field {
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_STORED.setStored(true);
TYPE_STORED.freeze();
}
public BinaryField(String name, byte[] value) {
super(name, BinaryField.TYPE_STORED, value);
}
public BinaryField(String name, byte[] value, int offset, int length) {
super(name, BinaryField.TYPE_STORED, value, offset, length);
}
public BinaryField(String name, BytesRef bytes) {
super(name, BinaryField.TYPE_STORED, bytes);
}
public BinaryField(String name, FieldType custom, byte[] value) {
super(name, custom, value);
}
public boolean isNumeric() {
return false;
}
}
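
Note: a minimal usage sketch for the new class (field names and bytes illustrative):

byte[] payload = new byte[] { 1, 2, 3 };
doc.add(new BinaryField("payload", payload));             // stored, not indexed
doc.add(new BinaryField("blob", new BytesRef(payload)));  // same, via BytesRef
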


@ -92,16 +92,24 @@ public class CompressionTools {
return compress(result.bytes, 0, result.length, compressionLevel);
}
public static byte[] decompress(BytesRef bytes) throws DataFormatException {
return decompress(bytes.bytes, bytes.offset, bytes.length);
}
public static byte[] decompress(byte[] value) throws DataFormatException {
return decompress(value, 0, value.length);
}
/** Decompress the byte array previously returned by
* compress */
public static byte[] decompress(byte[] value) throws DataFormatException {
public static byte[] decompress(byte[] value, int offset, int length) throws DataFormatException {
// Create an expandable byte array to hold the decompressed data
ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length);
ByteArrayOutputStream bos = new ByteArrayOutputStream(length);
Inflater decompressor = new Inflater();
try {
decompressor.setInput(value);
decompressor.setInput(value, offset, length);
// Decompress the data
final byte[] buf = new byte[1024];
@ -119,9 +127,17 @@ public class CompressionTools {
/** Decompress the byte array previously returned by
* compressString back into a String */
public static String decompressString(byte[] value) throws DataFormatException {
final byte[] bytes = decompress(value);
return decompressString(value, 0, value.length);
}
public static String decompressString(byte[] value, int offset, int length) throws DataFormatException {
final byte[] bytes = decompress(value, offset, length);
CharsRef result = new CharsRef(bytes.length);
UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result);
return new String(result.chars, 0, result.length);
}
public static String decompressString(BytesRef bytes) throws DataFormatException {
return decompressString(bytes.bytes, bytes.offset, bytes.length);
}
}
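
Note: the offset/length overloads let a BytesRef slice be decompressed without copying; a simple round trip (string content illustrative):

byte[] compressed = CompressionTools.compressString("some long stored text");
String original = CompressionTools.decompressString(compressed);
// or, when the stored value comes back as a BytesRef slice:
String fromRef = CompressionTools.decompressString(new BytesRef(compressed));
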


@ -17,61 +17,55 @@ package org.apache.lucene.document;
* limitations under the License.
*/
import java.util.*; // for javadoc
import java.util.*;
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher; // for javadoc
import org.apache.lucene.search.ScoreDoc; // for javadoc
import org.apache.lucene.index.IndexReader; // for javadoc
import org.apache.lucene.util.BytesRef;
/** Documents are the unit of indexing and search.
*
* A Document is a set of fields. Each field has a name and a textual value.
* A field may be {@link Fieldable#isStored() stored} with the document, in which
* A field may be {@link IndexableField#stored() stored} with the document, in which
* case it is returned with search hits on the document. Thus each document
* should typically contain one or more stored fields which uniquely identify
* it.
*
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <p>Note that fields which are <i>not</i> {@link IndexableField#stored() stored} are
* <i>not</i> available in documents retrieved from the index, e.g. with {@link
* ScoreDoc#doc} or {@link IndexReader#document(int)}.
*/
public final class Document {
List<Fieldable> fields = new ArrayList<Fieldable>();
private float boost = 1.0f;
public final class Document implements Iterable<IndexableField> {
private final List<IndexableField> fields = new ArrayList<IndexableField>();
/** Constructs a new document with no fields. */
public Document() {}
@Override
public Iterator<IndexableField> iterator() {
/** Sets a boost factor for hits on any field of this document. This value
* will be multiplied into the score of all hits on this document.
*
* <p>The default value is 1.0.
*
* <p>Values are multiplied into the value of {@link Fieldable#getBoost()} of
* each field in this document. Thus, this method in effect sets a default
* boost for the fields of this document.
*
* @see Fieldable#setBoost(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}
return new Iterator<IndexableField>() {
private int fieldUpto = 0;
@Override
public boolean hasNext() {
return fieldUpto < fields.size();
}
/** Returns, at indexing time, the boost factor as set by {@link #setBoost(float)}.
*
* <p>Note that once a document is indexed this value is no longer available
* from the index. At search time, for retrieved documents, this method always
* returns 1. This however does not mean that the boost value set at indexing
* time was ignored - it was just combined with other indexing time factors and
* stored elsewhere, for better indexing and search performance. (For more
* information see the "norm(t,d)" part of the scoring formula in
* {@link org.apache.lucene.search.Similarity Similarity}.)
*
* @see #setBoost(float)
*/
public float getBoost() {
return boost;
@Override
public void remove() {
throw new UnsupportedOperationException();
}
@Override
public IndexableField next() {
return fields.get(fieldUpto++);
}
};
}
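
Note: since Document now implements Iterable<IndexableField>, its fields can be walked with a for-each loop:

for (IndexableField field : doc) {
  System.out.println(field.name() + " -> " + field.stringValue());
}
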
/**
@ -84,7 +78,7 @@ public final class Document {
* a document has to be deleted from an index and a new changed version of that
* document has to be added.</p>
*/
public final void add(Fieldable field) {
public final void add(IndexableField field) {
fields.add(field);
}
@ -99,9 +93,9 @@ public final class Document {
* document has to be added.</p>
*/
public final void removeField(String name) {
Iterator<Fieldable> it = fields.iterator();
Iterator<IndexableField> it = fields.iterator();
while (it.hasNext()) {
Fieldable field = it.next();
IndexableField field = it.next();
if (field.name().equals(name)) {
it.remove();
return;
@ -119,148 +113,15 @@ public final class Document {
* document has to be added.</p>
*/
public final void removeFields(String name) {
Iterator<Fieldable> it = fields.iterator();
Iterator<IndexableField> it = fields.iterator();
while (it.hasNext()) {
Fieldable field = it.next();
IndexableField field = it.next();
if (field.name().equals(name)) {
it.remove();
}
}
}
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
* Do not use this method with lazy loaded fields or {@link NumericField}.
* @deprecated use {@link #getFieldable} instead and cast depending on
* data type.
* @throws ClassCastException if you try to retrieve a numerical or
* lazy loaded field.
*/
@Deprecated
public final Field getField(String name) {
return (Field) getFieldable(name);
}
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
*/
public Fieldable getFieldable(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name))
return field;
}
return null;
}
/** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
* For {@link NumericField} it returns the string value of the number. If you want
* the actual {@code NumericField} instance back, use {@link #getFieldable}.
*/
public final String get(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name) && (!field.isBinary()))
return field.stringValue();
}
return null;
}
/** Returns a List of all the fields in a document.
* <p>Note that fields which are <i>not</i> {@link Fieldable#isStored() stored} are
* <i>not</i> available in documents retrieved from the
* index, e.g. {@link IndexSearcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final List<Fieldable> getFields() {
return fields;
}
private final static Field[] NO_FIELDS = new Field[0];
/**
* Returns an array of {@link Field}s with the given name.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* Do not use this method with lazy loaded fields or {@link NumericField}.
*
* @param name the name of the field
* @return a <code>Field[]</code> array
* @deprecated use {@link #getFieldable} instead and cast depending on
* data type.
* @throws ClassCastException if you try to retrieve a numerical or
* lazy loaded field.
*/
@Deprecated
public final Field[] getFields(String name) {
List<Field> result = new ArrayList<Field>();
for (Fieldable field : fields) {
if (field.name().equals(name)) {
result.add((Field) field);
}
}
if (result.size() == 0)
return NO_FIELDS;
return result.toArray(new Field[result.size()]);
}
private final static Fieldable[] NO_FIELDABLES = new Fieldable[0];
/**
* Returns an array of {@link Fieldable}s with the given name.
* This method returns an empty array when there are no
* matching fields. It never returns null.
*
* @param name the name of the field
* @return a <code>Fieldable[]</code> array
*/
public Fieldable[] getFieldables(String name) {
List<Fieldable> result = new ArrayList<Fieldable>();
for (Fieldable field : fields) {
if (field.name().equals(name)) {
result.add(field);
}
}
if (result.size() == 0)
return NO_FIELDABLES;
return result.toArray(new Fieldable[result.size()]);
}
private final static String[] NO_STRINGS = new String[0];
/**
* Returns an array of values of the field specified as the method parameter.
* This method returns an empty array when there are no
* matching fields. It never returns null.
* For {@link NumericField}s it returns the string value of the number. If you want
* the actual {@code NumericField} instances back, use {@link #getFieldables}.
* @param name the name of the field
* @return a <code>String[]</code> of field values
*/
public final String[] getValues(String name) {
List<String> result = new ArrayList<String>();
for (Fieldable field : fields) {
if (field.name().equals(name) && (!field.isBinary()))
result.add(field.stringValue());
}
if (result.size() == 0)
return NO_STRINGS;
return result.toArray(new String[result.size()]);
}
private final static byte[][] NO_BYTES = new byte[0][];
/**
* Returns an array of byte arrays for the fields that have the name specified
@ -271,17 +132,18 @@ public final class Document {
* @param name the name of the field
* @return a <code>byte[][]</code> of binary field values
*/
public final byte[][] getBinaryValues(String name) {
List<byte[]> result = new ArrayList<byte[]>();
for (Fieldable field : fields) {
if (field.name().equals(name) && (field.isBinary()))
result.add(field.getBinaryValue());
public final BytesRef[] getBinaryValues(String name) {
final List<BytesRef> result = new ArrayList<BytesRef>();
for (IndexableField field : fields) {
if (field.name().equals(name)) {
final BytesRef bytes = field.binaryValue();
if (bytes != null) {
result.add(bytes);
}
}
}
if (result.size() == 0)
return NO_BYTES;
return result.toArray(new byte[result.size()][]);
return result.toArray(new BytesRef[result.size()]);
}
/**
@ -293,10 +155,72 @@ public final class Document {
* @param name the name of the field.
* @return a <code>byte[]</code> containing the binary field value or <code>null</code>
*/
public final byte[] getBinaryValue(String name) {
for (Fieldable field : fields) {
if (field.name().equals(name) && (field.isBinary()))
return field.getBinaryValue();
public final BytesRef getBinaryValue(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name)) {
final BytesRef bytes = field.binaryValue();
if (bytes != null) {
return bytes;
}
}
}
return null;
}
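
Note: retrieval mirrors the indexing side; a sketch assuming a stored BinaryField named "payload":

Document stored = searcher.doc(scoreDoc.doc);
BytesRef bytes = stored.getBinaryValue("payload");   // first matching binary value, or null
BytesRef[] all = stored.getBinaryValues("payload");  // every matching binary value
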
/** Returns a field with the given name if any exist in this document, or
* null. If multiple fields exists with this name, this method returns the
* first value added.
*/
public final IndexableField getField(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name)) {
return field;
}
}
return null;
}
/**
* Returns an array of {@link IndexableField}s with the given name.
* This method returns an empty array when there are no
* matching fields. It never returns null.
*
* @param name the name of the field
* @return an <code>IndexableField[]</code> array
*/
public IndexableField[] getFields(String name) {
List<IndexableField> result = new ArrayList<IndexableField>();
for (IndexableField field : fields) {
if (field.name().equals(name)) {
result.add(field);
}
}
return result.toArray(new IndexableField[result.size()]);
}
/** Returns a List of all the fields in a document.
* <p>Note that fields which are <i>not</i> stored are
* <i>not</i> available in documents retrieved from the
* index, e.g. {@link IndexSearcher#doc(int)} or {@link
* IndexReader#document(int)}.
*/
public final List<IndexableField> getFields() {
return fields;
}
/** Returns the string value of the field with the given name if any exist in
* this document, or null. If multiple fields exist with this name, this
* method returns the first value added. If only binary fields with this name
* exist, returns null.
* For {@link NumericField} it returns the string value of the number. If you want
* the actual {@code NumericField} instance back, use {@link #getField}.
*/
public final String get(String name) {
for (IndexableField field : fields) {
if (field.name().equals(name) && field.stringValue() != null) {
return field.stringValue();
}
}
return null;
}
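
Note: together these accessors replace the old Fieldable-typed variants; typical lookups:

IndexableField first = doc.getField("title");   // first field with this name, or null
IndexableField[] all = doc.getFields("title");  // every field with this name, never null
String text = doc.get("title");                 // first non-null string value, or null
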
@ -307,7 +231,7 @@ public final class Document {
StringBuilder buffer = new StringBuilder();
buffer.append("Document<");
for (int i = 0; i < fields.size(); i++) {
Fieldable field = fields.get(i);
IndexableField field = fields.get(i);
buffer.append(field.toString());
if (i != fields.size()-1)
buffer.append(" ");


@ -21,514 +21,325 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.BytesRef;
/**
A field is a section of a Document. Each field has two parts, a name and a
value. Values may be free text, provided as a String or as a Reader, or they
may be atomic keywords, which are not further processed. Such keywords may
be used to represent dates, urls, etc. Fields are optionally stored in the
index, so that they may be returned with hits on the document.
*/
* A field is a section of a Document. Each field has two parts, a name and a
* value. Values may be free text, provided as a String or as a Reader, or they
* may be atomic keywords, which are not further processed. Such keywords may be
* used to represent dates, urls, etc. Fields are optionally stored in the
* index, so that they may be returned with hits on the document.
*/
public final class Field extends AbstractField implements Fieldable {
public class Field implements IndexableField {
/** Specifies whether and how a field should be stored. */
public static enum Store {
protected FieldType type;
protected String name = "body";
// the data object for all different kind of field values
protected Object fieldsData;
// pre-analyzed tokenStream for indexed fields
protected TokenStream tokenStream;
// length/offset for all primitive types
protected PerDocFieldValues docValues;
protected float boost = 1.0f;
/** Store the original field value in the index. This is useful for short texts
* like a document's title which should be displayed with the results. The
* value is stored in its original form, i.e. no analyzer is used before it is
* stored.
*/
YES {
@Override
public boolean isStored() { return true; }
},
/** Do not store the field value in the index. */
NO {
@Override
public boolean isStored() { return false; }
};
public abstract boolean isStored();
}
/** Specifies whether and how a field should be indexed. */
public static enum Index {
/** Do not index the field value. This field can thus not be searched,
* but one can still access its contents provided it is
* {@link Field.Store stored}. */
NO {
@Override
public boolean isIndexed() { return false; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return true; }
},
/** Index the tokens produced by running the field's
* value through an Analyzer. This is useful for
* common text. */
ANALYZED {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return true; }
@Override
public boolean omitNorms() { return false; }
},
/** Index the field's value without using an Analyzer, so it can be searched.
* As no analyzer is used the value will be stored as a single term. This is
* useful for unique Ids like product numbers.
*/
NOT_ANALYZED {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return false; }
},
/** Expert: Index the field's value without an Analyzer,
* and also disable the indexing of norms. Note that you
* can also separately enable/disable norms by calling
* {@link Field#setOmitNorms}. No norms means that
* index-time field and document boosting and field
* length normalization are disabled. The benefit is
* less memory usage as norms take up one byte of RAM
* per indexed field for every document in the index,
* during searching. Note that once you index a given
* field <i>with</i> norms disabled, enabling norms will
* have no effect. In other words, for this to have the
* above described effect on a field, one instance of
* that field must be indexed with NOT_ANALYZED_NO_NORMS
* at some point. */
NOT_ANALYZED_NO_NORMS {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return false; }
@Override
public boolean omitNorms() { return true; }
},
/** Expert: Index the tokens produced by running the
* field's value through an Analyzer, and also
* separately disable the storing of norms. See
* {@link #NOT_ANALYZED_NO_NORMS} for what norms are
* and why you may want to disable them. */
ANALYZED_NO_NORMS {
@Override
public boolean isIndexed() { return true; }
@Override
public boolean isAnalyzed() { return true; }
@Override
public boolean omitNorms() { return true; }
};
/** Get the best representation of the index given the flags. */
public static Index toIndex(boolean indexed, boolean analyzed) {
return toIndex(indexed, analyzed, false);
}
/** Expert: Get the best representation of the index given the flags. */
public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) {
// If it is not indexed nothing else matters
if (!indexed) {
return Index.NO;
}
// typical, non-expert
if (!omitNorms) {
if (analyzed) {
return Index.ANALYZED;
}
return Index.NOT_ANALYZED;
}
// Expert: Norms omitted
if (analyzed) {
return Index.ANALYZED_NO_NORMS;
}
return Index.NOT_ANALYZED_NO_NORMS;
}
public abstract boolean isIndexed();
public abstract boolean isAnalyzed();
public abstract boolean omitNorms();
}
/** Specifies whether and how a field should have term vectors. */
public static enum TermVector {
/** Do not store term vectors.
*/
NO {
@Override
public boolean isStored() { return false; }
@Override
public boolean withPositions() { return false; }
@Override
public boolean withOffsets() { return false; }
},
/** Store the term vectors of each document. A term vector is a list
* of the document's terms and their number of occurrences in that document. */
YES {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return false; }
@Override
public boolean withOffsets() { return false; }
},
/**
* Store the term vector + token position information
*
* @see #YES
*/
WITH_POSITIONS {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return true; }
@Override
public boolean withOffsets() { return false; }
},
/**
* Store the term vector + Token offset information
*
* @see #YES
*/
WITH_OFFSETS {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return false; }
@Override
public boolean withOffsets() { return true; }
},
/**
* Store the term vector + Token position and offset information
*
* @see #YES
* @see #WITH_POSITIONS
* @see #WITH_OFFSETS
*/
WITH_POSITIONS_OFFSETS {
@Override
public boolean isStored() { return true; }
@Override
public boolean withPositions() { return true; }
@Override
public boolean withOffsets() { return true; }
};
/** Get the best representation of a TermVector given the flags. */
public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) {
// If it is not stored, nothing else matters.
if (!stored) {
return TermVector.NO;
}
if (withOffsets) {
if (withPositions) {
return Field.TermVector.WITH_POSITIONS_OFFSETS;
}
return Field.TermVector.WITH_OFFSETS;
}
if (withPositions) {
return Field.TermVector.WITH_POSITIONS;
}
return Field.TermVector.YES;
}
public abstract boolean isStored();
public abstract boolean withPositions();
public abstract boolean withOffsets();
public Field(String name, FieldType type) {
this.name = name;
this.type = type;
}
/** The value of the field as a String, or null. If null, the Reader value or
* binary value is used. Exactly one of stringValue(),
* readerValue(), and getBinaryValue() must be set. */
public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; }
/** The value of the field as a Reader, or null. If null, the String value or
* binary value is used. Exactly one of stringValue(),
* readerValue(), and getBinaryValue() must be set. */
public Reader readerValue() { return fieldsData instanceof Reader ? (Reader)fieldsData : null; }
/** The TokenStream for this field to be used when indexing, or null. If null, the Reader value
* or String value is analyzed to produce the indexed tokens. */
public TokenStream tokenStreamValue() { return tokenStream; }
/** <p>Expert: change the value of this field. This can
* be used during indexing to re-use a single Field
* instance to improve indexing speed by avoiding GC cost
* of new'ing and reclaiming Field instances. Typically
* a single {@link Document} instance is re-used as
* well. This helps most on small documents.</p>
*
* <p>Each Field instance should only be used once
* within a single {@link Document} instance. See <a
* href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed">ImproveIndexingSpeed</a>
* for details.</p> */
public void setValue(String value) {
if (isBinary) {
throw new IllegalArgumentException("cannot set a String value on a binary field");
}
fieldsData = value;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(Reader value) {
if (isBinary) {
throw new IllegalArgumentException("cannot set a Reader value on a binary field");
}
if (isStored) {
throw new IllegalArgumentException("cannot set a Reader value on a stored field");
}
fieldsData = value;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value) {
if (!isBinary) {
throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
binaryLength = value.length;
binaryOffset = 0;
}
/** Expert: change the value of this field. See <a href="#setValue(java.lang.String)">setValue(String)</a>. */
public void setValue(byte[] value, int offset, int length) {
if (!isBinary) {
throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
binaryLength = length;
binaryOffset = offset;
}
/** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true.
* May be combined with stored values from stringValue() or getBinaryValue() */
public void setTokenStream(TokenStream tokenStream) {
this.isIndexed = true;
this.isTokenized = true;
this.tokenStream = tokenStream;
}
/**
* Create a field by specifying its name, value and how it will
* be saved in the index. Term vectors will not be stored in the index.
*
* @param name The name of the field
* @param value The string to process
* @param store Whether <code>value</code> should be stored in the index
* @param index Whether the field should be indexed, and if so, if it should
* be tokenized before indexing
* @throws NullPointerException if name or value is <code>null</code>
* @throws IllegalArgumentException if the field is neither stored nor indexed
*/
public Field(String name, String value, Store store, Index index) {
this(name, value, store, index, TermVector.NO);
}
/**
* Create a field by specifying its name, value and how it will
* be saved in the index.
*
* @param name The name of the field
* @param value The string to process
* @param store Whether <code>value</code> should be stored in the index
* @param index Whether the field should be indexed, and if so, if it should
* be tokenized before indexing
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or value is <code>null</code>
* @throws IllegalArgumentException in any of the following situations:
* <ul>
* <li>the field is neither stored nor indexed</li>
* <li>the field is not indexed but termVector is <code>TermVector.YES</code></li>
* </ul>
*/
public Field(String name, String value, Store store, Index index, TermVector termVector) {
if (name == null)
public Field(String name, FieldType type, Reader reader) {
if (name == null) {
throw new NullPointerException("name cannot be null");
if (value == null)
throw new NullPointerException("value cannot be null");
if (name.length() == 0 && value.length() == 0)
throw new IllegalArgumentException("name and value cannot both be empty");
if (index == Index.NO && store == Store.NO)
throw new IllegalArgumentException("it doesn't make sense to have a field that "
+ "is neither indexed nor stored");
if (index == Index.NO && termVector != TermVector.NO)
throw new IllegalArgumentException("cannot store term vector information "
+ "for a field that is not indexed");
this.name = name;
this.fieldsData = value;
this.isStored = store.isStored();
this.isIndexed = index.isIndexed();
this.isTokenized = index.isAnalyzed();
this.omitNorms = index.omitNorms();
if (index == Index.NO) {
// note: now this reads even weirder than before
this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
}
this.isBinary = false;
setStoreTermVector(termVector);
}
/**
* Create a tokenized and indexed field that is not stored. Term vectors will
* not be stored. The Reader is read only when the Document is added to the index,
* i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param reader The reader with the content
* @throws NullPointerException if name or reader is <code>null</code>
*/
public Field(String name, Reader reader) {
this(name, reader, TermVector.NO);
}
/**
* Create a tokenized and indexed field that is not stored, optionally with
* storing term vectors. The Reader is read only when the Document is added to the index,
* i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param reader The reader with the content
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or reader is <code>null</code>
*/
public Field(String name, Reader reader, TermVector termVector) {
if (name == null)
throw new NullPointerException("name cannot be null");
if (reader == null)
}
if (reader == null) {
throw new NullPointerException("reader cannot be null");
}
this.name = name;
this.fieldsData = reader;
this.isStored = false;
this.isIndexed = true;
this.isTokenized = true;
this.isBinary = false;
setStoreTermVector(termVector);
}
/**
* Create a tokenized and indexed field that is not stored. Term vectors will
* not be stored. This is useful for pre-analyzed fields.
* The TokenStream is read only when the Document is added to the index,
* i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param tokenStream The TokenStream with the content
* @throws NullPointerException if name or tokenStream is <code>null</code>
*/
public Field(String name, TokenStream tokenStream) {
this(name, tokenStream, TermVector.NO);
this.type = type;
}
/**
* Create a tokenized and indexed field that is not stored, optionally with
* storing term vectors. This is useful for pre-analyzed fields.
* The TokenStream is read only when the Document is added to the index,
* i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)}
* has been called.
*
* @param name The name of the field
* @param tokenStream The TokenStream with the content
* @param termVector Whether term vector should be stored
* @throws NullPointerException if name or tokenStream is <code>null</code>
*/
public Field(String name, TokenStream tokenStream, TermVector termVector) {
if (name == null)
public Field(String name, FieldType type, TokenStream tokenStream) {
if (name == null) {
throw new NullPointerException("name cannot be null");
if (tokenStream == null)
}
if (tokenStream == null) {
throw new NullPointerException("tokenStream cannot be null");
}
this.name = name;
this.fieldsData = null;
this.tokenStream = tokenStream;
this.isStored = false;
this.isIndexed = true;
this.isTokenized = true;
this.isBinary = false;
setStoreTermVector(termVector);
this.type = type;
}
public Field(String name, FieldType type, byte[] value) {
this(name, type, value, 0, value.length);
}
public Field(String name, FieldType type, byte[] value, int offset, int length) {
this.fieldsData = new BytesRef(value, offset, length);
this.type = type;
this.name = name;
}
public Field(String name, FieldType type, BytesRef bytes) {
this.fieldsData = bytes;
this.type = type;
this.name = name;
}
public Field(String name, FieldType type, String value) {
if (name == null) {
throw new IllegalArgumentException("name cannot be null");
}
if (value == null) {
throw new IllegalArgumentException("value cannot be null");
}
if (!type.stored() && !type.indexed()) {
throw new IllegalArgumentException("it doesn't make sense to have a field that "
+ "is neither indexed nor stored");
}
if (!type.indexed() && !type.tokenized() && (type.storeTermVectors())) {
throw new IllegalArgumentException("cannot store term vector information "
+ "for a field that is not indexed");
}
this.type = type;
this.name = name;
this.fieldsData = value;
}
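
Note: this constructor enforces the same sanity checks the old Store/Index enums encoded; for instance (values illustrative):

new Field("title", TextField.TYPE_STORED, "Lucene in Action");  // ok: stored and indexed
new Field("title", new FieldType(), "x");  // throws IllegalArgumentException: neither stored nor indexed
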
/**
* The value of the field as a String, or null. If null, the Reader value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
*/
public String stringValue() {
return fieldsData instanceof String ? (String) fieldsData : null;
}
/**
* Create a stored field with binary value. Optionally the value may be compressed.
*
* @param name The name of the field
* @param value The binary value
* The value of the field as a Reader, or null. If null, the String value or
* binary value is used. Exactly one of stringValue(), readerValue(), and
* getBinaryValue() must be set.
*/
public Field(String name, byte[] value) {
this(name, value, 0, value.length);
public Reader readerValue() {
return fieldsData instanceof Reader ? (Reader) fieldsData : null;
}
/**
* Create a stored field with binary value. Optionally the value may be compressed.
*
* @param name The name of the field
* @param value The binary value
* @param offset Starting offset in value where this Field's bytes are
* @param length Number of bytes to use for this Field, starting at offset
* The TokenStream for this field to be used when indexing, or null. If null,
* the Reader value or String value is analyzed to produce the indexed tokens.
*/
public Field(String name, byte[] value, int offset, int length) {
if (name == null)
throw new IllegalArgumentException("name cannot be null");
if (value == null)
throw new IllegalArgumentException("value cannot be null");
this.name = name;
public TokenStream tokenStreamValue() {
return tokenStream;
}
/**
* <p>
* Expert: change the value of this field. This can be used during indexing to
* re-use a single Field instance to improve indexing speed by avoiding GC
* cost of new'ing and reclaiming Field instances. Typically a single
* {@link Document} instance is re-used as well. This helps most on small
* documents.
* </p>
*
* <p>
* Each Field instance should only be used once within a single
* {@link Document} instance. See <a
* href="http://wiki.apache.org/lucene-java/ImproveIndexingSpeed"
* >ImproveIndexingSpeed</a> for details.
* </p>
*/
public void setValue(String value) {
if (isBinary()) {
throw new IllegalArgumentException(
"cannot set a String value on a binary field");
}
fieldsData = value;
}
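
Note: a sketch of the re-use pattern this javadoc describes (loop and values illustrative, assuming an IndexWriter named writer):

Field idField = new Field("id", StringField.TYPE_STORED, "");
Document doc = new Document();
doc.add(idField);
for (String id : ids) {
  idField.setValue(id);  // swap the value instead of allocating a new Field
  writer.addDocument(doc);
}
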
/**
* Expert: change the value of this field. See <a
* href="#setValue(java.lang.String)">setValue(String)</a>.
*/
public void setValue(Reader value) {
if (isBinary()) {
throw new IllegalArgumentException(
"cannot set a Reader value on a binary field");
}
if (stored()) {
throw new IllegalArgumentException(
"cannot set a Reader value on a stored field");
}
fieldsData = value;
}
/**
* Expert: change the value of this field. See <a
* href="#setValue(java.lang.String)">setValue(String)</a>.
*/
public void setValue(byte[] value) {
if (!isBinary()) {
throw new IllegalArgumentException(
"cannot set a byte[] value on a non-binary field");
}
fieldsData = new BytesRef(value);
}
/**
* Expert: change the value of this field. See <a
* href="#setValue(java.lang.String)">setValue(String)</a>.
*/
/*
public void setValue(byte[] value, int offset, int length) {
if (!isBinary) {
throw new IllegalArgumentException(
"cannot set a byte[] value on a non-binary field");
}
fieldsData = value;
isStored = true;
isIndexed = false;
isTokenized = false;
indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
omitNorms = true;
isBinary = true;
binaryLength = length;
binaryOffset = offset;
setStoreTermVector(TermVector.NO);
}
*/
/**
* Expert: sets the token stream to be used for indexing. The field must
* already be indexed and tokenized. May be combined with stored values
* from stringValue() or binaryValue()
*/
public void setTokenStream(TokenStream tokenStream) {
if (!indexed() || !tokenized()) {
throw new IllegalArgumentException(
"cannot set token stream on non indexed and tokenized field");
}
this.tokenStream = tokenStream;
}
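
Note: pre-analyzed content can also be supplied up front through the matching constructor; a sketch (analyzer and text illustrative):

TokenStream stream = analyzer.tokenStream("body", new StringReader("pre analyzed text"));
doc.add(new Field("body", TextField.TYPE_UNSTORED, stream));
// or, on an existing indexed and tokenized field:
field.setTokenStream(stream);
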
public String name() {
return name;
}
public float boost() {
return boost;
}
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is used to compute the norm factor for the field. By
* default, in the {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method,
* the boost value is multiplied by the length normalization factor and then
* rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
* @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)
*/
public void setBoost(float boost) {
this.boost = boost;
}
public boolean numeric() {
return false;
}
public Number numericValue() {
return null;
}
public NumericField.DataType numericDataType() {
return null;
}
public BytesRef binaryValue() {
if (!isBinary()) {
return null;
} else {
return (BytesRef) fieldsData;
}
}
/** Methods that delegate to this field's {@link FieldType}. */
public boolean isBinary() {
return fieldsData instanceof BytesRef;
}
public boolean stored() {
return type.stored();
}
public boolean indexed() {
return type.indexed();
}
public boolean tokenized() {
return type.tokenized();
}
public boolean omitNorms() {
return type.omitNorms();
}
public IndexOptions indexOptions() {
return type.indexOptions();
}
public boolean storeTermVectors() {
return type.storeTermVectors();
}
public boolean storeTermVectorOffsets() {
return type.storeTermVectorOffsets();
}
public boolean storeTermVectorPositions() {
return type.storeTermVectorPositions();
}
/** Prints a Field for human consumption. */
@Override
public String toString() {
StringBuilder result = new StringBuilder();
result.append(type.toString());
result.append('<');
result.append(name);
result.append(':');
if (fieldsData != null && type.lazy() == false) {
result.append(fieldsData);
}
result.append('>');
return result.toString();
}
public void setDocValues(PerDocFieldValues docValues) {
this.docValues = docValues;
}
@Override
public PerDocFieldValues docValues() {
// return the values set via setDocValues, if any
return docValues;
}
@Override
public ValueType docValuesType() {
return null;
}
/** Returns FieldType for this field. */
public FieldType getFieldType() {
return type;
}
}


@ -0,0 +1,186 @@
package org.apache.lucene.document;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.FieldInfo.IndexOptions;
public class FieldType {
private boolean indexed;
private boolean stored;
private boolean tokenized;
private boolean storeTermVectors;
private boolean storeTermVectorOffsets;
private boolean storeTermVectorPositions;
private boolean omitNorms;
private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
private boolean lazy;
private boolean frozen;
public FieldType(FieldType ref) {
this.indexed = ref.indexed();
this.stored = ref.stored();
this.tokenized = ref.tokenized();
this.storeTermVectors = ref.storeTermVectors();
this.storeTermVectorOffsets = ref.storeTermVectorOffsets();
this.storeTermVectorPositions = ref.storeTermVectorPositions();
this.omitNorms = ref.omitNorms();
this.indexOptions = ref.indexOptions();
this.lazy = ref.lazy();
}
public FieldType() {
}
private void checkIfFrozen() {
if (frozen) {
throw new IllegalStateException();
}
}
public void freeze() {
this.frozen = true;
}
public boolean indexed() {
return this.indexed;
}
public void setIndexed(boolean value) {
checkIfFrozen();
this.indexed = value;
}
public boolean stored() {
return this.stored;
}
public void setStored(boolean value) {
checkIfFrozen();
this.stored = value;
}
public boolean tokenized() {
return this.tokenized;
}
public void setTokenized(boolean value) {
checkIfFrozen();
this.tokenized = value;
}
public boolean storeTermVectors() {
return this.storeTermVectors;
}
public void setStoreTermVectors(boolean value) {
checkIfFrozen();
this.storeTermVectors = value;
}
public boolean storeTermVectorOffsets() {
return this.storeTermVectorOffsets;
}
public void setStoreTermVectorOffsets(boolean value) {
checkIfFrozen();
this.storeTermVectorOffsets = value;
}
public boolean storeTermVectorPositions() {
return this.storeTermVectorPositions;
}
public void setStoreTermVectorPositions(boolean value) {
checkIfFrozen();
this.storeTermVectorPositions = value;
}
public boolean omitNorms() {
return this.omitNorms;
}
public void setOmitNorms(boolean value) {
checkIfFrozen();
this.omitNorms = value;
}
public IndexOptions indexOptions() {
return this.indexOptions;
}
public void setIndexOptions(IndexOptions value) {
checkIfFrozen();
this.indexOptions = value;
}
public boolean lazy() {
return this.lazy;
}
public void setLazy(boolean value) {
checkIfFrozen();
this.lazy = value;
}
/** Prints a FieldType for human consumption. */
@Override
public final String toString() {
StringBuilder result = new StringBuilder();
if (stored()) {
result.append("stored");
}
if (indexed()) {
if (result.length() > 0)
result.append(",");
result.append("indexed");
}
if (tokenized()) {
if (result.length() > 0)
result.append(",");
result.append("tokenized");
}
if (storeTermVectors()) {
if (result.length() > 0)
result.append(",");
result.append("termVector");
}
if (storeTermVectorOffsets()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorOffsets");
}
if (storeTermVectorPositions()) {
if (result.length() > 0)
result.append(",");
result.append("termVectorPosition");
}
if (omitNorms()) {
result.append(",omitNorms");
}
if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) {
result.append(",indexOptions=");
result.append(indexOptions);
}
if (lazy()){
result.append(",lazy");
}
return result.toString();
}
}
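// For illustration, a sketch (not part of the commit) of assembling and
// freezing a custom FieldType, mirroring the TYPE_* constants defined on the
// sugar field classes; the class and field names here are hypothetical.
class FieldTypeExample {
static final FieldType CUSTOM = new FieldType();
static {
CUSTOM.setIndexed(true);
CUSTOM.setTokenized(true);
CUSTOM.setStored(true);
CUSTOM.setIndexOptions(IndexOptions.DOCS_ONLY);
CUSTOM.freeze(); // any further setter call now throws IllegalStateException
}
}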


@ -1,238 +0,0 @@
package org.apache.lucene.document;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.FieldInvertState; // for javadocs
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.search.PhraseQuery; // for javadocs
import org.apache.lucene.search.spans.SpanQuery; // for javadocs
import java.io.Reader;
/**
* Synonymous with {@link Field}.
*
* <p><b>WARNING</b>: This interface may change within minor versions, despite Lucene's backward compatibility requirements.
* This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards
* compatibility promises remain intact. For example, Lucene can still
* read and write indices created within the same major version.
* </p>
*
**/
public interface Fieldable {
/** Sets the boost factor for hits on this field. This value will be
* multiplied into the score of all hits on this field of this
* document.
*
* <p>The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document
* containing this field. If a document has multiple fields with the same
* name, all such values are multiplied together. This product is then
* used to compute the norm factor for the field. By
* default, in the {@link
* org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied
* by the length normalization factor
* and then rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the
* index. One should attempt to ensure that this product does not overflow
* the range of that encoding.
*
* @see org.apache.lucene.document.Document#setBoost(float)
* @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)
* @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)
*/
void setBoost(float boost);
/** Returns the boost factor for hits for this field.
*
* <p>The default value is 1.0.
*
* <p>Note: this value is not stored directly with the document in the index.
* Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and
* {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when
* this field was indexed.
*
* @see #setBoost(float)
*/
float getBoost();
/** Returns the name of the field.
* For example "date", "title", "body", ...
*/
String name();
/** The value of the field as a String, or null.
* <p>
* For indexing, if isStored()==true, the stringValue() will be used as the stored field value
* unless isBinary()==true, in which case getBinaryValue() will be used.
*
* If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token.
* If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null,
* else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens.
*/
public String stringValue();
/** The value of the field as a Reader, which can be used at index time to generate indexed tokens.
* @see #stringValue()
*/
public Reader readerValue();
/** The TokenStream for this field to be used when indexing, or null.
* @see #stringValue()
*/
public TokenStream tokenStreamValue();
/** True if the value of the field is to be stored in the index for return
with search hits. */
boolean isStored();
/** True if the value of the field is to be indexed, so that it may be
searched on. */
boolean isIndexed();
/** True if the value of the field should be tokenized as text prior to
indexing. Un-tokenized fields are indexed as a single word and may not be
Reader-valued. */
boolean isTokenized();
/** True if the term or terms used to index this field are stored as a term
* vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}.
* These methods do not provide access to the original content of the field,
* only to terms used to index it. If the original content must be
* preserved, use the <code>stored</code> attribute instead.
*
* @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String)
*/
boolean isTermVectorStored();
/**
* True if terms are stored as term vector together with their offsets
* (start and end position in source text).
*/
boolean isStoreOffsetWithTermVector();
/**
* True if terms are stored as term vector together with their token positions.
*/
boolean isStorePositionWithTermVector();
/** True if the value of the field is stored as binary */
boolean isBinary();
/** True if norms are omitted for this indexed field */
boolean getOmitNorms();
/** Expert:
*
* If set, omit normalization factors associated with this indexed field.
* This effectively disables indexing boosts and length normalization for this field.
*/
void setOmitNorms(boolean omitNorms);
/**
* Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving
* its values via {@link #stringValue()} or {@link #getBinaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that
* retrieved the {@link Document} is still open.
*
* @return true if this field can be loaded lazily
*/
boolean isLazy();
/**
* Returns the offset into the byte[] segment that is used as the value; if the Field is not binary
* the returned value is undefined
* @return index of the first character in byte[] segment that represents this Field value
*/
abstract int getBinaryOffset();
/**
* Returns the length of the byte[] segment that is used as the value; if the Field is not binary
* the returned value is undefined
* @return length of byte[] segment that represents this Field value
*/
abstract int getBinaryLength();
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.
* @return reference to the Field value as byte[].
*/
abstract byte[] getBinaryValue();
/**
* Return the raw byte[] for the binary field. Note that
* you must also call {@link #getBinaryLength} and {@link
* #getBinaryOffset} to know which range of bytes in this
* returned array belong to the field.<p>
* About reuse: if you pass in the result byte[] and it is
* used, likely the underlying implementation will hold
* onto this byte[] and return it in future calls to
* {@link #getBinaryValue()}.
* So if you subsequently re-use the same byte[] elsewhere
* it will alter this Fieldable's value.
* @param result User defined buffer that will be used if
* possible. If this is null or not large enough, a new
* buffer is allocated
* @return reference to the Field value as byte[].
*/
abstract byte[] getBinaryValue(byte[] result);
/** @see #setIndexOptions */
IndexOptions getIndexOptions();
/** Expert:
*
* If set, omit term freq, and optionally positions and payloads from
* postings for this field.
*
* <p><b>NOTE</b>: While this option reduces storage space
* required in the index, it also means any query
* requiring positional information, such as {@link
* PhraseQuery} or {@link SpanQuery} subclasses will
* fail with an exception.
*/
void setIndexOptions(IndexOptions indexOptions);
/**
* Returns the {@link PerDocFieldValues}
*/
public PerDocFieldValues getDocValues();
/**
* Sets the {@link PerDocFieldValues} for this field. If
* {@link PerDocFieldValues} is set, this field will store per-document values.
*
* @see IndexDocValues
*/
public void setDocValues(PerDocFieldValues docValues);
/**
* Returns <code>true</code> iff {@link PerDocFieldValues} are set on this
* field.
*/
public boolean hasDocValues();
/**
* Returns the {@link ValueType} of the set {@link PerDocFieldValues} or
* <code>null</code> if not set.
*/
public ValueType docValuesType();
}


@ -20,16 +20,13 @@ import java.io.Reader;
import java.util.Comparator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.BytesRef;
/**
* <p>
* This class provides a {@link AbstractField} that enables storing of typed
* This class provides a {@link Field} that enables storing of typed
* per-document values for scoring, sorting or value retrieval. Here's an
* example usage, adding an int value:
*
@ -54,16 +51,14 @@ import org.apache.lucene.util.BytesRef;
* </pre>
*
* <p>
* If doc values are stored in addition to an indexed ({@link Index}) or stored
* ({@link Store}) value it's recommended to use the {@link IndexDocValuesField}'s
* {@link #set(AbstractField)} API:
* If doc values are stored in addition to an indexed ({@link FieldType#setIndexed(boolean)}) or stored
* ({@link FieldType#setStored(boolean)}) value it's recommended to pass the appropriate {@link FieldType}
* when creating the field:
*
* <pre>
* IndexDocValuesField field = new IndexDocValuesField(name);
* Field indexedField = new Field(name, stringValue, Stored.NO, Indexed.ANALYZED);
* IndexDocValuesField field = new IndexDocValuesField(name, StringField.TYPE_STORED);
* Document document = new Document();
* document.add(indexedField);
* field.set(indexedField);
* document.add(field);
* for(all documents) {
* ...
* field.setInt(value)
@ -73,7 +68,8 @@ import org.apache.lucene.util.BytesRef;
* </pre>
*
* */
public class IndexDocValuesField extends AbstractField implements PerDocFieldValues {
// TODO: maybe rename to DocValuesField?
public class IndexDocValuesField extends Field implements PerDocFieldValues {
protected BytesRef bytes;
protected double doubleValue;
@ -85,21 +81,27 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
* Creates a new {@link IndexDocValuesField} with the given name.
*/
public IndexDocValuesField(String name) {
super(name, Store.NO, Index.NO, TermVector.NO);
setDocValues(this);
this(name, new FieldType());
}
/**
* Creates a {@link IndexDocValuesField} prototype
*/
IndexDocValuesField() {
this("");
public IndexDocValuesField(String name, FieldType type) {
this(name, type, null);
}
public IndexDocValuesField(String name, FieldType type, String value) {
super(name, type);
fieldsData = value;
}
@Override
public PerDocFieldValues docValues() {
return this;
}
/**
* Sets the given <code>long</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
* default type use {@link #setDocValuesType(ValueType)}.
*/
public void setInt(long value) {
setInt(value, false);
@ -124,7 +126,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
/**
* Sets the given <code>int</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
* default type use {@link #setDocValuesType(ValueType)}.
*/
public void setInt(int value) {
setInt(value, false);
@ -149,7 +151,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
/**
* Sets the given <code>short</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
* default type use {@link #setDocValuesType(ValueType)}.
*/
public void setInt(short value) {
setInt(value, false);
@ -174,11 +176,12 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
/**
* Sets the given <code>byte</code> value and sets the field's {@link ValueType} to
* {@link ValueType#VAR_INTS} unless already set. If you want to change the
* default type use {@link #setType(ValueType)}.
* default type use {@link #setDocValuesType(ValueType)}.
*/
public void setInt(byte value) {
setInt(value, false);
}
/**
* Sets the given <code>byte</code> value as an 8-bit signed integer.
*
@ -198,7 +201,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
/**
* Sets the given <code>float</code> value and sets the field's {@link ValueType}
* to {@link ValueType#FLOAT_32} unless already set. If you want to
* change the type use {@link #setType(ValueType)}.
* change the type use {@link #setDocValuesType(ValueType)}.
*/
public void setFloat(float value) {
if (type == null) {
@ -210,7 +213,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
/**
* Sets the given <code>double</code> value and sets the field's {@link ValueType}
* to {@link ValueType#FLOAT_64} unless already set. If you want to
* change the default type use {@link #setType(ValueType)}.
* change the default type use {@link #setDocValuesType(ValueType)}.
*/
public void setFloat(double value) {
if (type == null) {
@ -241,7 +244,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
if (value == null) {
throw new IllegalArgumentException("value must not be null");
}
setType(type);
setDocValuesType(type);
if (bytes == null) {
bytes = new BytesRef(value);
} else {
@ -289,20 +292,13 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
/**
* Sets the {@link ValueType} for this field.
*/
public void setType(ValueType type) {
public void setDocValuesType(ValueType type) {
if (type == null) {
throw new IllegalArgumentException("Type must not be null");
}
this.type = type;
}
/**
* Returns the field's {@link ValueType}
*/
public ValueType type() {
return type;
}
/**
* Returns always <code>null</code>
*/
@ -310,13 +306,6 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
return null;
}
/**
* Returns always <code>null</code>
*/
public String stringValue() {
return null;
}
/**
* Returns always <code>null</code>
*/
@ -324,25 +313,14 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
return null;
}
/**
* Sets this {@link IndexDocValuesField} to the given {@link AbstractField} and
* returns the given field. Any modifications to this instance will be visible
* to the given field.
*/
public <T extends AbstractField> T set(T field) {
field.setDocValues(this);
return field;
@Override
public ValueType docValuesType() {
return type;
}
/**
* Sets a new {@link PerDocFieldValues} instance on the given field with the
* given type and returns it.
*
*/
public static <T extends AbstractField> T set(T field, ValueType type) {
if (field instanceof IndexDocValuesField)
return field;
final IndexDocValuesField valField = new IndexDocValuesField();
@Override
public String toString() {
final String value;
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
@ -350,9 +328,43 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
BytesRef ref = field.isBinary() ? new BytesRef(field.getBinaryValue(),
field.getBinaryOffset(), field.getBinaryLength()) : new BytesRef(
field.stringValue());
value = "bytes:bytes.utf8ToString();";
break;
case VAR_INTS:
value = "int:" + longValue;
break;
case FLOAT_32:
value = "float32:" + doubleValue;
break;
case FLOAT_64:
value = "float64:" + doubleValue;
break;
default:
throw new IllegalArgumentException("unknown type: " + type);
}
return "<" + name() + ": IndexDocValuesField " + value + ">";
}
/**
* Returns an IndexDocValuesField holding the value from
* the provided string field, as the specified type. The
* incoming field must have a string value. The name, {@link
* FieldType} and string value are carried over from the
* incoming Field.
*/
public static IndexDocValuesField build(Field field, ValueType type) {
if (field instanceof IndexDocValuesField) {
return (IndexDocValuesField) field;
}
final IndexDocValuesField valField = new IndexDocValuesField(field.name(), field.getFieldType(), field.stringValue());
switch (type) {
case BYTES_FIXED_DEREF:
case BYTES_FIXED_SORTED:
case BYTES_FIXED_STRAIGHT:
case BYTES_VAR_DEREF:
case BYTES_VAR_SORTED:
case BYTES_VAR_STRAIGHT:
BytesRef ref = field.isBinary() ? field.binaryValue() : new BytesRef(field.stringValue());
valField.setBytes(ref, type);
break;
case VAR_INTS:
@ -367,7 +379,6 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal
default:
throw new IllegalArgumentException("unknown type: " + type);
}
return valField.set(field);
return valField;
}
}
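// For illustration, a sketch (not part of the commit) of build() carrying a
// stored string field into byte doc values; names and values are hypothetical.
class IndexDocValuesFieldExample {
static Document example() {
Field title = new Field("title", StringField.TYPE_STORED, "lucene in action");
IndexDocValuesField dv = IndexDocValuesField.build(title, ValueType.BYTES_VAR_STRAIGHT);
Document doc = new Document();
doc.add(dv); // one field now supplies both the stored value and the doc values
return doc;
}
}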


@ -22,28 +22,30 @@ import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.NumericTokenStream;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.document.NumericField.DataType;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.search.NumericRangeQuery; // javadocs
import org.apache.lucene.search.NumericRangeFilter; // javadocs
import org.apache.lucene.search.FieldCache; // javadocs
/**
* <p>This class provides a {@link Field} that enables indexing
* of numeric values for efficient range filtering and
* sorting. Here's an example usage, adding an int value:
* <p>
* This class provides a {@link Field} that enables indexing of numeric values
* for efficient range filtering and sorting. Here's an example usage, adding an
* int value:
*
* <pre>
* document.add(new NumericField(name).setIntValue(value));
* document.add(new NumericField(name).setIntValue(value));
* </pre>
*
* For optimal performance, re-use the
* <code>NumericField</code> and {@link Document} instance for more than
* one document:
*
*
* For optimal performance, re-use the <code>NumericField</code> and
* {@link Document} instance for more than one document:
*
* <pre>
* NumericField field = new NumericField(name);
* Document document = new Document();
* document.add(field);
*
*
* for(all documents) {
* ...
* field.setIntValue(value)
@ -74,7 +76,7 @@ import org.apache.lucene.search.FieldCache; // javadocs
*
* <p>By default, a <code>NumericField</code>'s value is not stored but
* is indexed for range filtering and sorting. You can use
* the {@link #NumericField(String,Field.Store,boolean)}
* the {@link #NumericField(String, FieldType)}
* constructor if you need to change these defaults.</p>
*
* <p>You may add the same field name as a <code>NumericField</code> to
@ -102,7 +104,7 @@ import org.apache.lucene.search.FieldCache; // javadocs
* default value, 4, was selected for a reasonable tradeoff
* of disk space consumption versus performance. You can
* use the expert constructor {@link
* #NumericField(String,int,Field.Store,boolean)} if you'd
* #NumericField(String,int,FieldType)} if you'd
* like to change the value. Note that you must also
* specify a congruent value when creating {@link
* NumericRangeQuery} or {@link NumericRangeFilter}.
@ -129,94 +131,136 @@ import org.apache.lucene.search.FieldCache; // javadocs
*
* @since 2.9
*/
public final class NumericField extends AbstractField {
public final class NumericField extends Field {
/** Data type of the value in {@link NumericField}.
* @since 3.2
*/
public static enum DataType { INT, LONG, FLOAT, DOUBLE }
private transient NumericTokenStream numericTS;
private DataType type;
private final int precisionStep;
public static final FieldType TYPE_UNSTORED = new FieldType();
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_UNSTORED.setIndexed(true);
TYPE_UNSTORED.setTokenized(true);
TYPE_UNSTORED.setOmitNorms(true);
TYPE_UNSTORED.setIndexOptions(IndexOptions.DOCS_ONLY);
TYPE_UNSTORED.freeze();
TYPE_STORED.setIndexed(true);
TYPE_STORED.setStored(true);
TYPE_STORED.setTokenized(true);
TYPE_STORED.setOmitNorms(true);
TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
TYPE_STORED.freeze();
}
//public static enum DataType { INT, LONG, FLOAT, DOUBLE }
private DataType dataType;
private transient NumericTokenStream numericTS;
private final int precisionStep;
/**
* Creates a field for numeric values using the default <code>precisionStep</code>
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
* a numeric value, before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* This constructor creates an indexed, but not stored field.
* @param name the field name
* Creates a field for numeric values using the default
* <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
* The instance is not yet initialized with a numeric value; before indexing a
* document containing this field, set a value using the various
* set<em>???</em>Value() methods. This constructor creates an indexed, but not
* stored field.
*
* @param name
* the field name
*/
public NumericField(String name) {
this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true);
this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.TYPE_UNSTORED);
}
/**
* Creates a field for numeric values using the default <code>precisionStep</code>
* {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with
* a numeric value, before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
* @param store if the field should be stored, {@link Document#getFieldable}
* then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
* Creates a field for numeric values using the default
* <code>precisionStep</code> {@link NumericUtils#PRECISION_STEP_DEFAULT} (4).
* The instance is not yet initialized with a numeric value; before indexing a
* document containing this field, set a value using the various
* set<em>???</em>Value() methods.
*
* @param name
*          the field name
* @param type
*          the field type to use if the default should be altered, e.g. stored, so
*          that {@link Document#getField} returns {@code NumericField}
*          instances on search results, or indexed using
*          {@link NumericTokenStream}
*/
public NumericField(String name, Field.Store store, boolean index) {
this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index);
public NumericField(String name, FieldType type) {
this(name, NumericUtils.PRECISION_STEP_DEFAULT, type);
}
/**
* Creates a field for numeric values with the specified
* <code>precisionStep</code>. The instance is not yet initialized with
* a numeric value, before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* This constructor creates an indexed, but not stored field.
* @param name the field name
* @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
* <code>precisionStep</code>. The instance is not yet initialized with a
* numeric value; before indexing a document containing this field, set a
* value using the various set<em>???</em>Value() methods. This constructor
* creates an indexed, but not stored field.
*
* @param name
* the field name
* @param precisionStep
* the used <a
* href="../search/NumericRangeQuery.html#precisionStepDesc"
* >precision step</a>
*/
public NumericField(String name, int precisionStep) {
this(name, precisionStep, Field.Store.NO, true);
this(name, precisionStep, NumericField.TYPE_UNSTORED);
}
/**
* Creates a field for numeric values with the specified
* <code>precisionStep</code>. The instance is not yet initialized with
* a numeric value, before indexing a document containing this field,
* set a value using the various set<em>???</em>Value() methods.
* @param name the field name
* @param precisionStep the used <a href="../search/NumericRangeQuery.html#precisionStepDesc">precision step</a>
* @param store if the field should be stored, {@link Document#getFieldable}
* then returns {@code NumericField} instances on search results.
* @param index if the field should be indexed using {@link NumericTokenStream}
* <code>precisionStep</code>. The instance is not yet initialized with a
* numeric value; before indexing a document containing this field, set a
* value using the various set<em>???</em>Value() methods.
*
* @param name
* the field name
* @param precisionStep
* the used <a
* href="../search/NumericRangeQuery.html#precisionStepDesc"
* >precision step</a>
* @param type
*          the field type to use if the default should be altered, e.g. stored, so
*          that {@link Document#getField} returns {@code NumericField}
*          instances on search results, or indexed using
*          {@link NumericTokenStream}
*/
public NumericField(String name, int precisionStep, Field.Store store, boolean index) {
super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO);
public NumericField(String name, int precisionStep, FieldType type) {
super(name, type);
this.precisionStep = precisionStep;
setIndexOptions(IndexOptions.DOCS_ONLY);
}
/** Returns a {@link NumericTokenStream} for indexing the numeric value. */
public TokenStream tokenStreamValue() {
if (!isIndexed())
return null;
public TokenStream tokenStreamValue() {
if (!indexed()) return null;
if (numericTS == null) {
// lazy init the TokenStream as it is heavy to instantiate (attributes,...),
// lazy init the TokenStream as it is heavy to instantiate
// (attributes,...),
// if not needed (stored field loading)
numericTS = new NumericTokenStream(precisionStep);
// initialize value in TokenStream
if (fieldsData != null) {
assert type != null;
assert dataType != null;
final Number val = (Number) fieldsData;
switch (type) {
switch (dataType) {
case INT:
numericTS.setIntValue(val.intValue()); break;
numericTS.setIntValue(val.intValue());
break;
case LONG:
numericTS.setLongValue(val.longValue()); break;
numericTS.setLongValue(val.longValue());
break;
case FLOAT:
numericTS.setFloatValue(val.floatValue()); break;
numericTS.setFloatValue(val.floatValue());
break;
case DOUBLE:
numericTS.setDoubleValue(val.doubleValue()); break;
numericTS.setDoubleValue(val.doubleValue());
break;
default:
assert false : "Should never get here";
}
@ -225,27 +269,28 @@ public final class NumericField extends AbstractField {
return numericTS;
}
/** Always returns <code>null</code> for numeric fields */
@Override
public byte[] getBinaryValue(byte[] result){
return null;
}
/** Always returns <code>null</code> for numeric fields */
public Reader readerValue() {
return null;
}
/** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)}
* on search results. It is recommended to use {@link Document#getFieldable} instead
* that returns {@code NumericField} instances. You can then use {@link #getNumericValue}
* to return the stored value. */
public String stringValue() {
/**
* Returns the numeric value as a string. It is recommended to
* use {@link Document#getField} instead, which returns {@code NumericField}
* instances. You can then use {@link #numericValue} to retrieve the stored
* value.
*/
@Override
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
/** Returns the current numeric value as a subclass of {@link Number}, <code>null</code> if not yet initialized. */
public Number getNumericValue() {
/**
* Returns the current numeric value as a subclass of {@link Number},
* <code>null</code> if not yet initialized.
*/
@Override
public Number numericValue() {
return (Number) fieldsData;
}
@ -254,63 +299,79 @@ public final class NumericField extends AbstractField {
return precisionStep;
}
/** Returns the data type of the current value, {@code null} if not yet set.
/**
* Returns the data type of the current value, {@code null} if not yet set.
*
* @since 3.2
*/
public DataType getDataType() {
return type;
@Override
public DataType numericDataType() {
return dataType;
}
@Override
public boolean numeric() {
return true;
}
/**
* Initializes the field with the supplied <code>long</code> value.
* @param value the numeric value
*
* @param value
* the numeric value
* @return this instance, because of this you can use it the following way:
* <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
* <code>document.add(new NumericField(name, precisionStep).setLongValue(value))</code>
*/
public NumericField setLongValue(final long value) {
if (numericTS != null) numericTS.setLongValue(value);
fieldsData = Long.valueOf(value);
type = DataType.LONG;
dataType = DataType.LONG;
return this;
}
/**
* Initializes the field with the supplied <code>int</code> value.
* @param value the numeric value
*
* @param value
* the numeric value
* @return this instance, because of this you can use it the following way:
* <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
* <code>document.add(new NumericField(name, precisionStep).setIntValue(value))</code>
*/
public NumericField setIntValue(final int value) {
if (numericTS != null) numericTS.setIntValue(value);
fieldsData = Integer.valueOf(value);
type = DataType.INT;
dataType = DataType.INT;
return this;
}
/**
* Initializes the field with the supplied <code>double</code> value.
* @param value the numeric value
*
* @param value
* the numeric value
* @return this instance, because of this you can use it the following way:
* <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
* <code>document.add(new NumericField(name, precisionStep).setDoubleValue(value))</code>
*/
public NumericField setDoubleValue(final double value) {
if (numericTS != null) numericTS.setDoubleValue(value);
fieldsData = Double.valueOf(value);
type = DataType.DOUBLE;
dataType = DataType.DOUBLE;
return this;
}
/**
* Initializes the field with the supplied <code>float</code> value.
* @param value the numeric value
*
* @param value
* the numeric value
* @return this instance, because of this you can use it the following way:
* <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
* <code>document.add(new NumericField(name, precisionStep).setFloatValue(value))</code>
*/
public NumericField setFloatValue(final float value) {
if (numericTS != null) numericTS.setFloatValue(value);
fieldsData = Float.valueOf(value);
type = DataType.FLOAT;
dataType = DataType.FLOAT;
return this;
}
}


@ -0,0 +1,51 @@
package org.apache.lucene.document;
import org.apache.lucene.index.FieldInfo.IndexOptions;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public final class StringField extends Field {
public static final FieldType TYPE_UNSTORED = new FieldType();
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_UNSTORED.setIndexed(true);
TYPE_UNSTORED.setOmitNorms(true);
TYPE_UNSTORED.setIndexOptions(IndexOptions.DOCS_ONLY);
TYPE_UNSTORED.freeze();
TYPE_STORED.setIndexed(true);
TYPE_STORED.setStored(true);
TYPE_STORED.setOmitNorms(true);
TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY);
TYPE_STORED.freeze();
}
public StringField(String name, String value) {
super(name, TYPE_UNSTORED, value);
}
@Override
public String stringValue() {
return (fieldsData == null) ? null : fieldsData.toString();
}
@Override
public boolean numeric() {
return false;
}
}


@ -0,0 +1,54 @@
package org.apache.lucene.document;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
public final class TextField extends Field {
public static final FieldType TYPE_UNSTORED = new FieldType();
public static final FieldType TYPE_STORED = new FieldType();
static {
TYPE_UNSTORED.setIndexed(true);
TYPE_UNSTORED.setTokenized(true);
TYPE_UNSTORED.freeze();
TYPE_STORED.setIndexed(true);
TYPE_STORED.setStored(true);
TYPE_STORED.setTokenized(true);
TYPE_STORED.freeze();
}
public TextField(String name, Reader reader) {
super(name, TextField.TYPE_UNSTORED, reader);
}
public TextField(String name, String value) {
super(name, TextField.TYPE_UNSTORED, value);
}
public TextField(String name, TokenStream stream) {
super(name, TextField.TYPE_UNSTORED, stream);
}
@Override
public boolean numeric() {
return false;
}
}
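// For illustration, a sketch (not part of the commit) of the new sugar types
// in use; field names and values are hypothetical.
class TextFieldExample {
static Document example() {
Document doc = new Document();
doc.add(new StringField("id", "doc-42")); // un-tokenized, indexed, not stored
doc.add(new TextField("body", new java.io.StringReader("quick brown fox"))); // analyzed, not stored
return doc;
}
}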


@ -22,16 +22,16 @@
<body>
<p>The logical representation of a {@link org.apache.lucene.document.Document} for indexing and searching.</p>
<p>The document package provides the user level logical representation of content to be indexed and searched. The
package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.document.Fieldable}s.</p>
<h2>Document and Fieldable</h2>
<p>A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.document.Fieldable}s. A
{@link org.apache.lucene.document.Fieldable} is a logical representation of a user's content that needs to be indexed or stored.
{@link org.apache.lucene.document.Fieldable}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized,
stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.document.Fieldable}
package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.index.IndexableField}s.</p>
<h2>Document and IndexableField</h2>
<p>A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.index.IndexableField}s. A
{@link org.apache.lucene.index.IndexableField} is a logical representation of a user's content that needs to be indexed or stored.
{@link org.apache.lucene.index.IndexableField}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized,
stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.index.IndexableField}
for specifics on these properties.
</p>
<p>Note: it is common to refer to {@link org.apache.lucene.document.Document}s having {@link org.apache.lucene.document.Field}s, even though technically they have
{@link org.apache.lucene.document.Fieldable}s.</p>
{@link org.apache.lucene.index.IndexableField}s.</p>
<h2>Working with Documents</h2>
<p>First and foremost, a {@link org.apache.lucene.document.Document} is something created by the user application. It is your job
to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.)
@ -45,7 +45,7 @@ package also provides utilities for working with {@link org.apache.lucene.docume
to simplify indexing of numeric values (and also dates) for fast range queries with {@link org.apache.lucene.search.NumericRangeQuery}
(using a special sortable string representation of numeric values).</p>
<p>The {@link org.apache.lucene.document.FieldSelector} class provides a mechanism to tell Lucene how to load Documents from
storage. If no FieldSelector is used, all Fieldables on a Document will be loaded. As an example of the FieldSelector usage, consider
storage. If no FieldSelector is used, all IndexableFields on a Document will be loaded. As an example of the FieldSelector usage, consider
the common use case of
displaying search results on a web page and then having users click through to see the full document. In this scenario, it is often
the case that there are many small fields and one or two large fields (containing the contents of the original file). Before the FieldSelector,


@ -17,6 +17,16 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.document.FieldType; // for javadocs
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import java.io.File;
import java.io.IOException;
import java.io.PrintStream;
@ -28,21 +38,11 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.AbstractField; // for javadocs
import org.apache.lucene.document.Document;
import org.apache.lucene.index.codecs.BlockTreeTermsReader;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.index.values.ValuesEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;
@ -189,7 +189,7 @@ public class CheckIndex {
/** True if at least one of the fields in this segment
* has position data
* @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
* @see FieldType#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */
public boolean hasProx;
/** Map that includes certain


@ -29,8 +29,6 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.Lock;
@ -559,12 +557,11 @@ class DirectoryReader extends IndexReader implements Cloneable {
return maxDoc;
}
// inherit javadoc
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
int i = readerIndex(n); // find segment num
return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
int i = readerIndex(docID); // find segment num
subReaders[i].document(docID - starts[i], visitor); // dispatch to segment reader
}
@Override


@ -18,11 +18,10 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import org.apache.lucene.document.Fieldable;
abstract class DocFieldConsumerPerField {
/** Processes all occurrences of a single field */
abstract void processFields(Fieldable[] fields, int count) throws IOException;
abstract void processFields(IndexableField[] fields, int count) throws IOException;
abstract void abort();
abstract FieldInfo getFieldInfo();
}


@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import org.apache.lucene.document.Fieldable;
final class DocFieldConsumersPerField extends DocFieldConsumerPerField {
@ -35,7 +34,7 @@ final class DocFieldConsumersPerField extends DocFieldConsumerPerField {
}
@Override
public void processFields(Fieldable[] fields, int count) throws IOException {
public void processFields(IndexableField[] fields, int count) throws IOException {
one.processFields(fields, count);
two.processFields(fields, count);
}


@ -22,15 +22,13 @@ import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.DocumentsWriterPerThread.DocState;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.index.codecs.PerDocConsumer;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.IOUtils;
@ -199,22 +197,16 @@ final class DocFieldProcessor extends DocConsumer {
consumer.startDocument();
fieldsWriter.startDocument();
final Document doc = docState.doc;
fieldCount = 0;
final int thisFieldGen = fieldGen++;
final List<Fieldable> docFields = doc.getFields();
final int numDocFields = docFields.size();
// Absorb any new fields first seen in this document.
// Also absorb any changes to fields we had already
// seen before (eg suddenly turning on norms or
// vectors, etc.):
for(int i=0;i<numDocFields;i++) {
Fieldable field = docFields.get(i);
for(IndexableField field : docState.doc) {
final String fieldName = field.name();
// Make sure we have a PerField allocated
@ -231,21 +223,22 @@ final class DocFieldProcessor extends DocConsumer {
// needs to be more "pluggable" such that if I want
// to have a new "thing" my Fields can do, I can
// easily add it
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
FieldInfo fi = fieldInfos.addOrUpdate(fieldName, field.indexed(), field.storeTermVectors(),
field.storeTermVectorPositions(), field.storeTermVectorOffsets(),
field.omitNorms(), false, field.indexOptions(), field.docValuesType());
fp = new DocFieldProcessorPerField(this, fi);
fp.next = fieldHash[hashPos];
fieldHash[hashPos] = fp;
totalFieldCount++;
if (totalFieldCount >= fieldHash.length/2)
if (totalFieldCount >= fieldHash.length/2) {
rehash();
}
} else {
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(),
field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(),
field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType());
fieldInfos.addOrUpdate(fp.fieldInfo.name, field.indexed(), field.storeTermVectors(),
field.storeTermVectorPositions(), field.storeTermVectorOffsets(),
field.omitNorms(), false, field.indexOptions(), field.docValuesType());
}
if (thisFieldGen != fp.lastGen) {
@ -266,12 +259,12 @@ final class DocFieldProcessor extends DocConsumer {
fp.addField(field);
if (field.isStored()) {
if (field.stored()) {
fieldsWriter.addField(field, fp.fieldInfo);
}
if (field.hasDocValues()) {
final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo);
docValuesConsumer.add(docState.docID, field.getDocValues());
final PerDocFieldValues docValues = field.docValues();
if (docValues != null) {
docValuesConsumer(docState, fp.fieldInfo).add(docState.docID, docValues);
}
}
@ -339,5 +332,4 @@ final class DocFieldProcessor extends DocConsumer {
docValues.put(fieldInfo.name, docValuesConsumer);
return docValuesConsumer;
}
}


@ -17,7 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@ -34,17 +33,17 @@ final class DocFieldProcessorPerField {
int lastGen = -1;
int fieldCount;
Fieldable[] fields = new Fieldable[1];
IndexableField[] fields = new IndexableField[1];
public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) {
this.consumer = docFieldProcessor.consumer.addField(fieldInfo);
this.fieldInfo = fieldInfo;
}
public void addField(Fieldable field) {
public void addField(IndexableField field) {
if (fieldCount == fields.length) {
int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
Fieldable[] newArray = new Fieldable[newSize];
IndexableField[] newArray = new IndexableField[newSize];
System.arraycopy(fields, 0, newArray, 0, fieldCount);
fields = newArray;
}


@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
@ -61,27 +60,32 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
}
@Override
public void processFields(final Fieldable[] fields,
public void processFields(final IndexableField[] fields,
final int count) throws IOException {
fieldState.reset(docState.doc.getBoost());
fieldState.reset();
final boolean doInvert = consumer.start(fields, count);
for(int i=0;i<count;i++) {
final Fieldable field = fields[i];
final IndexableField field = fields[i];
// TODO FI: this should be "genericized" to querying
// consumer if it wants to see this particular field
// tokenized.
if (field.isIndexed() && doInvert) {
if (field.indexed() && doInvert) {
if (i > 0)
fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name);
if (!field.isTokenized()) { // un-tokenized field
String stringValue = field.stringValue();
// TODO (LUCENE-2309): this analysis logic should be
// outside of indexer -- field should simply give us
// a TokenStream, even for multi-valued fields
if (!field.tokenized()) { // un-tokenized field
final String stringValue = field.stringValue();
assert stringValue != null;
final int valueLength = stringValue.length();
parent.singleToken.reinit(stringValue, 0, valueLength);
fieldState.attributeSource = parent.singleToken;
@ -103,17 +107,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
final TokenStream stream;
final TokenStream streamValue = field.tokenStreamValue();
if (streamValue != null)
if (streamValue != null) {
stream = streamValue;
else {
} else {
// the field does not have a TokenStream,
// so we have to obtain one from the analyzer
final Reader reader; // find or make Reader
final Reader readerValue = field.readerValue();
if (readerValue != null)
if (readerValue != null) {
reader = readerValue;
else {
} else {
String stringValue = field.stringValue();
if (stringValue == null) {
throw new IllegalArgumentException("field must have either TokenStream, String or Reader value");
@ -189,7 +193,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField {
}
fieldState.offset += docState.analyzer == null ? 0 : docState.analyzer.getOffsetGap(field);
fieldState.boost *= field.getBoost();
fieldState.boost *= field.boost();
}
// LUCENE-2387: don't hang onto the field, so GC can


@ -0,0 +1,142 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Set;
import java.util.HashSet;
import org.apache.lucene.document.BinaryField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.IndexInput;
/** A {@link StoredFieldVisitor} that creates a {@link
* Document} containing all stored fields, or only specific
* requested fields provided to {@link #DocumentStoredFieldVisitor(Set)}.
* This is used by {@link IndexReader#document(int)} to load a
* document.
*
* @lucene.experimental */
public class DocumentStoredFieldVisitor extends StoredFieldVisitor {
private final Document doc = new Document();
private final Set<String> fieldsToAdd;
/** Load only fields named in the provided <code>Set&lt;String&gt;</code>. */
public DocumentStoredFieldVisitor(Set<String> fieldsToAdd) {
this.fieldsToAdd = fieldsToAdd;
}
/** Load only the fields with the given names. */
public DocumentStoredFieldVisitor(String... fields) {
fieldsToAdd = new HashSet<String>(fields.length);
for(String field : fields) {
fieldsToAdd.add(field);
}
}
/** Load all stored fields. */
public DocumentStoredFieldVisitor() {
this.fieldsToAdd = null;
}
@Override
public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException {
if (accept(fieldInfo)) {
final byte[] b = new byte[numBytes];
in.readBytes(b, 0, b.length);
doc.add(new BinaryField(fieldInfo.name, b));
} else {
in.seek(in.getFilePointer() + numBytes);
}
return false;
}
@Override
public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
if (accept(fieldInfo)) {
final byte[] b = new byte[numUTF8Bytes];
in.readBytes(b, 0, b.length);
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.storeTermVector);
ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector);
ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector);
ft.setOmitNorms(fieldInfo.omitNorms);
ft.setIndexOptions(fieldInfo.indexOptions);
doc.add(new Field(fieldInfo.name,
ft,
new String(b, "UTF-8")));
} else {
in.seek(in.getFilePointer() + numUTF8Bytes);
}
return false;
}
@Override
public boolean intField(FieldInfo fieldInfo, int value) {
if (accept(fieldInfo)) {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
doc.add(new NumericField(fieldInfo.name, ft).setIntValue(value));
}
return false;
}
@Override
public boolean longField(FieldInfo fieldInfo, long value) {
if (accept(fieldInfo)) {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
doc.add(new NumericField(fieldInfo.name, ft).setLongValue(value));
}
return false;
}
@Override
public boolean floatField(FieldInfo fieldInfo, float value) {
if (accept(fieldInfo)) {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
doc.add(new NumericField(fieldInfo.name, ft).setFloatValue(value));
}
return false;
}
@Override
public boolean doubleField(FieldInfo fieldInfo, double value) {
if (accept(fieldInfo)) {
FieldType ft = new FieldType(NumericField.TYPE_STORED);
ft.setIndexed(fieldInfo.isIndexed);
doc.add(new NumericField(fieldInfo.name, ft).setDoubleValue(value));
}
return false;
}
private boolean accept(FieldInfo fieldInfo) {
return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name);
}
public Document getDocument() {
return doc;
}
}
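A short usage sketch, assuming an open IndexReader named reader and a valid docID: the visitor loads only the named stored fields and skips the bytes of every other field on disk.

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentStoredFieldVisitor;

DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("title", "date");
reader.document(docID, visitor);
Document doc = visitor.getDocument();  // contains only "title" and "date"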

View File

@ -27,7 +27,6 @@ import java.util.Queue;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
@ -320,7 +319,7 @@ final class DocumentsWriter {
return maybeMerge;
}
boolean updateDocuments(final Iterable<Document> docs, final Analyzer analyzer,
boolean updateDocuments(final Iterable<? extends Iterable<? extends IndexableField>> docs, final Analyzer analyzer,
final Term delTerm) throws CorruptIndexException, IOException {
boolean maybeMerge = preUpdate();
@ -351,7 +350,7 @@ final class DocumentsWriter {
return postUpdate(flushingDWPT, maybeMerge);
}
boolean updateDocument(final Document doc, final Analyzer analyzer,
boolean updateDocument(final Iterable<? extends IndexableField> doc, final Analyzer analyzer,
final Term delTerm) throws CorruptIndexException, IOException {
boolean maybeMerge = preUpdate();

View File

@ -26,7 +26,6 @@ import java.text.NumberFormat;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice;
import org.apache.lucene.search.SimilarityProvider;
import org.apache.lucene.store.Directory;
@ -90,7 +89,7 @@ public class DocumentsWriterPerThread {
PrintStream infoStream;
SimilarityProvider similarityProvider;
int docID;
Document doc;
Iterable<? extends IndexableField> doc;
String maxTermPrefix;
DocState(DocumentsWriterPerThread docWriter) {
@ -213,7 +212,7 @@ public class DocumentsWriterPerThread {
return retval;
}
public void updateDocument(Document doc, Analyzer analyzer, Term delTerm) throws IOException {
public void updateDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer, Term delTerm) throws IOException {
assert writer.testPoint("DocumentsWriterPerThread addDocument start");
assert deleteQueue != null;
docState.doc = doc;
@ -263,7 +262,7 @@ public class DocumentsWriterPerThread {
finishDocument(delTerm);
}
public int updateDocuments(Iterable<Document> docs, Analyzer analyzer, Term delTerm) throws IOException {
public int updateDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer, Term delTerm) throws IOException {
assert writer.testPoint("DocumentsWriterPerThread addDocuments start");
assert deleteQueue != null;
docState.analyzer = analyzer;
@ -280,7 +279,7 @@ public class DocumentsWriterPerThread {
}
int docCount = 0;
try {
for(Document doc : docs) {
for(Iterable<? extends IndexableField> doc : docs) {
docState.doc = doc;
docState.docID = numDocsInRAM;
docCount++;

View File

@ -30,9 +30,9 @@ public final class FieldInfo {
// true if term vector for this field should be stored
boolean storeTermVector;
boolean storeOffsetWithTermVector;
boolean storePositionWithTermVector;
public boolean storeTermVector;
public boolean storeOffsetWithTermVector;
public boolean storePositionWithTermVector;
public boolean omitNorms; // omit norms associated with indexed fields
public IndexOptions indexOptions;

View File

@ -39,8 +39,8 @@ import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.CodecUtil;
/** Access to the Fieldable Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Fieldable Info file. Objects
/** Access to the Field Info file that describes document fields and whether or
* not they are indexed. Each segment has a separate Field Info file. Objects
* of this class are thread-safe for multiple readers, but only one thread can
* be adding documents at a time, with no other reader or writer threads
* accessing this object.
@ -381,7 +381,7 @@ public final class FieldInfos implements Iterable<FieldInfo> {
/**
* Calls 5 parameter add with false for all TermVector parameters.
*
* @param name The name of the Fieldable
* @param name The name of the IndexableField
* @param isIndexed true if the field is indexed
* @see #addOrUpdate(String, boolean, boolean, boolean, boolean)
*/

View File

@ -50,14 +50,14 @@ public final class FieldInvertState {
* Re-initialize the state, using this boost value.
* @param docBoost boost value to use.
*/
void reset(float docBoost) {
void reset() {
position = 0;
length = 0;
numOverlap = 0;
offset = 0;
maxTermFrequency = 0;
uniqueTermCount = 0;
boost = docBoost;
boost = 1.0f;
attributeSource = null;
}

View File

@ -17,16 +17,9 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.AbstractField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import java.io.IOException;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
@ -34,9 +27,6 @@ import org.apache.lucene.util.CloseableThreadLocal;
import org.apache.lucene.util.IOUtils;
import java.io.Closeable;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
/**
* Class responsible for access to stored document fields.
@ -49,7 +39,8 @@ public final class FieldsReader implements Cloneable, Closeable {
private final static int FORMAT_SIZE = 4;
private final FieldInfos fieldInfos;
private CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>();
// The main fieldStream, used only for cloning.
private final IndexInput cloneableFieldsStream;
@ -68,7 +59,6 @@ public final class FieldsReader implements Cloneable, Closeable {
// file. This will be 0 if we have our own private file.
private int docStoreOffset;
private CloseableThreadLocal<IndexInput> fieldsStreamTL = new CloseableThreadLocal<IndexInput>();
private boolean isOriginal = false;
/** Returns a cloned FieldsReader that shares open
@ -200,50 +190,52 @@ public final class FieldsReader implements Cloneable, Closeable {
indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L);
}
public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
seekIndex(n);
long position = indexStream.readLong();
fieldsStream.seek(position);
fieldsStream.seek(indexStream.readLong());
Document doc = new Document();
int numFields = fieldsStream.readVInt();
out: for (int i = 0; i < numFields; i++) {
final int numFields = fieldsStream.readVInt();
for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) {
int fieldNumber = fieldsStream.readVInt();
FieldInfo fi = fieldInfos.fieldInfo(fieldNumber);
FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name);
FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber);
int bits = fieldsStream.readByte() & 0xFF;
assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits);
boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0;
boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
final boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0;
final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK;
switch (acceptField) {
case LOAD:
addField(doc, fi, binary, tokenize, numeric);
final boolean doStop;
if (binary) {
final int numBytes = fieldsStream.readVInt();
doStop = visitor.binaryField(fieldInfo, fieldsStream, numBytes);
} else if (numeric != 0) {
switch(numeric) {
case FieldsWriter.FIELD_IS_NUMERIC_INT:
doStop = visitor.intField(fieldInfo, fieldsStream.readInt());
break;
case LOAD_AND_BREAK:
addField(doc, fi, binary, tokenize, numeric);
break out; //Get out of this loop
case LAZY_LOAD:
addFieldLazy(doc, fi, binary, tokenize, true, numeric);
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
doStop = visitor.longField(fieldInfo, fieldsStream.readLong());
break;
case LATENT:
addFieldLazy(doc, fi, binary, tokenize, false, numeric);
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
doStop = visitor.floatField(fieldInfo, Float.intBitsToFloat(fieldsStream.readInt()));
break;
case SIZE:
skipFieldBytes(addFieldSize(doc, fi, binary, numeric));
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
doStop = visitor.doubleField(fieldInfo, Double.longBitsToDouble(fieldsStream.readLong()));
break;
case SIZE_AND_BREAK:
addFieldSize(doc, fi, binary, numeric);
break out; //Get out of this loop
default:
skipField(numeric);
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
}
} else {
// Text:
final int numUTF8Bytes = fieldsStream.readVInt();
doStop = visitor.stringField(fieldInfo, fieldsStream, numUTF8Bytes);
}
if (doStop) {
return;
}
}
return doc;
}
/** Returns the length in bytes of each raw document in a
@ -300,225 +292,4 @@ public final class FieldsReader implements Cloneable, Closeable {
private void skipFieldBytes(int toRead) throws IOException {
fieldsStream.seek(fieldsStream.getFilePointer() + toRead);
}
private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException {
assert numeric != 0;
switch(numeric) {
case FieldsWriter.FIELD_IS_NUMERIC_INT:
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt());
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong());
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt()));
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong()));
default:
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
}
}
private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult, int numeric) throws IOException {
final AbstractField f;
if (binary) {
int toRead = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult);
//Need to move the pointer ahead by toRead positions
fieldsStream.seek(pointer + toRead);
} else if (numeric != 0) {
f = loadNumericField(fi, numeric);
} else {
Field.Store store = Field.Store.YES;
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
int length = fieldsStream.readVInt();
long pointer = fieldsStream.getFilePointer();
//Skip ahead of where we are by the length of what is stored
fieldsStream.seek(pointer+length);
f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult);
}
f.setOmitNorms(fi.omitNorms);
f.setIndexOptions(fi.indexOptions);
doc.add(f);
}
private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize, int numeric) throws CorruptIndexException, IOException {
final AbstractField f;
if (binary) {
int toRead = fieldsStream.readVInt();
final byte[] b = new byte[toRead];
fieldsStream.readBytes(b, 0, b.length);
f = new Field(fi.name, b);
} else if (numeric != 0) {
f = loadNumericField(fi, numeric);
} else {
Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize);
Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector);
f = new Field(fi.name, // name
fieldsStream.readString(), // read value
Field.Store.YES,
index,
termVector);
}
f.setIndexOptions(fi.indexOptions);
f.setOmitNorms(fi.omitNorms);
doc.add(f);
}
// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes)
// Read just the size -- caller must skip the field content to continue reading fields
// Return the size in bytes or chars, depending on field type
private int addFieldSize(Document doc, FieldInfo fi, boolean binary, int numeric) throws IOException {
final int bytesize, size;
switch(numeric) {
case 0:
size = fieldsStream.readVInt();
bytesize = binary ? size : 2*size;
break;
case FieldsWriter.FIELD_IS_NUMERIC_INT:
case FieldsWriter.FIELD_IS_NUMERIC_FLOAT:
size = bytesize = 4;
break;
case FieldsWriter.FIELD_IS_NUMERIC_LONG:
case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE:
size = bytesize = 8;
break;
default:
throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric));
}
byte[] sizebytes = new byte[4];
sizebytes[0] = (byte) (bytesize>>>24);
sizebytes[1] = (byte) (bytesize>>>16);
sizebytes[2] = (byte) (bytesize>>> 8);
sizebytes[3] = (byte) bytesize ;
doc.add(new Field(fi.name, sizebytes));
return size;
}
/**
* A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is
* loaded.
*/
private class LazyField extends AbstractField implements Fieldable {
private int toRead;
private long pointer;
private final boolean cacheResult;
public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
super(name, store, Field.Index.NO, Field.TermVector.NO);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
this.cacheResult = cacheResult;
if (isBinary)
binaryLength = toRead;
lazy = true;
}
public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean cacheResult) {
super(name, store, index, termVector);
this.toRead = toRead;
this.pointer = pointer;
this.isBinary = isBinary;
this.cacheResult = cacheResult;
if (isBinary)
binaryLength = toRead;
lazy = true;
}
private IndexInput getFieldStream() {
IndexInput localFieldsStream = fieldsStreamTL.get();
if (localFieldsStream == null) {
localFieldsStream = (IndexInput) cloneableFieldsStream.clone();
fieldsStreamTL.set(localFieldsStream);
}
return localFieldsStream;
}
/** The value of the field as a Reader, or null. If null, the String value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public Reader readerValue() {
ensureOpen();
return null;
}
/** The value of the field as a TokenStream, or null. If null, the Reader value,
* String value, or binary value is used. Exactly one of stringValue(),
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public TokenStream tokenStreamValue() {
ensureOpen();
return null;
}
/** The value of the field as a String, or null. If null, the Reader value,
* binary value, or TokenStream value is used. Exactly one of stringValue(),
* readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */
public String stringValue() {
ensureOpen();
if (isBinary)
return null;
else {
if (fieldsData == null) {
String result = null;
IndexInput localFieldsStream = getFieldStream();
try {
localFieldsStream.seek(pointer);
byte[] bytes = new byte[toRead];
localFieldsStream.readBytes(bytes, 0, toRead);
result = new String(bytes, "UTF-8");
} catch (IOException e) {
throw new FieldReaderException(e);
}
if (cacheResult == true){
fieldsData = result;
}
return result;
} else {
return (String) fieldsData;
}
}
}
@Override
public byte[] getBinaryValue(byte[] result) {
ensureOpen();
if (isBinary) {
if (fieldsData == null) {
// Allocate new buffer if result is null or too small
final byte[] b;
if (result == null || result.length < toRead)
b = new byte[toRead];
else
b = result;
IndexInput localFieldsStream = getFieldStream();
// Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people
// since they are already handling this exception when getting the document
try {
localFieldsStream.seek(pointer);
localFieldsStream.readBytes(b, 0, toRead);
} catch (IOException e) {
throw new FieldReaderException(e);
}
binaryOffset = 0;
binaryLength = toRead;
if (cacheResult == true){
fieldsData = b;
}
return b;
} else {
return (byte[]) fieldsData;
}
} else
return null;
}
}
}

View File

@ -17,19 +17,16 @@ package org.apache.lucene.index;
*/
import java.io.IOException;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
final class FieldsWriter {
static final int FIELD_IS_TOKENIZED = 1 << 0;
// NOTE: bit 0 is free here! You can steal it!
static final int FIELD_IS_BINARY = 1 << 1;
// the old bit 1 << 2 was compressed, is now left out
@ -138,15 +135,17 @@ final class FieldsWriter {
}
}
final void writeField(int fieldNumber, Fieldable field) throws IOException {
final void writeField(int fieldNumber, IndexableField field) throws IOException {
fieldsStream.writeVInt(fieldNumber);
int bits = 0;
if (field.isTokenized())
bits |= FIELD_IS_TOKENIZED;
if (field.isBinary())
bits |= FIELD_IS_BINARY;
if (field instanceof NumericField) {
switch (((NumericField) field).getDataType()) {
final BytesRef bytes;
final String string;
// TODO: maybe a field should serialize itself?
// this way we don't bake into indexer all these
// specific encodings for different fields? and apps
// can customize...
if (field.numeric()) {
switch (field.numericDataType()) {
case INT:
bits |= FIELD_IS_NUMERIC_INT; break;
case LONG:
@ -158,23 +157,31 @@ final class FieldsWriter {
default:
assert false : "Should never get here";
}
string = null;
bytes = null;
} else {
bytes = field.binaryValue();
if (bytes != null) {
bits |= FIELD_IS_BINARY;
string = null;
} else {
string = field.stringValue();
}
}
fieldsStream.writeByte((byte) bits);
if (field.isBinary()) {
final byte[] data;
final int len;
final int offset;
data = field.getBinaryValue();
len = field.getBinaryLength();
offset = field.getBinaryOffset();
fieldsStream.writeVInt(len);
fieldsStream.writeBytes(data, offset, len);
} else if (field instanceof NumericField) {
final NumericField nf = (NumericField) field;
final Number n = nf.getNumericValue();
switch (nf.getDataType()) {
if (bytes != null) {
fieldsStream.writeVInt(bytes.length);
fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length);
} else if (string != null) {
fieldsStream.writeString(field.stringValue());
} else {
final Number n = field.numericValue();
if (n == null) {
throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue");
}
switch (field.numericDataType()) {
case INT:
fieldsStream.writeInt(n.intValue()); break;
case LONG:
@ -186,8 +193,6 @@ final class FieldsWriter {
default:
assert false : "Should never get here";
}
} else {
fieldsStream.writeString(field.stringValue());
}
}
@ -207,21 +212,21 @@ final class FieldsWriter {
assert fieldsStream.getFilePointer() == position;
}
final void addDocument(Document doc, FieldInfos fieldInfos) throws IOException {
final void addDocument(Iterable<? extends IndexableField> doc, FieldInfos fieldInfos) throws IOException {
indexStream.writeLong(fieldsStream.getFilePointer());
int storedCount = 0;
List<Fieldable> fields = doc.getFields();
for (Fieldable field : fields) {
if (field.isStored())
storedCount++;
for (IndexableField field : doc) {
if (field.stored()) {
storedCount++;
}
}
fieldsStream.writeVInt(storedCount);
for (Fieldable field : fields) {
if (field.isStored())
for (IndexableField field : doc) {
if (field.stored()) {
writeField(fieldInfos.fieldNumber(field.name()), field);
}
}
}
}
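Reading writeField and addDocument together, each document's record is a VInt count of stored fields followed, per field, by the global field number, a bits byte, and the value. A hedged sketch of the bytes produced for a single stored string field, assuming fieldsStream, indexStream and fieldNumber are in scope:

indexStream.writeLong(fieldsStream.getFilePointer()); // index file points at the record
fieldsStream.writeVInt(1);                            // storedCount: one stored field
fieldsStream.writeVInt(fieldNumber);                  // global number from FieldInfos
fieldsStream.writeByte((byte) 0);                     // bits: not binary, not numeric
fieldsStream.writeString("a value");                  // the string payload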

View File

@ -17,9 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -351,9 +350,9 @@ public class FilterIndexReader extends IndexReader {
}
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
return in.document(n, fieldSelector);
in.document(docID, visitor);
}
@Override

View File

@ -22,7 +22,6 @@ import java.util.Comparator;
import java.util.Map;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.FieldsConsumer;
import org.apache.lucene.index.codecs.PostingsConsumer;
@ -82,15 +81,17 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem
}
@Override
boolean start(Fieldable[] fields, int count) {
for(int i=0;i<count;i++)
if (fields[i].isIndexed())
boolean start(IndexableField[] fields, int count) {
for(int i=0;i<count;i++) {
if (fields[i].indexed()) {
return true;
}
}
return false;
}
@Override
void start(Fieldable f) {
void start(IndexableField f) {
if (fieldState.attributeSource.hasAttribute(PayloadAttribute.class)) {
payloadAttribute = fieldState.attributeSource.getAttribute(PayloadAttribute.class);
} else {

View File

@ -17,29 +17,28 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.Similarity;
import org.apache.lucene.index.codecs.Codec;
import org.apache.lucene.index.codecs.CodecProvider;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.search.FieldCache; // javadocs
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.ReaderUtil; // for javadocs
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.Closeable;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
/** IndexReader is an abstract class, providing an interface for accessing an
index. Search of an index is done entirely through this abstract interface,
so that any subclass which implements it is searchable.
@ -859,7 +858,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
* @return array of term frequency vectors. May be null if no term vectors have been
* stored for the specified document.
* @throws IOException if index cannot be accessed
* @see org.apache.lucene.document.Field.TermVector
*/
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
throws IOException;
@ -877,7 +875,6 @@ public abstract class IndexReader implements Cloneable,Closeable {
* @return term frequency vector May be null if field does not exist in the specified
* document or term vector was not stored.
* @throws IOException if index cannot be accessed
* @see org.apache.lucene.document.Field.TermVector
*/
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
throws IOException;
@ -946,57 +943,42 @@ public abstract class IndexReader implements Cloneable,Closeable {
return maxDoc() - numDocs();
}
/** Expert: visits the fields of a stored document, for
* custom processing/loading of each field. If you
* simply want to load all fields, use {@link
* #document(int)}. If you want to load a subset, use
* {@link DocumentStoredFieldVisitor}. */
public abstract void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException;
/**
* Returns the stored fields of the <code>n</code><sup>th</sup>
* <code>Document</code> in this index.
* <code>Document</code> in this index. This is just
* sugar for using {@link DocumentStoredFieldVisitor}.
* <p>
* <b>NOTE:</b> for performance reasons, this method does not check if the
* requested document is deleted, and therefore asking for a deleted document
* may yield unspecified results. Usually this is not required, however you
* can test if the doc is deleted by checking the {@link
* Bits} returned from {@link MultiFields#getLiveDocs}.
*
* <b>NOTE:</b> only the content of a field is returned,
* if that field was stored during indexing. Metadata
* like boost, omitNorm, IndexOptions, tokenized, etc.,
* are not preserved.
*
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public Document document(int n) throws CorruptIndexException, IOException {
// TODO: we need a separate StoredField, so that the
// Document returned here contains that class not
// IndexableField
public Document document(int docID) throws CorruptIndexException, IOException {
ensureOpen();
return document(n, null);
final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor();
document(docID, visitor);
return visitor.getDocument();
}
/**
* Get the {@link org.apache.lucene.document.Document} at the <code>n</code>
* <sup>th</sup> position. The {@link FieldSelector} may be used to determine
* what {@link org.apache.lucene.document.Field}s to load and how they should
* be loaded. <b>NOTE:</b> If this Reader (more specifically, the underlying
* <code>FieldsReader</code>) is closed before the lazy
* {@link org.apache.lucene.document.Field} is loaded an exception may be
* thrown. If you want the value of a lazy
* {@link org.apache.lucene.document.Field} to be available after closing you
* must explicitly load it or fetch the Document again with a new loader.
* <p>
* <b>NOTE:</b> for performance reasons, this method does not check if the
* requested document is deleted, and therefore asking for a deleted document
* may yield unspecified results. Usually this is not required, however you
* can test if the doc is deleted by checking the {@link
* Bits} returned from {@link MultiFields#getLiveDocs}.
*
* @param n Get the document at the <code>n</code><sup>th</sup> position
* @param fieldSelector The {@link FieldSelector} to use to determine what
* Fields should be loaded on the Document. May be null, in which case
* all Fields will be loaded.
* @return The stored fields of the
* {@link org.apache.lucene.document.Document} at the nth position
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
* @see org.apache.lucene.document.Fieldable
* @see org.apache.lucene.document.FieldSelector
* @see org.apache.lucene.document.SetBasedFieldSelector
* @see org.apache.lucene.document.LoadFirstFieldSelector
*/
// TODO (1.5): When we convert to JDK 1.5 make this Set<String>
public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException;
/** Returns true if any documents have been deleted */
public abstract boolean hasDeletions();
@ -1017,8 +999,8 @@ public abstract class IndexReader implements Cloneable,Closeable {
public abstract byte[] norms(String field) throws IOException;
/** Expert: Resets the normalization factor for the named field of the named
* document. By default, The norm represents the product of the field's {@link
* org.apache.lucene.document.Fieldable#setBoost(float) boost} and its
* document. By default, the norm represents the product of the field's {@link
* org.apache.lucene.document.Field#setBoost(float) boost} and its
* length normalization}. Thus, to preserve the length normalization
* values when resetting this, one should base the new value upon the old.
*
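As the document(int) javadoc above warns, deleted documents are not checked. A small sketch of the liveness test it suggests, assuming reader and docID exist:

import org.apache.lucene.document.Document;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

Bits liveDocs = MultiFields.getLiveDocs(reader);
if (liveDocs == null || liveDocs.get(docID)) {  // null means no deletions at all
  Document doc = reader.document(docID);
}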

View File

@ -36,7 +36,6 @@ import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@ -74,10 +73,10 @@ import org.apache.lucene.util.TwoPhaseCommit;
new index if there is not already an index at the provided path
and otherwise open the existing index.</p>
<p>In either case, documents are added with {@link #addDocument(Document)
<p>In either case, documents are added with {@link #addDocument(Iterable)
addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
#deleteDocuments(Query)}. A document can be updated with {@link
#updateDocument(Term, Document) updateDocument} (which just deletes
#updateDocument(Term, Iterable) updateDocument} (which just deletes
and then adds the entire document). When finished adding, deleting
and updating documents, {@link #close() close} should be called.</p>
@ -1281,7 +1280,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public void addDocument(Document doc) throws CorruptIndexException, IOException {
public void addDocument(Iterable<? extends IndexableField> doc) throws CorruptIndexException, IOException {
addDocument(doc, analyzer);
}
@ -1289,7 +1288,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
* Adds a document to this index, using the provided analyzer instead of the
* value of {@link #getAnalyzer()}.
*
* <p>See {@link #addDocument(Document)} for details on
* <p>See {@link #addDocument(Iterable)} for details on
* index and IndexWriter state after an Exception, and
* flushing/merging temporary free space requirements.</p>
*
@ -1300,7 +1299,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException {
public void addDocument(Iterable<? extends IndexableField> doc, Analyzer analyzer) throws CorruptIndexException, IOException {
updateDocument(null, doc, analyzer);
}
@ -1318,7 +1317,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
* compression), in which case you may need to fully
* re-index your documents at that time.
*
* <p>See {@link #addDocument(Document)} for details on
* <p>See {@link #addDocument(Iterable)} for details on
* index and IndexWriter state after an Exception, and
* flushing/merging temporary free space requirements.</p>
*
@ -1338,7 +1337,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
*
* @lucene.experimental
*/
public void addDocuments(Iterable<Document> docs) throws CorruptIndexException, IOException {
public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs) throws CorruptIndexException, IOException {
addDocuments(docs, analyzer);
}
@ -1353,7 +1352,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
*
* @lucene.experimental
*/
public void addDocuments(Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
public void addDocuments(Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
updateDocuments(null, docs, analyzer);
}
@ -1370,7 +1369,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
*
* @lucene.experimental
*/
public void updateDocuments(Term delTerm, Iterable<Document> docs) throws CorruptIndexException, IOException {
public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs) throws CorruptIndexException, IOException {
updateDocuments(delTerm, docs, analyzer);
}
@ -1388,7 +1387,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
*
* @lucene.experimental
*/
public void updateDocuments(Term delTerm, Iterable<Document> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
public void updateDocuments(Term delTerm, Iterable<? extends Iterable<? extends IndexableField>> docs, Analyzer analyzer) throws CorruptIndexException, IOException {
ensureOpen();
try {
boolean success = false;
@ -1511,7 +1510,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException {
public void updateDocument(Term term, Iterable<? extends IndexableField> doc) throws CorruptIndexException, IOException {
ensureOpen();
updateDocument(term, doc, getAnalyzer());
}
@ -1534,7 +1533,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
* @throws CorruptIndexException if the index is corrupt
* @throws IOException if there is a low-level IO error
*/
public void updateDocument(Term term, Document doc, Analyzer analyzer)
public void updateDocument(Term term, Iterable<? extends IndexableField> doc, Analyzer analyzer)
throws CorruptIndexException, IOException {
ensureOpen();
try {
@ -3034,7 +3033,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
DocumentsWriter getDocsWriter() {
boolean test = false;
assert test = true;
return test?docWriter: null;
return test ? docWriter : null;
}
/** Expert: Return the number of documents currently
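The generic signatures above mean any Iterable of documents can be indexed or replaced as one atomic, contiguous block. A hedged sketch, assuming an open writer and two already-built Documents docA and docB (both names are illustrative):

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;

List<Document> block = new ArrayList<Document>();
block.add(docA);
block.add(docB);
writer.addDocuments(block);                               // add as one block
writer.updateDocuments(new Term("blockId", "7"), block);  // atomically replace the old block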

View File

@ -0,0 +1,104 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.Reader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.values.PerDocFieldValues;
import org.apache.lucene.index.values.ValueType;
import org.apache.lucene.util.BytesRef;
// TODO: how to handle versioning here...?
// TODO: we need to break out separate StoredField...
/** Represents a single field for indexing. IndexWriter
* consumes Iterable<IndexableField> as a document.
*
* @lucene.experimental */
public interface IndexableField {
// TODO: add attrs to this API?
/* Field name */
public String name();
// NOTE: if doc/field impl has the notion of "doc level boost"
// it must be multiplied in w/ this field's boost
/** Field boost (you must pre-multiply in any doc boost). */
public float boost();
/* True if the field's value should be stored */
public boolean stored();
/* Non-null if this field has a binary value */
public BytesRef binaryValue();
/* Non-null if this field has a string value */
public String stringValue();
/* Non-null if this field has a Reader value */
public Reader readerValue();
/* Non-null if this field has a pre-tokenized ({@link TokenStream}) value */
public TokenStream tokenStreamValue();
// Numeric field:
/* True if this field is numeric */
public boolean numeric();
/* Numeric {@link NumericField.DataType}; only used if
* the field is numeric */
public NumericField.DataType numericDataType();
/* Numeric value; only used if the field is numeric */
public Number numericValue();
/* True if this field should be indexed (inverted) */
public boolean indexed();
/* True if this field's value should be analyzed */
public boolean tokenized();
/* True if norms should not be indexed */
public boolean omitNorms();
/* {@link IndexOptions}, describing what should be
* recorded into the inverted index */
public IndexOptions indexOptions();
/* True if term vectors should be indexed */
public boolean storeTermVectors();
/* True if term vector offsets should be indexed */
public boolean storeTermVectorOffsets();
/* True if term vector positions should be indexed */
public boolean storeTermVectorPositions();
/* Non-null if doc values should be indexed */
public PerDocFieldValues docValues();
/* DocValues type; only used if docValues is non-null */
public ValueType docValuesType();
}
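Because IndexWriter now consumes Iterable<? extends IndexableField>, a Document (which iterates its fields) is simply the stock implementation. A sketch using the FieldType-based constructors seen elsewhere in this commit, assuming an open writer:

import java.io.StringReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

Document doc = new Document();
doc.add(new Field("id", StringField.TYPE_STORED, "42"));           // indexed, not analyzed
doc.add(new TextField("body", new StringReader("analyzed text")));
writer.addDocument(doc);  // accepted as Iterable<? extends IndexableField>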

View File

@ -19,24 +19,22 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.document.Fieldable;
abstract class InvertedDocConsumerPerField {
// Called once per field, and is given all Fieldable
// Called once per field, and is given all IndexableField
// occurrences for this field in the document. Return
// true if you wish to see inverted tokens for these
// fields:
abstract boolean start(Fieldable[] fields, int count) throws IOException;
abstract boolean start(IndexableField[] fields, int count) throws IOException;
// Called before a field instance is being processed
abstract void start(Fieldable field);
abstract void start(IndexableField field);
// Called once per inverted token
abstract void add() throws IOException;
// Called once per field per document, after all Fieldable
// occurrences are inverted
// Called once per field per document, after all IndexableFields
// are inverted
abstract void finish() throws IOException;
// Called on hitting an aborting exception

View File

@ -22,8 +22,6 @@ import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -258,12 +256,11 @@ public class MultiReader extends IndexReader implements Cloneable {
return maxDoc;
}
// inherit javadoc
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
int i = readerIndex(n); // find segment num
return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
int i = readerIndex(docID); // find segment num
subReaders[i].document(docID - starts[i], visitor); // dispatch to segment reader
}
@Override

View File

@ -17,10 +17,6 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.document.FieldSelectorResult;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.values.IndexDocValues;
import org.apache.lucene.util.Bits;
@ -351,30 +347,12 @@ public class ParallelReader extends IndexReader {
hasDeletions = false;
}
// append fields from storedFieldReaders
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
Document result = new Document();
for (final IndexReader reader: storedFieldReaders) {
boolean include = (fieldSelector==null);
if (!include) {
Collection<String> fields = readerToFields.get(reader);
for (final String field : fields)
if (fieldSelector.accept(field) != FieldSelectorResult.NO_LOAD) {
include = true;
break;
}
}
if (include) {
List<Fieldable> fields = reader.document(n, fieldSelector).getFields();
for (Fieldable field : fields) {
result.add(field);
}
}
reader.document(docID, visitor);
}
return result;
}
// get all vectors

View File

@ -25,9 +25,7 @@ import java.util.Map.Entry;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
@ -71,12 +69,11 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
// index is allowed to have exactly one document or 0.
if (numDocs == 1) {
Document doc = r.document(r.maxDoc() - 1);
Field sid = doc.getField(SNAPSHOTS_ID);
if (sid == null) {
if (doc.getField(SNAPSHOTS_ID) == null) {
throw new IllegalStateException("directory is not a valid snapshots store!");
}
doc.removeField(SNAPSHOTS_ID);
for (Fieldable f : doc.getFields()) {
for (IndexableField f : doc) {
snapshots.put(f.name(), f.stringValue());
}
} else if (numDocs != 0) {
@ -189,12 +186,14 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy {
private void persistSnapshotInfos(String id, String segment) throws IOException {
writer.deleteAll();
Document d = new Document();
d.add(new Field(SNAPSHOTS_ID, "", Store.YES, Index.NO));
FieldType ft = new FieldType();
ft.setStored(true);
d.add(new Field(SNAPSHOTS_ID, ft, ""));
for (Entry<String, String> e : super.getSnapshots().entrySet()) {
d.add(new Field(e.getKey(), e.getValue(), Store.YES, Index.NO));
d.add(new Field(e.getKey(), ft, e.getValue()));
}
if (id != null) {
d.add(new Field(id, segment, Store.YES, Index.NO));
d.add(new Field(id, ft, segment));
}
writer.addDocument(d);
writer.commit();
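For reference, the FieldType built above is the replacement for the old Store.YES / Index.NO pair: only the stored flag is switched on and, as this usage implies, the remaining flags default to off, so the field is kept but never inverted. A minimal sketch with an illustrative name and value:

FieldType storedOnly = new FieldType();
storedOnly.setStored(true);  // keep the value in the stored fields file
// indexed/tokenized/norms/term vectors stay at their off defaults
Field f = new Field("snapshot_0", storedOnly, "segments_4");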

View File

@ -335,6 +335,10 @@ final class SegmentMerger {
// skip deleted docs
continue;
}
// TODO: this could be more efficient using
// FieldVisitor instead of loading/writing entire
// doc; ie we just have to renumber the field number
// on the fly?
// NOTE: it's very important to first assign to doc then pass it to
// termVectorsWriter.addAllDocVectors; see LUCENE-1282
Document doc = reader.reader.document(j);

View File

@ -27,13 +27,11 @@ import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.BitVector;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -455,10 +453,9 @@ public class SegmentReader extends IndexReader implements Cloneable {
return core.fieldInfos;
}
@Override
public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException {
ensureOpen();
return getFieldsReader().doc(n, fieldSelector);
getFieldsReader().visitDocument(docID, visitor);
}
@Override

View File

@ -0,0 +1,87 @@
package org.apache.lucene.index;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.IndexInput;
/**
* Expert: provides a low-level means of accessing the stored field
* values in an index. See {@link IndexReader#document(int,
* StoredFieldVisitor)}.
*
* See {@link DocumentStoredFieldVisitor}, which is a
* <code>StoredFieldVisitor</code> that builds the
* {@link Document} containing all stored fields. This is
* used by {@link IndexReader#document(int)}.
*
* @lucene.experimental */
public class StoredFieldVisitor {
/** Process a binary field. Note that if you want to
* skip the field you must seek the IndexInput
* (e.g., call <code>in.seek(in.getFilePointer() + numBytes)</code>).
*
* <p>Return true to stop loading fields. */
public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException {
in.seek(in.getFilePointer() + numBytes);
return false;
}
/** Process a string field by reading numUTF8Bytes.
* Note that if you want to skip the field you must
* seek the IndexInput past those bytes
* (e.g., call <code>in.seek(in.getFilePointer() + numUTF8Bytes)</code>).
*
* <p>Return true to stop loading fields. */
public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
in.seek(in.getFilePointer() + numUTF8Bytes);
return false;
}
/** Process an int numeric field.
*
* <p>Return true to stop loading fields. */
public boolean intField(FieldInfo fieldInfo, int value) throws IOException {
return false;
}
/** Process a long numeric field.
*
* <p>Return true to stop loading fields. */
public boolean longField(FieldInfo fieldInfo, long value) throws IOException {
return false;
}
/** Process a float numeric field.
*
* <p>Return true to stop loading fields. */
public boolean floatField(FieldInfo fieldInfo, float value) throws IOException {
return false;
}
/** Process a double numeric field.
*
* <p>Return true to stop loading fields. */
public boolean doubleField(FieldInfo fieldInfo, double value) throws IOException {
return false;
}
}
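A hedged sketch of a custom subclass honoring the skip contract documented above: it materializes one string field, skips the bytes of every other string field, and lets the inherited defaults skip binary and numeric fields.

import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.IndexInput;

class TitleOnlyVisitor extends StoredFieldVisitor {
  String title;  // the one value we load

  @Override
  public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
    if ("title".equals(fieldInfo.name)) {
      final byte[] bytes = new byte[numUTF8Bytes];
      in.readBytes(bytes, 0, bytes.length);
      title = new String(bytes, "UTF-8");
      return true;  // stop loading: we have what we came for
    }
    in.seek(in.getFilePointer() + numUTF8Bytes);  // skip, per the contract
    return false;
  }
}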

View File

@ -19,7 +19,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;
@ -41,12 +40,12 @@ final class StoredFieldsWriter {
}
private int numStoredFields;
private Fieldable[] storedFields;
private IndexableField[] storedFields;
private int[] fieldNumbers;
public void reset() {
numStoredFields = 0;
storedFields = new Fieldable[1];
storedFields = new IndexableField[1];
fieldNumbers = new int[1];
}
@ -123,10 +122,10 @@ final class StoredFieldsWriter {
assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end");
}
public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException {
public void addField(IndexableField field, FieldInfo fieldInfo) throws IOException {
if (numStoredFields == storedFields.length) {
int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF);
Fieldable[] newArray = new Fieldable[newSize];
IndexableField[] newArray = new IndexableField[newSize];
System.arraycopy(storedFields, 0, newArray, 0, numStoredFields);
storedFields = newArray;
}

View File

@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef;
*/
public interface TermFreqVector {
/**
* The {@link org.apache.lucene.document.Fieldable} name.
* The {@link org.apache.lucene.index.IndexableField} name.
* @return The name of the field this vector is associated with.
*
*/

View File

@ -20,7 +20,6 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
@ -55,17 +54,17 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
}
@Override
boolean start(Fieldable[] fields, int count) {
boolean start(IndexableField[] fields, int count) {
doVectors = false;
doVectorPositions = false;
doVectorOffsets = false;
for(int i=0;i<count;i++) {
Fieldable field = fields[i];
if (field.isIndexed() && field.isTermVectorStored()) {
IndexableField field = fields[i];
if (field.indexed() && field.storeTermVectors()) {
doVectors = true;
doVectorPositions |= field.isStorePositionWithTermVector();
doVectorOffsets |= field.isStoreOffsetWithTermVector();
doVectorPositions |= field.storeTermVectorPositions();
doVectorOffsets |= field.storeTermVectorOffsets();
}
}
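The flags consulted in the loop above come straight from the field's type. A sketch of switching them on with the FieldType setters used earlier in this commit (field name and text are illustrative):

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(true);          // makes storeTermVectors() return true
ft.setStoreTermVectorPositions(true);  // doVectorPositions
ft.setStoreTermVectorOffsets(true);    // doVectorOffsets
Field body = new Field("body", ft, "some text to vectorize");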
@ -188,7 +187,7 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField {
}
@Override
void start(Fieldable f) {
void start(IndexableField f) {
if (doVectorOffsets) {
offsetAttribute = fieldState.attributeSource.addAttribute(OffsetAttribute.class);
} else {

View File

@ -24,13 +24,11 @@ package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.document.Fieldable;
abstract class TermsHashConsumerPerField {
abstract boolean start(Fieldable[] fields, int count) throws IOException;
abstract boolean start(IndexableField[] fields, int count) throws IOException;
abstract void finish() throws IOException;
abstract void skippingLongTerm() throws IOException;
abstract void start(Fieldable field);
abstract void start(IndexableField field);
abstract void newTerm(int termID) throws IOException;
abstract void addTerm(int termID) throws IOException;
abstract int getStreamCount();

View File

@ -22,7 +22,6 @@ import java.util.Comparator;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.util.ByteBlockPool;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@ -116,7 +115,7 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
private boolean doNextCall;
@Override
void start(Fieldable f) {
void start(IndexableField f) {
termAtt = fieldState.attributeSource.getAttribute(TermToBytesRefAttribute.class);
termBytesRef = termAtt.getBytesRef();
consumer.start(f);
@ -126,11 +125,12 @@ final class TermsHashPerField extends InvertedDocConsumerPerField {
}
@Override
boolean start(Fieldable[] fields, int count) throws IOException {
boolean start(IndexableField[] fields, int count) throws IOException {
doCall = consumer.start(fields, count);
bytesHash.reinit();
if (nextPerField != null)
if (nextPerField != null) {
doNextCall = nextPerField.start(fields, count);
}
return doCall || doNextCall;
}

View File

@ -21,7 +21,6 @@ import java.util.Collection;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.values.IndexDocValuesArray;
import org.apache.lucene.index.values.IndexDocValues.Source;
import org.apache.lucene.index.values.IndexDocValues.SourceEnum;
import org.apache.lucene.index.values.IndexDocValuesArray.ByteValues;
@ -319,7 +318,7 @@ class IntsImpl {
try {
input = (IndexInput) datIn.clone();
input.seek(CodecUtil.headerLength(CODEC_NAME) + 1);
source = loadFixedSource(type, input, numDocs);
success = true;
return source;
} finally {

View File

@ -19,14 +19,12 @@ package org.apache.lucene.index.values;
import java.util.Comparator;
import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.codecs.DocValuesConsumer;
import org.apache.lucene.util.BytesRef;
/**
* Per document and field values consumed by {@link DocValuesConsumer}.
* @see IndexDocValuesField
* @see Fieldable#setDocValues(PerDocFieldValues)
*
* @lucene.experimental
*/
@ -91,11 +89,10 @@ public interface PerDocFieldValues {
/**
* Sets the {@link ValueType}
*/
public void setType(ValueType type);
public void setDocValuesType(ValueType type);
/**
* Returns the {@link ValueType}
*/
public ValueType type();
}
public ValueType docValuesType();
}

Some files were not shown because too many files have changed in this diff.