diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index e56d65d2967..6eada8787be 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -255,6 +255,11 @@ Changes in Runtime Behavior
 * LUCENE-3146: IndexReader.setNorm throws IllegalStateException if the field
   does not store norms. (Shai Erera, Mike McCandless)
+
+* LUCENE-3309: Stored fields no longer record whether they were
+  tokenized or not. In general you should not rely on stored fields
+  to record any "metadata" from indexing (tokenized, omitNorms,
+  IndexOptions, boost, etc.) (Mike McCandless)
 
 API Changes
 
diff --git a/lucene/contrib/CHANGES.txt b/lucene/contrib/CHANGES.txt
index 43726d46970..3ee55548ffc 100644
--- a/lucene/contrib/CHANGES.txt
+++ b/lucene/contrib/CHANGES.txt
@@ -4,6 +4,14 @@ For more information on past and future Lucene versions, please see:
 http://s.apache.org/luceneversions
 
 ======================= Trunk (not yet released) =======================
+
+Changes in Runtime Behavior
+
+ * LUCENE-3309: Fast vector highlighter now inserts the
+   MultiValuedSeparator for NOT_ANALYZED fields (in addition to
+   ANALYZED fields). To ensure your offsets are correct you should
+   provide an analyzer that returns 1 from the offsetGap method.
+   (Mike McCandless)
 
 Build
 
diff --git a/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java
index 934e4d4c1c3..5c578dd637c 100644
--- a/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java
+++ b/lucene/contrib/demo/src/java/org/apache/lucene/demo/IndexFiles.java
@@ -22,7 +22,8 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
-import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -173,8 +174,7 @@ public class IndexFiles {
       // field that is indexed (i.e. searchable), but don't tokenize
       // the field into separate words and don't index term frequency
       // or positional information:
-      Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
-      pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
+      Field pathField = new Field("path", StringField.TYPE_STORED, file.getPath());
       doc.add(pathField);
 
       // Add the last modified date of the file a field named "modified".
@@ -192,7 +192,7 @@ public class IndexFiles {
       // so that the text of the file is tokenized and indexed, but not stored.
       // Note that FileReader expects the file to be in UTF-8 encoding.
       // If that's not the case searching for special characters will fail.
-      doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
+      doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));
 
       if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
         // New index, so we just add the document (no old document can be there):
diff --git a/lucene/contrib/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java b/lucene/contrib/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java
index 6d76153de64..ecd64f2feb5 100644
--- a/lucene/contrib/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java
+++ b/lucene/contrib/demo/src/java/org/apache/lucene/demo/xmlparser/FormBasedXmlQueryDemo.java
@@ -17,10 +17,26 @@ package org.apache.lucene.demo.xmlparser;
 
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.util.Enumeration;
+import java.util.Properties;
+import java.util.StringTokenizer;
+import javax.servlet.RequestDispatcher;
+import javax.servlet.ServletConfig;
+import javax.servlet.ServletException;
+import javax.servlet.http.HttpServlet;
+import javax.servlet.http.HttpServletRequest;
+import javax.servlet.http.HttpServletResponse;
+
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -33,20 +49,6 @@ import org.apache.lucene.util.Version;
 import org.apache.lucene.xmlparser.CorePlusExtensionsParser;
 import org.apache.lucene.xmlparser.QueryTemplateManager;
 
-import javax.servlet.RequestDispatcher;
-import javax.servlet.ServletConfig;
-import javax.servlet.ServletException;
-import javax.servlet.http.HttpServlet;
-import javax.servlet.http.HttpServletRequest;
-import javax.servlet.http.HttpServletResponse;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.util.Enumeration;
-import java.util.Properties;
-import java.util.StringTokenizer;
-
 public class FormBasedXmlQueryDemo extends HttpServlet {
 
   private QueryTemplateManager queryTemplateManager;
@@ -124,20 +126,18 @@ public class FormBasedXmlQueryDemo extends HttpServlet {
     InputStream dataIn = getServletContext().getResourceAsStream("/WEB-INF/data.tsv");
     BufferedReader br = new BufferedReader(new InputStreamReader(dataIn));
     String line = br.readLine();
+    final FieldType textNoNorms = new FieldType(TextField.TYPE_STORED);
+    textNoNorms.setOmitNorms(true);
     while (line != null) {
       line = line.trim();
       if (line.length() > 0) {
         //parse row and create a document
         StringTokenizer st = new StringTokenizer(line, "\t");
         Document doc = new Document();
-        doc.add(new Field("location", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED_NO_NORMS));
-        doc.add(new Field("salary", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED_NO_NORMS));
-        doc.add(new Field("type", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED_NO_NORMS));
-        doc.add(new Field("description", st.nextToken(), Field.Store.YES,
-            Field.Index.ANALYZED));
+        doc.add(new Field("location", textNoNorms, st.nextToken()));
+        doc.add(new Field("salary", textNoNorms, st.nextToken()));
+        doc.add(new Field("type", textNoNorms, st.nextToken()));
+        doc.add(new Field("description", textNoNorms, st.nextToken()));
         writer.addDocument(doc);
       }
       line = br.readLine();
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
index 1043b7cb292..3ba1daf1127 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
@@ -60,10 +60,11 @@ public class TokenSources {
    * @param analyzer The analyzer to use for creating the TokenStream if the
    *        vector doesn't exist
    * @return The {@link org.apache.lucene.analysis.TokenStream} for the
-   *         {@link org.apache.lucene.document.Fieldable} on the
+   *         {@link org.apache.lucene.index.IndexableField} on the
    *         {@link org.apache.lucene.document.Document}
    * @throws IOException if there was an error loading
    */
+
   public static TokenStream getAnyTokenStream(IndexReader reader, int docId, String field, Document doc, Analyzer analyzer) throws IOException {
     TokenStream ts = null;
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
index 906c6b8d8e9..29d5a5a9964 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/BaseFragmentsBuilder.java
@@ -21,15 +21,18 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 
-import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.MapFieldSelector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.search.highlight.DefaultEncoder;
 import org.apache.lucene.search.highlight.Encoder;
-import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo.SubInfo;
+import org.apache.lucene.search.vectorhighlight.FieldFragList.WeightedFragInfo;
 import org.apache.lucene.search.vectorhighlight.FieldPhraseList.WeightedPhraseInfo.Toffs;
+import org.apache.lucene.store.IndexInput;
 
 public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
 
@@ -107,10 +110,27 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
     return fragments.toArray( new String[fragments.size()] );
   }
 
-  protected Field[] getFields( IndexReader reader, int docId, String fieldName) throws IOException {
+  protected Field[] getFields( IndexReader reader, int docId, final String fieldName) throws IOException {
     // according to javadoc, doc.getFields(fieldName) cannot be used with lazy loaded field???
-    Document doc = reader.document( docId, new MapFieldSelector(fieldName) );
-    return doc.getFields( fieldName ); // according to Document class javadoc, this never returns null
+    final List<Field> fields = new ArrayList<Field>();
+    reader.document(docId, new StoredFieldVisitor() {
+        @Override
+        public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
+          if (fieldInfo.name.equals(fieldName)) {
+            final byte[] b = new byte[numUTF8Bytes];
+            in.readBytes(b, 0, b.length);
+            FieldType ft = new FieldType(TextField.TYPE_STORED);
+            ft.setStoreTermVectors(fieldInfo.storeTermVector);
+            ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector);
+            ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector);
+            fields.add(new Field(fieldInfo.name, ft, new String(b, "UTF-8")));
+          } else {
+            in.seek(in.getFilePointer() + numUTF8Bytes);
+          }
+          return false;
+        }
+      });
+    return fields.toArray(new Field[fields.size()]);
   }
 
   protected String makeFragment( StringBuilder buffer, int[] index, Field[] values, WeightedFragInfo fragInfo,
@@ -142,8 +162,7 @@ public abstract class BaseFragmentsBuilder implements FragmentsBuilder {
       int startOffset, int endOffset ){
     while( buffer.length() < endOffset && index[0] < values.length ){
       buffer.append( values[index[0]].stringValue() );
-      if( values[index[0]].isTokenized() )
-        buffer.append( multiValuedSeparator );
+      buffer.append( multiValuedSeparator );
       index[0]++;
     }
     int eo = buffer.length() < endOffset ? buffer.length() : endOffset;
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
index ad8047887ca..93f713e9782 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldTermStack.java
@@ -46,8 +46,12 @@ public class FieldTermStack {
   //  Directory dir = new RAMDirectory();
   //  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
   //  Document doc = new Document();
-  //  doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-  //  doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+  //  FieldType ft = new FieldType(TextField.TYPE_STORED);
+  //  ft.setStoreTermVectors(true);
+  //  ft.setStoreTermVectorOffsets(true);
+  //  ft.setStoreTermVectorPositions(true);
+  //  doc.add( new Field( "f", ft, "a a a b b c a b b c d e f" ) );
+  //  doc.add( new Field( "f", ft, "b a b a f" ) );
   //  writer.addDocument( doc );
   //  writer.close();
 
@@ -67,7 +71,7 @@ public class FieldTermStack {
    */
   public FieldTermStack( IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery ) throws IOException {
     this.fieldName = fieldName;
-    
+
     TermFreqVector tfv = reader.getTermFreqVector( docId, fieldName );
     if( tfv == null ) return; // just return to make null snippets
     TermPositionVector tpv = null;
diff --git a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html
index a71dfb3040d..378ea588ad9 100644
--- a/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html
+++ b/lucene/contrib/highlighter/src/java/org/apache/lucene/search/vectorhighlight/package.html
@@ -25,7 +25,7 @@ This is an another highlighter implementation.
 <li>support N-gram fields</li>
 <li>support phrase-unit highlighting with slops</li>
 <li>need Java 1.5</li>
-<li>highlight fields need to be TermVector.WITH_POSITIONS_OFFSETS</li>
+<li>highlight fields need to be stored with Positions and Offsets</li>
 <li>take into account query boost to score fragments</li>
 <li>support colored highlight tags</li>
 <li>pluggable FragListBuilder</li>
@@ -95,7 +95,7 @@ will be generated:
 
 Step 2.
 
 In Step 2, Fast Vector Highlighter generates {@link org.apache.lucene.search.vectorhighlight.FieldTermStack}. Fast Vector Highlighter uses {@link org.apache.lucene.index.TermFreqVector} data
-(must be stored {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS_OFFSETS})
+(must be stored {@link org.apache.lucene.document.FieldType#setStoreTermVectorOffsets(boolean)} and {@link org.apache.lucene.document.FieldType#setStoreTermVectorPositions(boolean)})
 to generate it. FieldTermStack keeps the terms in the user query. Therefore, in this sample case, Fast Vector Highlighter generates the following FieldTermStack:
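The rewritten package.html above points highlighted fields at the new FieldType API rather than TermVector.WITH_POSITIONS_OFFSETS. For reference, the full indexing pattern this patch applies throughout the tests that follow looks like this; a minimal sketch assembled from the patch's own hunks, with the class wrapper and field name added for illustration only:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.TextField;

    public class FvhFieldSketch {
      public static Document makeDoc(String value) {
        // Start from the analyzed, stored defaults, then enable term
        // vectors with positions and offsets, which FieldTermStack needs.
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        ft.setStoreTermVectorOffsets(true);

        Document doc = new Document();
        doc.add(new Field("f", ft, value));
        return doc;
      }
    }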
    diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
    index 5ab11fddffe..0e76d0deef9 100644
    --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
    +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterPhraseTest.java
    @@ -28,9 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
     import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
    -import org.apache.lucene.document.Field.Index;
    -import org.apache.lucene.document.Field.Store;
    -import org.apache.lucene.document.Field.TermVector;
    +import org.apache.lucene.document.FieldType;
    +import org.apache.lucene.document.TextField;
     import org.apache.lucene.index.CorruptIndexException;
     import org.apache.lucene.index.IndexReader;
     import org.apache.lucene.index.IndexWriter;
    @@ -61,8 +60,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamConcurrent(),
    -          TermVector.WITH_POSITIONS_OFFSETS));
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectorOffsets(true);
    +      customType.setStoreTermVectorPositions(true);
    +      customType.setStoreTermVectors(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamConcurrent()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    @@ -105,8 +107,12 @@ public class HighlighterPhraseTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamConcurrent(),
    -          TermVector.WITH_POSITIONS_OFFSETS));
    +
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectorOffsets(true);
    +      customType.setStoreTermVectorPositions(true);
    +      customType.setStoreTermVectors(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamConcurrent()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    @@ -176,8 +182,12 @@ public class HighlighterPhraseTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamSparse(),
    -          TermVector.WITH_POSITIONS_OFFSETS));
    +
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectorOffsets(true);
    +      customType.setStoreTermVectorPositions(true);
    +      customType.setStoreTermVectors(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamSparse()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    @@ -219,8 +229,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, TEXT, Store.YES, Index.ANALYZED,
    -          TermVector.WITH_OFFSETS));
    +
    +      FieldType customType = new FieldType(TextField.TYPE_STORED);
    +      customType.setStoreTermVectorOffsets(true);
    +      customType.setStoreTermVectors(true);
    +      document.add(new Field(FIELD, customType, TEXT));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    @@ -260,8 +273,11 @@ public class HighlighterPhraseTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamSparse(),
    -          TermVector.WITH_POSITIONS_OFFSETS));
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectorOffsets(true);
    +      customType.setStoreTermVectorPositions(true);
    +      customType.setStoreTermVectors(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamSparse()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    index e3581510e75..728edf5c5c7 100644
    --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/HighlighterTest.java
    @@ -38,8 +38,7 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
     import org.apache.lucene.document.NumericField;
    -import org.apache.lucene.document.Field.Index;
    -import org.apache.lucene.document.Field.Store;
    +import org.apache.lucene.document.TextField;
     import org.apache.lucene.index.IndexReader;
     import org.apache.lucene.index.IndexWriter;
     import org.apache.lucene.index.IndexWriterConfig;
    @@ -1629,7 +1628,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       
       private Document doc( String f, String v ){
         Document doc = new Document();
    -    doc.add( new Field( f, v, Store.YES, Index.ANALYZED ) );
    +    doc.add( new Field( f, TextField.TYPE_STORED, v));
         return doc;
       }
       
    @@ -1690,7 +1689,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
        * QueryFragmentScorer(query));
        * 
        * for (int i = 0; i < hits.totalHits; i++) { String text =
    -   * searcher.doc(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
    +   * searcher.doc2(hits.scoreDocs[i].doc).get(FIELD_NAME); TokenStream
        * tokenStream=bigramAnalyzer.tokenStream(FIELD_NAME,new StringReader(text));
        * String highlightedText = highlighter.getBestFragment(tokenStream,text);
        * System.out.println(highlightedText); } }
    @@ -1744,21 +1743,21 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
           addDoc(writer, text);
         }
         Document doc = new Document();
    -    NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
    +    NumericField nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
         nfield.setIntValue(1);
         doc.add(nfield);
         writer.addDocument(doc, analyzer);
    -    nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
    +    nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
         nfield.setIntValue(3);
         doc = new Document();
         doc.add(nfield);
         writer.addDocument(doc, analyzer);
    -    nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
    +    nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
         nfield.setIntValue(5);
         doc = new Document();
         doc.add(nfield);
         writer.addDocument(doc, analyzer);
    -    nfield = new NumericField(NUMERIC_FIELD_NAME, Store.YES, true);
    +    nfield = new NumericField(NUMERIC_FIELD_NAME, NumericField.TYPE_STORED);
         nfield.setIntValue(7);
         doc = new Document();
         doc.add(nfield);
    @@ -1779,7 +1778,8 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
       }
       private void addDoc(IndexWriter writer, String text) throws IOException {
         Document d = new Document();
    -    Field f = new Field(FIELD_NAME, text, Field.Store.YES, Field.Index.ANALYZED);
    +
    +    Field f = new Field(FIELD_NAME, TextField.TYPE_STORED, text);
         d.add(f);
         writer.addDocument(d);
     
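The NumericField hunks above replace the old (name, Store.YES, index-flag) constructor with a FieldType constant. A minimal sketch of the new-style usage, assuming nothing beyond what the hunks themselves show (the field name and value are illustrative):

    // Stored, indexed numeric field under the new API; the TYPE_STORED
    // constant replaces the old Store.YES/index boolean arguments.
    NumericField nfield = new NumericField("price", NumericField.TYPE_STORED);
    nfield.setIntValue(42);
    Document doc = new Document();
    doc.add(nfield);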
    diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
    index 02dd92d40e0..c368128c7a9 100644
    --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
    +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/highlight/TokenSourcesTest.java
    @@ -28,7 +28,8 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
     import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
    -import org.apache.lucene.document.Field.TermVector;
    +import org.apache.lucene.document.FieldType;
    +import org.apache.lucene.document.TextField;
     import org.apache.lucene.index.CorruptIndexException;
     import org.apache.lucene.index.IndexReader;
     import org.apache.lucene.index.IndexWriter;
    @@ -107,8 +108,10 @@ public class TokenSourcesTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamOverlap(),
    -          TermVector.WITH_OFFSETS));
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectors(true);
    +      customType.setStoreTermVectorOffsets(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    @@ -153,8 +156,11 @@ public class TokenSourcesTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamOverlap(),
    -          TermVector.WITH_POSITIONS_OFFSETS));
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectors(true);
    +      customType.setStoreTermVectorOffsets(true);
    +      customType.setStoreTermVectorPositions(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    @@ -199,8 +205,10 @@ public class TokenSourcesTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamOverlap(),
    -          TermVector.WITH_OFFSETS));
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectors(true);
    +      customType.setStoreTermVectorOffsets(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    @@ -246,8 +254,10 @@ public class TokenSourcesTest extends LuceneTestCase {
             newIndexWriterConfig(TEST_VERSION_CURRENT, new OverlapAnalyzer()));
         try {
           final Document document = new Document();
    -      document.add(new Field(FIELD, new TokenStreamOverlap(),
    -          TermVector.WITH_POSITIONS_OFFSETS));
    +      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +      customType.setStoreTermVectors(true);
    +      customType.setStoreTermVectorOffsets(true);
    +      document.add(new Field(FIELD, customType, new TokenStreamOverlap()));
           indexWriter.addDocument(document);
         } finally {
           indexWriter.close();
    diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
    index 3bf22073508..befaef311e0 100644
    --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
    +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
    @@ -34,14 +34,13 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
     import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
    -import org.apache.lucene.document.Field.Index;
    -import org.apache.lucene.document.Field.Store;
    -import org.apache.lucene.document.Field.TermVector;
    +import org.apache.lucene.document.FieldType;
    +import org.apache.lucene.document.TextField;
     import org.apache.lucene.index.IndexReader;
     import org.apache.lucene.index.IndexWriter;
    +import org.apache.lucene.index.IndexWriterConfig.OpenMode;
     import org.apache.lucene.index.IndexWriterConfig;
     import org.apache.lucene.index.Term;
    -import org.apache.lucene.index.IndexWriterConfig.OpenMode;
     import org.apache.lucene.search.DisjunctionMaxQuery;
     import org.apache.lucene.search.PhraseQuery;
     import org.apache.lucene.search.Query;
    @@ -359,8 +358,13 @@ public abstract class AbstractTestCase extends LuceneTestCase {
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
             TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE));
         Document doc = new Document();
    -    for( String value: values )
    -      doc.add( new Field( F, value, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
    +    FieldType customType = new FieldType(TextField.TYPE_STORED);
    +    customType.setStoreTermVectors(true);
    +    customType.setStoreTermVectorOffsets(true);
    +    customType.setStoreTermVectorPositions(true);
    +    for( String value: values ) {
    +      doc.add( new Field( F, customType, value ) );
    +    }
         writer.addDocument( doc );
         writer.close();
         if (reader != null) reader.close();
    @@ -372,8 +376,14 @@ public abstract class AbstractTestCase extends LuceneTestCase {
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
             TEST_VERSION_CURRENT, analyzerK).setOpenMode(OpenMode.CREATE));
         Document doc = new Document();
    -    for( String value: values )
    -      doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
    +    FieldType customType = new FieldType(TextField.TYPE_STORED);
    +    customType.setStoreTermVectors(true);
    +    customType.setStoreTermVectorOffsets(true);
    +    customType.setStoreTermVectorPositions(true);
    +    for( String value: values ) {
    +      doc.add( new Field( F, customType, value ));
    +      //doc.add( new Field( F, value, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
    +    }
         writer.addDocument( doc );
         writer.close();
         if (reader != null) reader.close();
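Note what happened to Index.NOT_ANALYZED in the second hunk above: the "not analyzed" behavior now comes from the analyzer the writer is configured with (analyzerK), while the FieldType only carries storage and term-vector flags. Where this patch does want the per-field switch, it uses setTokenized(false) instead (see the TestIndicesEquals hunks below); a sketch of that form, with an illustrative field name and value:

    // Index the whole value as a single token, but keep the term
    // vectors the Fast Vector Highlighter needs.
    FieldType keywordType = new FieldType(TextField.TYPE_STORED);
    keywordType.setTokenized(false);
    keywordType.setStoreTermVectors(true);
    keywordType.setStoreTermVectorPositions(true);
    keywordType.setStoreTermVectorOffsets(true);
    doc.add(new Field("f", keywordType, "abc defg"));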
    diff --git a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
    index 795a7884224..7171b68007f 100644
    --- a/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
    +++ b/lucene/contrib/highlighter/src/test/org/apache/lucene/search/vectorhighlight/SimpleFragmentsBuilderTest.java
    @@ -19,9 +19,8 @@ package org.apache.lucene.search.vectorhighlight;
     
     import org.apache.lucene.document.Document;
     import org.apache.lucene.document.Field;
    -import org.apache.lucene.document.Field.Index;
    -import org.apache.lucene.document.Field.Store;
    -import org.apache.lucene.document.Field.TermVector;
    +import org.apache.lucene.document.FieldType;
    +import org.apache.lucene.document.TextField;
     import org.apache.lucene.index.IndexReader;
     import org.apache.lucene.index.IndexWriter;
     import org.apache.lucene.index.IndexWriterConfig;
    @@ -139,7 +138,12 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
         IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
             TEST_VERSION_CURRENT, analyzerW).setOpenMode(OpenMode.CREATE));
         Document doc = new Document();
    -    doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
    +    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    +    customType.setStoreTermVectors(true);
    +    customType.setStoreTermVectorOffsets(true);
    +    customType.setStoreTermVectorPositions(true);
    +    doc.add( new Field( F, customType, "aaa" ) );
    +    //doc.add( new Field( F, "aaa", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
         writer.addDocument( doc );
         writer.close();
         if (reader != null) reader.close();
    @@ -155,9 +159,8 @@ public class SimpleFragmentsBuilderTest extends AbstractTestCase {
         SimpleFragListBuilder sflb = new SimpleFragListBuilder();
         FieldFragList ffl = sflb.createFieldFragList( fpl, 100 );
         SimpleFragmentsBuilder sfb = new SimpleFragmentsBuilder();
    -    // '/' separator doesn't effect the snippet because of NOT_ANALYZED field
         sfb.setMultiValuedSeparator( '/' );
    -    assertEquals( "abcdefghijkl", sfb.createFragment( reader, 0, F, ffl ) );
    +    assertEquals( "abc/defg/hijkl/", sfb.createFragment( reader, 0, F, ffl ) );
       }
       
       public void testMVSeparator() throws Exception {
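The new expected snippet "abc/defg/hijkl/" is the test-side consequence of the runtime change recorded in lucene/contrib/CHANGES.txt at the top of this patch: the multi-valued separator is now appended after every field instance, tokenized or not. Per that CHANGES entry, offsets only stay correct if the analyzer reports an offset gap of 1. A sketch of the idea; MyAnalyzer stands in for any non-final analyzer of yours, and the getOffsetGap parameter type is an assumption for this trunk snapshot:

    Analyzer a = new MyAnalyzer() {
      @Override
      public int getOffsetGap(IndexableField field) {
        // exactly one character: the MultiValuedSeparator inserted
        // between instances, whether or not the field was tokenized
        return 1;
      }
    };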
    diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocument.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocument.java
    index 889f749d47e..10b124f9706 100644
    --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocument.java
    +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedDocument.java
    @@ -68,7 +68,6 @@ public class InstantiatedDocument {
         return document;
       }
     
    -
       @Override
       public String toString() {
         return document.toString();
    diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
    index f486142d032..21194e2b93e 100644
    --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
    +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndex.java
    @@ -27,8 +27,8 @@ import java.util.Set;
     
     import org.apache.lucene.analysis.Analyzer;
     import org.apache.lucene.document.Document;
    -import org.apache.lucene.document.Fieldable;
     import org.apache.lucene.index.IndexReader;
    +import org.apache.lucene.index.IndexableField;
     import org.apache.lucene.index.MultiNorms;
     import org.apache.lucene.index.Term;
     import org.apache.lucene.index.TermsEnum;
    @@ -190,16 +190,16 @@ public class InstantiatedIndex
             InstantiatedDocument document = new InstantiatedDocument();
             // copy stored fields from source reader
             Document sourceDocument = sourceIndexReader.document(i);
    -        for (Fieldable field : sourceDocument.getFields()) {
    +        for (IndexableField field : sourceDocument) {
               if (fields == null || fields.contains(field.name())) {
                 document.getDocument().add(field);
               }
             }
             document.setDocumentNumber(i);
             documentsByNumber[i] = document;
    -        for (Fieldable field : document.getDocument().getFields()) {
    +        for (IndexableField field : document.getDocument()) {
               if (fields == null || fields.contains(field.name())) {
    -            if (field.isTermVectorStored()) {
    +            if (field.storeTermVectors()) {
                   if (document.getVectorSpace() == null) {
                     document.setVectorSpace(new HashMap>());
                   }
    @@ -290,8 +290,8 @@ public class InstantiatedIndex
           if (document == null) {
             continue; // deleted
           }
    -      for (Fieldable field : document.getDocument().getFields()) {
    -        if (field.isTermVectorStored() && field.isStoreOffsetWithTermVector()) {
    +      for (IndexableField field : document.getDocument()) {
    +        if (field.storeTermVectors() && field.storeTermVectorOffsets()) {
               TermPositionVector termPositionVector = (TermPositionVector) sourceIndexReader.getTermFreqVector(document.getDocumentNumber(), field.name());
               if (termPositionVector != null) {
                 for (int i = 0; i < termPositionVector.getTerms().length; i++) {
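Two API shifts drive the InstantiatedIndex hunks above: a Document is now directly iterable over its IndexableFields, and the old Fieldable accessors (isTermVectorStored(), isStoreOffsetWithTermVector(), ...) become flag methods on IndexableField. The new idiom, as a minimal sketch:

    for (IndexableField field : doc) {
      if (field.storeTermVectors() && field.storeTermVectorOffsets()) {
        // this field's term vectors carry offsets
      }
    }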
    diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
    index b8b10477a34..817fbcea108 100644
    --- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
    +++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexReader.java
    @@ -30,7 +30,6 @@ import java.util.Set;
     import java.util.Comparator;
     
     import org.apache.lucene.document.Document;
    -import org.apache.lucene.document.FieldSelector;
     import org.apache.lucene.index.*;
     import org.apache.lucene.index.codecs.PerDocValues;
     import org.apache.lucene.store.Directory;
    @@ -251,42 +250,6 @@ public class InstantiatedIndexReader extends IndexReader {
         return fieldSet;
       }
     
-  /**
-   * Return the {@link org.apache.lucene.document.Document} at the nth
-   * position.
-   * <p>
-   * Warning!
-   * The resulting document is the actual stored document instance
-   * and not a deserialized clone as retuned by an IndexReader
-   * over a {@link org.apache.lucene.store.Directory}.
-   * I.e., if you need to touch the document, clone it first!
-   * <p>
-   * This can also be seen as a feature for live changes of stored values,
-   * but be careful! Adding a field with an name unknown to the index
-   * or to a field with previously no stored values will make
-   * {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
-   * out of sync, causing problems for instance when merging the
-   * instantiated index to another index.
-   * <p>
-   * This implementation ignores the field selector! All stored fields are always returned!
-   * <p>
-   *
-   * @param n document number
-   * @param fieldSelector ignored
-   * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
-   * @throws CorruptIndexException if the index is corrupt
-   * @throws IOException if there is a low-level IO error
-   *
-   * @see org.apache.lucene.document.Fieldable
-   * @see org.apache.lucene.document.FieldSelector
-   * @see org.apache.lucene.document.SetBasedFieldSelector
-   * @see org.apache.lucene.document.LoadFirstFieldSelector
-   */
-  @Override
-  public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException {
-    return document(n);
-  }
-
   /**
    * Returns the stored fields of the nth
    * Document in this index.
@@ -313,6 +276,11 @@ public class InstantiatedIndexReader extends IndexReader {
     return getIndex().getDocumentsByNumber()[n].getDocument();
   }
 
+  @Override
+  public void document(int docID, StoredFieldVisitor visitor) throws IOException {
+    throw new UnsupportedOperationException();
+  }
+
   /**
    * never ever touch these values. it is the true values, unless norms have
    * been touched.
diff --git a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
index 8b2635085c7..9af81086ad1 100644
--- a/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
+++ b/lucene/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedIndexWriter.java
@@ -37,9 +37,9 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermVectorOffsetInfo;
 import org.apache.lucene.search.IndexSearcher;
@@ -238,7 +238,7 @@ public class InstantiatedIndexWriter implements Closeable {
         if (eFieldTermDocInfoFactoriesByTermText.getKey().indexed && !eFieldTermDocInfoFactoriesByTermText.getKey().omitNorms) {
           final String fieldName = eFieldTermDocInfoFactoriesByTermText.getKey().fieldName;
           final FieldInvertState invertState = new FieldInvertState();
-          invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost * document.getDocument().getBoost());
+          invertState.setBoost(eFieldTermDocInfoFactoriesByTermText.getKey().boost);
           invertState.setLength(eFieldTermDocInfoFactoriesByTermText.getKey().fieldLength);
           normsByFieldNameAndDocumentNumber.get(fieldName)[document.getDocumentNumber()] = similarityProvider.get(fieldName).computeNorm(invertState);
         } else {
@@ -469,7 +469,7 @@ public class InstantiatedIndexWriter implements Closeable {
 
     // normalize settings per field name in document
     Map<String, FieldSetting> fieldSettingsByFieldName = new HashMap<String, FieldSetting>();
-    for (Fieldable field : document.getDocument().getFields()) {
+    for (IndexableField field : document.getDocument()) {
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
       if (fieldSetting == null) {
         fieldSetting = new FieldSetting();
@@ -479,52 +479,52 @@ public class InstantiatedIndexWriter implements Closeable {
       }
 
       // todo: fixme: multiple fields with the same name does not mean field boost += more boost.
-      fieldSetting.boost *= field.getBoost();
+      fieldSetting.boost *= field.boost();
       //fieldSettings.dimensions++;
 
       // once fieldSettings, always fieldSettings.
-      if (field.getOmitNorms()) {
+      if (field.omitNorms()) {
         fieldSetting.omitNorms = true;
       }
-      if (field.isIndexed() ) {
+      if (field.indexed() ) {
         fieldSetting.indexed = true;
       }
-      if (field.isTokenized()) {
+      if (field.tokenized()) {
         fieldSetting.tokenized = true;
       }
-      if (field.isStored()) {
+      if (field.stored()) {
         fieldSetting.stored = true;
       }
-      if (field.isBinary()) {
+      if (field.binaryValue() != null) {
         fieldSetting.isBinary = true;
       }
-      if (field.isTermVectorStored()) {
+      if (field.storeTermVectors()) {
         fieldSetting.storeTermVector = true;
       }
-      if (field.isStorePositionWithTermVector()) {
+      if (field.storeTermVectorPositions()) {
        fieldSetting.storePositionWithTermVector = true;
       }
-      if (field.isStoreOffsetWithTermVector()) {
+      if (field.storeTermVectorOffsets()) {
         fieldSetting.storeOffsetWithTermVector = true;
       }
     }
 
-    Map<Fieldable, LinkedList<Token>> tokensByField = new LinkedHashMap<Fieldable, LinkedList<Token>>(20);
+    Map<IndexableField, LinkedList<Token>> tokensByField = new LinkedHashMap<IndexableField, LinkedList<Token>>(20);
 
     // tokenize indexed fields.
-    for (Iterator<Fieldable> it = document.getDocument().getFields().iterator(); it.hasNext();) {
+    for (Iterator<IndexableField> it = document.getDocument().iterator(); it.hasNext();) {
 
-      Fieldable field = it.next();
+      IndexableField field = it.next();
 
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(field.name());
 
-      if (field.isIndexed()) {
+      if (field.indexed()) {
 
         LinkedList<Token> tokens = new LinkedList<Token>();
         tokensByField.put(field, tokens);
 
-        if (field.isTokenized()) {
+        if (field.tokenized()) {
           final TokenStream tokenStream;
           // todo readerValue(), binaryValue()
           if (field.tokenStreamValue() != null) {
@@ -564,8 +564,8 @@ public class InstantiatedIndexWriter implements Closeable {
         }
       }
 
-      if (!field.isStored()) {
-        it.remove();
+      if (!field.stored()) {
+        //it.remove();
       }
     }
 
@@ -574,7 +574,7 @@ public class InstantiatedIndexWriter implements Closeable {
     termDocumentInformationFactoryByDocument.put(document, termDocumentInformationFactoryByTermTextAndFieldSetting);
 
     // build term vector, term positions and term offsets
-    for (Map.Entry<Fieldable, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
+    for (Map.Entry<IndexableField, LinkedList<Token>> eField_Tokens : tokensByField.entrySet()) {
       FieldSetting fieldSetting = fieldSettingsByFieldName.get(eField_Tokens.getKey().name());
 
       Map<String, TermDocumentInformationFactory> termDocumentInformationFactoryByTermText = termDocumentInformationFactoryByTermTextAndFieldSetting.get(fieldSettingsByFieldName.get(eField_Tokens.getKey().name()));
@@ -610,7 +610,7 @@ public class InstantiatedIndexWriter implements Closeable {
           termDocumentInformationFactory.payloads.add(null);
         }
 
-        if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
+        if (eField_Tokens.getKey().storeTermVectorOffsets()) {
 
           termDocumentInformationFactory.termOffsets.add(new TermVectorOffsetInfo(fieldSetting.offset + token.startOffset(), fieldSetting.offset + token.endOffset()));
           lastOffset = fieldSetting.offset + token.endOffset();
@@ -619,7 +619,7 @@ public class InstantiatedIndexWriter implements Closeable {
 
       }
 
-      if (eField_Tokens.getKey().isStoreOffsetWithTermVector()) {
+      if (eField_Tokens.getKey().storeTermVectorOffsets()) {
         fieldSetting.offset = lastOffset + 1;
       }
 
diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
index 692d1a6f80b..004b8fb92ca 100644
--- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
+++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestIndicesEquals.java
@@ -29,6 +29,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiNorms;
@@ -204,19 +206,44 @@ public class TestIndicesEquals extends LuceneTestCase {
 
   private void assembleDocument(Document document, int i) {
-    document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+    customType.setStoreTermVectors(true);
+    customType.setStoreTermVectorOffsets(true);
+    customType.setStoreTermVectorPositions(true);
+    //document.add(new Field("a", i + " Do you really want to go and live in that house all winter?", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    document.add(new Field("a", customType, i + " Do you really want to go and live in that house all winter?"));
     if (i > 0) {
-      document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-      document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
-      document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
-      document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
+      //document.add(new Field("b0", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+      document.add(new Field("b0", customType, i + " All work and no play makes Jack a dull boy"));
+
+      //document.add(new Field("b1", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS, Field.TermVector.NO));
+      FieldType customType2 = new FieldType(TextField.TYPE_STORED);
+      customType2.setTokenized(false);
+      customType2.setOmitNorms(true);
+      document.add(new Field("b1", customType2, i + " All work and no play makes Jack a dull boy"));
+
+      //document.add(new Field("b2", i + " All work and no play makes Jack a dull boy", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.NO));
+      FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED);
+      customType3.setTokenized(false);
+      document.add(new Field("b1", customType3, i + " All work and no play makes Jack a dull boy"));
+
+      //document.add(new Field("b3", i + " All work and no play makes Jack a dull boy", Field.Store.YES, Field.Index.NO, Field.TermVector.NO));
+      FieldType customType4 = new FieldType(TextField.TYPE_STORED);
+      customType4.setIndexed(false);
+      customType4.setTokenized(false);
+      document.add(new Field("b1", customType4, i + " All work and no play makes Jack a dull boy"));
       if (i > 1) {
-        document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        //document.add(new Field("c", i + " Redrum redrum", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+        document.add(new Field("c", customType, i + " Redrum redrum"));
         if (i > 2) {
-          document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+          //document.add(new Field("d", i + " Hello Danny, come and play with us... forever and ever. and ever.", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+          document.add(new Field("d", customType, i + " Hello Danny, come and play with us... forever and ever. and ever."));
           if (i > 3) {
-            Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
-            f.setOmitNorms(true);
+            //Field f = new Field("e", i + " Heres Johnny!", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+            //f.setOmitNorms(true);
+            FieldType customType5 = new FieldType(TextField.TYPE_UNSTORED);
+            customType5.setOmitNorms(true);
+            Field f = new Field("e", customType5, i + " Heres Johnny!");
             document.add(f);
             if (i > 4) {
               final List<Token> tokens = new ArrayList<Token>(2);
@@ -247,7 +274,8 @@ public class TestIndicesEquals extends LuceneTestCase {
             }
           };
 
-          document.add(new Field("f", ts));
+          //document.add(new Field("f", ts));
+          document.add(new TextField("f", ts));
         }
       }
     }
diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java
index 43b11cc0100..bd1e7191f73 100644
--- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java
+++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestRealTime.java
@@ -19,7 +19,7 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.util.LuceneTestCase;
@@ -43,7 +43,7 @@ public class TestRealTime extends LuceneTestCase {
     Collector collector;
 
     doc = new Document();
-    doc.add(new Field("f", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(new StringField("f", "a"));
     writer.addDocument(doc);
     writer.commit();
 
@@ -52,7 +52,7 @@ public class TestRealTime extends LuceneTestCase {
     assertEquals(1, collector.hits);
 
     doc = new Document();
-    doc.add(new Field("f", "a", Field.Store.NO, Field.Index.NOT_ANALYZED));
+    doc.add(new StringField("f", "a"));
     writer.addDocument(doc);
     writer.commit();
 
diff --git a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java
index eb98258d9b9..549164b1565 100644
--- a/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java
+++ b/lucene/contrib/instantiated/src/test/org/apache/lucene/store/instantiated/TestUnoptimizedReaderOnConstructor.java
@@ -25,7 +25,7 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 
 /**
  * @since 2009-mar-30 13:15:49
@@ -66,7 +66,7 @@ public class TestUnoptimizedReaderOnConstructor extends LuceneTestCase {
 
   private void addDocument(IndexWriter iw, String text) throws IOException {
     Document doc = new Document();
-    doc.add(new Field("field", text, Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(new TextField("field", text));
     iw.addDocument(doc);
   }
 }
diff --git a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
index d6e7e09e50d..daa2c889f2e 100644
--- a/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
+++ b/lucene/contrib/memory/src/java/org/apache/lucene/index/memory/MemoryIndex.java
@@ -35,23 +35,24 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.lucene.index.OrdTermState;
-import org.apache.lucene.index.TermState;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
-import org.apache.lucene.index.FieldsEnum;
-import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.DocsAndPositionsEnum;
+import org.apache.lucene.index.DocsEnum;
+import org.apache.lucene.index.FieldInvertState;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.FieldsEnum;
+import org.apache.lucene.index.IndexReader.AtomicReaderContext;
+import org.apache.lucene.index.IndexReader.ReaderContext;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.OrdTermState;
+import org.apache.lucene.index.StoredFieldVisitor;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
+import org.apache.lucene.index.TermState;
 import org.apache.lucene.index.TermVectorMapper;
-import org.apache.lucene.index.FieldInvertState;
 import org.apache.lucene.index.codecs.PerDocValues;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
@@ -60,8 +61,8 @@ import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.SimilarityProvider;
 import org.apache.lucene.store.RAMDirectory; // for javadocs
 import org.apache.lucene.util.ArrayUtil;
-import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Bits;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.Constants; // for javadocs
 
 /**
@@ -240,11 +241,8 @@ public class MemoryIndex {
   /**
    * Convenience method; Tokenizes the given field text and adds the resulting
    * terms to the index; Equivalent to adding an indexed non-keyword Lucene
-   * {@link org.apache.lucene.document.Field} that is
-   * {@link org.apache.lucene.document.Field.Index#ANALYZED tokenized},
-   * {@link org.apache.lucene.document.Field.Store#NO not stored},
-   * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions} (or
-   * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions and offsets}),
+   * {@link org.apache.lucene.document.Field} that is tokenized, not stored,
+   * termVectorStored with positions (or termVectorStored with positions and offsets),
    *
    * @param fieldName
    *            a name to be associated with the text
@@ -1237,18 +1235,11 @@ public class MemoryIndex {
     }
 
     @Override
-    public Document document(int n) {
+    public void document(int docID, StoredFieldVisitor visitor) {
       if (DEBUG) System.err.println("MemoryIndexReader.document");
-      return new Document(); // there are no stored fields
+      // no-op: there are no stored fields
     }
-
-    //When we convert to JDK 1.5 make this Set<String>
-    @Override
-    public Document document(int n, FieldSelector fieldSelector) throws IOException {
-      if (DEBUG) System.err.println("MemoryIndexReader.document");
-      return new Document(); // there are no stored fields
-    }
-
+
     @Override
     public boolean hasDeletions() {
       if (DEBUG) System.err.println("MemoryIndexReader.hasDeletions");
diff --git a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
index 1fe710cc3a0..060778c4aa6 100644
--- a/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
+++ b/lucene/contrib/memory/src/test/org/apache/lucene/index/memory/MemoryIndexTest.java
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.queryparser.classic.QueryParser;
@@ -108,8 +109,8 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
     IndexWriter writer = new IndexWriter(ramdir,
                                          new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setCodecProvider(_TestUtil.alwaysCodec("Standard")));
     Document doc = new Document();
-    Field field1 = newField("foo", fooField.toString(), Field.Store.NO, Field.Index.ANALYZED);
-    Field field2 = newField("term", termField.toString(), Field.Store.NO, Field.Index.ANALYZED);
+    Field field1 = newField("foo", fooField.toString(), TextField.TYPE_UNSTORED);
+    Field field2 = newField("term", termField.toString(), TextField.TYPE_UNSTORED);
     doc.add(field1);
     doc.add(field2);
     writer.addDocument(doc);
diff --git a/lucene/src/java/org/apache/lucene/document/FieldSelector.java b/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelector.java
similarity index 87%
rename from lucene/src/java/org/apache/lucene/document/FieldSelector.java
rename to lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelector.java
index da98add236e..7ae9c470a3d 100755
--- a/lucene/src/java/org/apache/lucene/document/FieldSelector.java
+++ b/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelector.java
@@ -1,5 +1,6 @@
 package org.apache.lucene.document;
+
 
 /**
  * Copyright 2004 The Apache Software Foundation
 *
@@ -18,7 +19,7 @@ package org.apache.lucene.document;
 
 /**
  * Similar to a {@link java.io.FileFilter}, the FieldSelector allows one to make decisions about
- * what Fields get loaded on a {@link Document} by {@link org.apache.lucene.index.IndexReader#document(int,org.apache.lucene.document.FieldSelector)}
+ * what Fields get loaded on a {@link Document} by {@link FieldSelectorVisitor}
  *
 **/
 public interface FieldSelector {
diff --git a/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java b/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorResult.java similarity index 67% rename from lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java rename to lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorResult.java index 3a35fff9c8e..81f820d5f65 100755 --- a/lucene/src/java/org/apache/lucene/document/FieldSelectorResult.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorResult.java @@ -20,39 +20,41 @@ package org.apache.lucene.document; * Provides information about what should be done with this Field * **/ +import org.apache.lucene.index.IndexableField; // for javadocs + public enum FieldSelectorResult { /** * Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is encountered. - * {@link Document#getField(String)} and {@link Document#getFieldable(String)} should not return null. + * {@link Document#getField(String)} should not return null. *
- * {@link Document#add(Fieldable)} should be called by the Reader. + * {@link Document#add(IndexableField)} should be called by the Reader. */ LOAD, /** * Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually contain its data until - * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should - * return a valid instance of a {@link Fieldable}. + * invoked. {@link Document#getField(String)} is safe to use and should + * return a valid instance of an {@link IndexableField}. *
    - * {@link Document#add(Fieldable)} should be called by the Reader. + * {@link Document#add(IndexableField)} should be called by the Reader. */ LAZY_LOAD, /** - * Do not load the {@link Field}. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should return null. - * {@link Document#add(Fieldable)} is not called. + * Do not load the {@link Field}. {@link Document#getField(String)} should return null. + * {@link Document#add(IndexableField)} is not called. *
- * {@link Document#add(Fieldable)} should not be called by the Reader. + * {@link Document#add(IndexableField)} should not be called by the Reader. */ NO_LOAD, /** * Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading for the {@link Document}. Thus, the - * Document may not have its complete set of Fields. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should + * Document may not have its complete set of Fields. {@link Document#getField(String)} should - * both be valid for this {@link Field} + * be valid for this {@link Field} *
- * {@link Document#add(Fieldable)} should be called by the Reader. + * {@link Document#add(IndexableField)} should be called by the Reader. */ LOAD_AND_BREAK, @@ -67,10 +69,10 @@ public enum FieldSelectorResult { /** * Lazily load this {@link Field}, but do not cache the result. This means the {@link Field} is valid, but it may not actually contain its data until - * invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link Document#getFieldable(String)} is safe to use and should - * return a valid instance of a {@link Fieldable}. + * invoked. {@link Document#getField(String)} is safe to use and should + * return a valid instance of an {@link IndexableField}. *
    - * {@link Document#add(Fieldable)} should be called by the Reader. + * {@link Document#add(IndexableField)} should be called by the Reader. */ LATENT } diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java b/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java new file mode 100644 index 00000000000..a5583cc5239 --- /dev/null +++ b/lucene/contrib/misc/src/java/org/apache/lucene/document/FieldSelectorVisitor.java @@ -0,0 +1,319 @@ +package org.apache.lucene.document; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.io.Reader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.NumericField.DataType; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldReaderException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.BytesRef; + +/** Create this, passing a legacy {@link FieldSelector} to it, then + * pass this class to {@link IndexReader#document(int, + * StoredFieldVisitor)}, then call {@link #getDocument} to + * retrieve the loaded document. + + *
    NOTE: If you use Lazy fields, you should not + * access the returned document after the reader has been + * closed! + */ + +public class FieldSelectorVisitor extends StoredFieldVisitor { + + private final FieldSelector selector; + private final Document doc; + + public FieldSelectorVisitor(FieldSelector selector) { + this.selector = selector; + doc = new Document(); + } + + public Document getDocument() { + return doc; + } + + @Override + public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException { + final FieldSelectorResult accept = selector.accept(fieldInfo.name); + switch (accept) { + case LOAD: + case LOAD_AND_BREAK: + final byte[] b = new byte[numBytes]; + in.readBytes(b, 0, b.length); + doc.add(new BinaryField(fieldInfo.name, b)); + return accept != FieldSelectorResult.LOAD; + case LAZY_LOAD: + case LATENT: + addFieldLazy(in, fieldInfo, true, accept == FieldSelectorResult.LAZY_LOAD, numBytes); + return false; + case SIZE: + case SIZE_AND_BREAK: + in.seek(in.getFilePointer() + numBytes); + addFieldSize(fieldInfo, numBytes); + return accept != FieldSelectorResult.SIZE; + default: + // skip + in.seek(in.getFilePointer() + numBytes); + return false; + } + } + + @Override + public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException { + final FieldSelectorResult accept = selector.accept(fieldInfo.name); + switch (accept) { + case LOAD: + case LOAD_AND_BREAK: + final byte[] b = new byte[numUTF8Bytes]; + in.readBytes(b, 0, b.length); + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setStoreTermVectors(fieldInfo.storeTermVector); + ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector); + ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector); + doc.add(new Field(fieldInfo.name, ft, new String(b, "UTF-8"))); + return accept != FieldSelectorResult.LOAD; + case LAZY_LOAD: + case LATENT: + addFieldLazy(in, fieldInfo, false, accept == FieldSelectorResult.LAZY_LOAD, numUTF8Bytes); + return false; + case SIZE: + case SIZE_AND_BREAK: + in.seek(in.getFilePointer() + numUTF8Bytes); + addFieldSize(fieldInfo, 2*numUTF8Bytes); + return accept != FieldSelectorResult.SIZE; + default: + // skip + in.seek(in.getFilePointer() + numUTF8Bytes); + return false; + } + } + + @Override + public boolean intField(FieldInfo fieldInfo, int value) throws IOException { + FieldType ft = new FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + ft.setOmitNorms(fieldInfo.omitNorms); + ft.setIndexOptions(fieldInfo.indexOptions); + return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setIntValue(value)); + } + + @Override + public boolean longField(FieldInfo fieldInfo, long value) throws IOException { + FieldType ft = new FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + ft.setOmitNorms(fieldInfo.omitNorms); + ft.setIndexOptions(fieldInfo.indexOptions); + return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setLongValue(value)); + } + + @Override + public boolean floatField(FieldInfo fieldInfo, float value) throws IOException { + FieldType ft = new FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + ft.setOmitNorms(fieldInfo.omitNorms); + ft.setIndexOptions(fieldInfo.indexOptions); + return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setFloatValue(value)); + } + + @Override + public boolean doubleField(FieldInfo fieldInfo, double value) throws IOException { + FieldType ft = new 
FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + ft.setOmitNorms(fieldInfo.omitNorms); + ft.setIndexOptions(fieldInfo.indexOptions); + return addNumericField(fieldInfo, new NumericField(fieldInfo.name, ft).setDoubleValue(value)); + } + + private boolean addNumericField(FieldInfo fieldInfo, NumericField f) { + doc.add(f); + final FieldSelectorResult accept = selector.accept(fieldInfo.name); + switch (accept) { + case LOAD: + return false; + case LOAD_AND_BREAK: + return true; + case LAZY_LOAD: + case LATENT: + return false; + case SIZE: + return false; + case SIZE_AND_BREAK: + return true; + default: + return false; + } + } + + private void addFieldLazy(IndexInput in, FieldInfo fi, boolean binary, boolean cacheResult, int numBytes) throws IOException { + final IndexableField f; + final long pointer = in.getFilePointer(); + // Need to move the pointer ahead by toRead positions + in.seek(pointer+numBytes); + FieldType ft = new FieldType(); + ft.setStored(true); + ft.setOmitNorms(fi.omitNorms); + ft.setIndexOptions(fi.indexOptions); + ft.setLazy(true); + + if (binary) { + f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult); + } else { + ft.setStoreTermVectors(fi.storeTermVector); + ft.setStoreTermVectorOffsets(fi.storeOffsetWithTermVector); + ft.setStoreTermVectorPositions(fi.storePositionWithTermVector); + f = new LazyField(in, fi.name, ft, numBytes, pointer, binary, cacheResult); + } + + doc.add(f); + } + + // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes) + // Read just the size -- caller must skip the field content to continue reading fields + // Return the size in bytes or chars, depending on field type + private void addFieldSize(FieldInfo fi, int numBytes) throws IOException { + byte[] sizebytes = new byte[4]; + sizebytes[0] = (byte) (numBytes>>>24); + sizebytes[1] = (byte) (numBytes>>>16); + sizebytes[2] = (byte) (numBytes>>> 8); + sizebytes[3] = (byte) numBytes ; + doc.add(new BinaryField(fi.name, sizebytes)); + } + + /** + * A Lazy field implementation that defers loading of fields until asked for, instead of when the Document is + * loaded. + */ + private static class LazyField extends Field { + private int toRead; + private long pointer; + private final boolean cacheResult; + private final IndexInput in; + private boolean isBinary; + + public LazyField(IndexInput in, String name, FieldType ft, int toRead, long pointer, boolean isBinary, boolean cacheResult) { + super(name, ft); + this.in = in; + this.toRead = toRead; + this.pointer = pointer; + this.isBinary = isBinary; + this.cacheResult = cacheResult; + } + + @Override + public Number numericValue() { + return null; + } + + @Override + public DataType numericDataType() { + return null; + } + + private IndexInput localFieldsStream; + + private IndexInput getFieldStream() { + if (localFieldsStream == null) { + localFieldsStream = (IndexInput) in.clone(); + } + return localFieldsStream; + } + + /** The value of the field as a Reader, or null. If null, the String value, + * binary value, or TokenStream value is used. Exactly one of stringValue(), + * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */ + @Override + public Reader readerValue() { + return null; + } + + /** The value of the field as a TokenStream, or null. If null, the Reader value, + * String value, or binary value is used. Exactly one of stringValue(), + * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. 
*/ + @Override + public TokenStream tokenStreamValue() { + return null; + } + + /** The value of the field as a String, or null. If null, the Reader value, + * binary value, or TokenStream value is used. Exactly one of stringValue(), + * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */ + @Override + synchronized public String stringValue() { + if (isBinary) { + return null; + } else { + if (fieldsData == null) { + String result = null; + IndexInput localFieldsStream = getFieldStream(); + try { + localFieldsStream.seek(pointer); + byte[] bytes = new byte[toRead]; + localFieldsStream.readBytes(bytes, 0, toRead); + result = new String(bytes, "UTF-8"); + } catch (IOException e) { + throw new FieldReaderException(e); + } + if (cacheResult == true){ + fieldsData = result; + } + return result; + } else { + return (String) fieldsData; + } + } + } + + @Override + synchronized public BytesRef binaryValue() { + if (isBinary) { + if (fieldsData == null) { + // Allocate new buffer if result is null or too small + final byte[] b = new byte[toRead]; + + IndexInput localFieldsStream = getFieldStream(); + + // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people + // since they are already handling this exception when getting the document + try { + localFieldsStream.seek(pointer); + localFieldsStream.readBytes(b, 0, toRead); + } catch (IOException e) { + throw new FieldReaderException(e); + } + + final BytesRef result = new BytesRef(b); + result.length = toRead; + if (cacheResult == true){ + fieldsData = result; + } + return result; + } else { + return (BytesRef) fieldsData; + } + } else { + return null; + } + } + } +} diff --git a/lucene/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java b/lucene/contrib/misc/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java similarity index 99% rename from lucene/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java rename to lucene/contrib/misc/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java index 9928dd41a15..3daaf66d997 100755 --- a/lucene/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/document/LoadFirstFieldSelector.java @@ -26,4 +26,4 @@ public class LoadFirstFieldSelector implements FieldSelector { public FieldSelectorResult accept(String fieldName) { return FieldSelectorResult.LOAD_AND_BREAK; } -} \ No newline at end of file +} diff --git a/lucene/src/java/org/apache/lucene/document/MapFieldSelector.java b/lucene/contrib/misc/src/java/org/apache/lucene/document/MapFieldSelector.java similarity index 100% rename from lucene/src/java/org/apache/lucene/document/MapFieldSelector.java rename to lucene/contrib/misc/src/java/org/apache/lucene/document/MapFieldSelector.java diff --git a/lucene/src/java/org/apache/lucene/document/SetBasedFieldSelector.java b/lucene/contrib/misc/src/java/org/apache/lucene/document/SetBasedFieldSelector.java similarity index 99% rename from lucene/src/java/org/apache/lucene/document/SetBasedFieldSelector.java rename to lucene/contrib/misc/src/java/org/apache/lucene/document/SetBasedFieldSelector.java index 598e1eebe56..813368bf3a9 100755 --- a/lucene/src/java/org/apache/lucene/document/SetBasedFieldSelector.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/document/SetBasedFieldSelector.java @@ -1,6 +1,5 @@ package org.apache.lucene.document; -import java.util.Set; /** * Copyright 2004 The Apache Software Foundation * @@ -17,10 
+16,14 @@ import java.util.Set; * limitations under the License. */ +import java.util.Set; + + /** * Declare what fields to load normally and what fields to load lazily * **/ + public class SetBasedFieldSelector implements FieldSelector { private Set fieldsToLoad; @@ -55,4 +58,4 @@ public class SetBasedFieldSelector implements FieldSelector { } return result; } -} \ No newline at end of file +} diff --git a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java index c17ac02aed7..4322f0ad333 100644 --- a/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java +++ b/lucene/contrib/misc/src/java/org/apache/lucene/index/FieldNormModifier.java @@ -35,7 +35,7 @@ import org.apache.lucene.util.ReaderUtil; * * If a Similarity class is specified, uses its computeNorm method to set norms. * If the -n command line argument is used, removes field norms, as if - * {@link org.apache.lucene.document.Field.Index}.NO_NORMS was used. + * {@link org.apache.lucene.document.FieldType#setOmitNorms(boolean)} was used. * *
    * NOTE: This will overwrite any length normalization or field/document boosts. diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribFieldsReader.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribFieldsReader.java new file mode 100644 index 00000000000..d3ae6894d56 --- /dev/null +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribFieldsReader.java @@ -0,0 +1,318 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.File; +import java.io.IOException; +import java.util.*; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.FieldSelectorResult; +import org.apache.lucene.document.FieldSelectorVisitor; +import org.apache.lucene.document.LoadFirstFieldSelector; +import org.apache.lucene.document.SetBasedFieldSelector; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; +import org.junit.AfterClass; +import org.junit.BeforeClass; + + +public class TestContribFieldsReader extends LuceneTestCase { + private static Directory dir; + private static org.apache.lucene.document.Document testDoc = new org.apache.lucene.document.Document(); + private static FieldInfos fieldInfos = null; + + @BeforeClass + public static void beforeClass() throws Exception { + fieldInfos = new FieldInfos(); + DocHelper.setupDoc(testDoc); + _TestUtil.add(testDoc, fieldInfos); + dir = newDirectory(); + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()); + ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); + IndexWriter writer = new IndexWriter(dir, conf); + writer.addDocument(testDoc); + writer.close(); + } + + @AfterClass + public static void afterClass() throws Exception { + dir.close(); + dir = null; + fieldInfos = null; + testDoc = null; + } + + private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException { + final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector); + ir.document(docID, visitor); + return visitor.getDocument(); + } + + public void testLazyFields() throws Exception { + assertTrue(dir != null); + assertTrue(fieldInfos != null); + IndexReader reader = IndexReader.open(dir); + Set loadFieldNames = new HashSet(); + loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY); + loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY); + Set lazyFieldNames = new HashSet(); + //new 
String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY}; + lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY); + lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY); + lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY); + lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY); + SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames); + Document doc = getDocument(reader, 0, fieldSelector); + assertTrue("doc is null and it shouldn't be", doc != null); + IndexableField field = doc.getField(DocHelper.LAZY_FIELD_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField")); + String value = field.stringValue(); + assertTrue("value is null and it shouldn't be", value != null); + assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + + field = doc.getField(DocHelper.TEXT_FIELD_1_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField")); + field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField")); + assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true); + + field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField")); + assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true); + + field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null); + + byte [] bytes = field.binaryValue().bytes; + assertTrue("bytes is null and it shouldn't be", bytes != null); + assertTrue("lazy binary field length is mismatched", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length); + assertTrue("calling binaryValue() twice should give same reference", field.binaryValue().bytes == field.binaryValue().bytes); + for (int i = 0; i < bytes.length; i++) { + assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]); + + } + reader.close(); + } + + public void testLatentFields() throws Exception { + assertTrue(dir != null); + assertTrue(fieldInfos != null); + IndexReader reader = IndexReader.open(dir); + Set loadFieldNames = new HashSet(); + loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY); + loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY); + Set lazyFieldNames = new HashSet(); + //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY}; + lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY); + lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY); + lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY); + lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY); + + // Use LATENT instead of LAZY + SetBasedFieldSelector fieldSelector = new 
SetBasedFieldSelector(loadFieldNames, lazyFieldNames) { + @Override + public FieldSelectorResult accept(String fieldName) { + final FieldSelectorResult result = super.accept(fieldName); + if (result == FieldSelectorResult.LAZY_LOAD) { + return FieldSelectorResult.LATENT; + } else { + return result; + } + } + }; + + Document doc = getDocument(reader, 0, fieldSelector); + assertTrue("doc is null and it shouldn't be", doc != null); + IndexableField field = doc.getField(DocHelper.LAZY_FIELD_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField")); + String value = field.stringValue(); + assertTrue("value is null and it shouldn't be", value != null); + assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true); + assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue()); + + field = doc.getField(DocHelper.TEXT_FIELD_1_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField")); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + + field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertFalse("Field is lazy and it should not be", field.getClass().getSimpleName().equals("LazyField")); + assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true); + assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue()); + + field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("field is not lazy and it should be", field.getClass().getSimpleName().equals("LazyField")); + assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true); + assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue()); + + field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null); + assertTrue("calling binaryValue() twice should give different references", field.binaryValue().bytes != field.binaryValue().bytes); + + byte [] bytes = field.binaryValue().bytes; + assertTrue("bytes is null and it shouldn't be", bytes != null); + assertTrue("lazy binary field length is mismatched", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length); + for (int i = 0; i < bytes.length; i++) { + assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]); + + } + reader.close(); + } + + public void testLoadFirst() throws Exception { + assertTrue(dir != null); + assertTrue(fieldInfos != null); + IndexReader reader = IndexReader.open(dir); + LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector(); + Document doc = getDocument(reader, 0, fieldSelector); + assertTrue("doc is null and it shouldn't be", doc != null); + int count = 0; + List l = doc.getFields(); + for (final IndexableField indexableField : l) { + Field field = (Field) indexableField; + + 
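+      // LOAD_AND_BREAK stops stored-field visiting after the first loaded field,
+      // so this loop should see exactly one field (count == 1 is asserted below).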
assertTrue("field is null and it shouldn't be", field != null); + String sv = field.stringValue(); + assertTrue("sv is null and it shouldn't be", sv != null); + count++; + } + assertTrue(count + " does not equal: " + 1, count == 1); + reader.close(); + } + + /** + * Not really a test per se, but we should have some way of assessing whether this is worthwhile. + *
    + * Must test using a File based directory + * + * @throws Exception + */ + public void testLazyPerformance() throws Exception { + String userName = System.getProperty("user.name"); + File file = _TestUtil.getTempDir("lazyDir" + userName); + Directory tmpDir = newFSDirectory(file); + assertTrue(tmpDir != null); + + IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy()); + ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); + IndexWriter writer = new IndexWriter(tmpDir, conf); + writer.addDocument(testDoc); + writer.close(); + + assertTrue(fieldInfos != null); + long lazyTime = 0; + long regularTime = 0; + int length = 10; + Set lazyFieldNames = new HashSet(); + lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY); + SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections. emptySet(), lazyFieldNames); + + for (int i = 0; i < length; i++) { + IndexReader reader = IndexReader.open(tmpDir); + + Document doc; + doc = reader.document(0);//Load all of them + assertTrue("doc is null and it shouldn't be", doc != null); + IndexableField field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY); + assertTrue("field is null and it shouldn't be", field != null); + assertFalse("field is lazy", field.getClass().getSimpleName().equals("LazyField")); + String value; + long start; + long finish; + start = System.currentTimeMillis(); + //On my machine this was always 0ms. + value = field.stringValue(); + finish = System.currentTimeMillis(); + assertTrue("value is null and it shouldn't be", value != null); + regularTime += (finish - start); + reader.close(); + reader = null; + doc = null; + //Hmmm, are we still in cache??? + System.gc(); + reader = IndexReader.open(tmpDir); + doc = getDocument(reader, 0, fieldSelector); + field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY); + assertTrue("field is not lazy", field.getClass().getSimpleName().equals("LazyField")); + start = System.currentTimeMillis(); + //On my machine this took around 50 - 70ms + value = field.stringValue(); + finish = System.currentTimeMillis(); + assertTrue("value is null and it shouldn't be", value != null); + lazyTime += (finish - start); + reader.close(); + + } + tmpDir.close(); + if (VERBOSE) { + System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads"); + System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads"); + } + } + + public void testLoadSize() throws IOException { + IndexReader reader = IndexReader.open(dir); + Document doc; + + doc = getDocument(reader, 0, new FieldSelector(){ + public FieldSelectorResult accept(String fieldName) { + if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) || + fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY)) + return FieldSelectorResult.SIZE; + else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY)) + return FieldSelectorResult.LOAD; + else + return FieldSelectorResult.NO_LOAD; + } + }); + IndexableField f1 = doc.getField(DocHelper.TEXT_FIELD_1_KEY); + IndexableField f3 = doc.getField(DocHelper.TEXT_FIELD_3_KEY); + IndexableField fb = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY); + assertTrue(f1.binaryValue()!=null); + assertTrue(f3.binaryValue()==null); + assertTrue(fb.binaryValue()!=null); + assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.binaryValue().bytes); + assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue()); 
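+    // Illustrative aside (not part of the original patch): a SIZE result is
+    // stored as a 4-byte big-endian length, so given byte[] b = f1.binaryValue().bytes
+    // the size could be decoded as:
+    //   int size = ((b[0] & 0xff) << 24) | ((b[1] & 0xff) << 16)
+    //            | ((b[2] & 0xff) << 8) | (b[3] & 0xff);
+    // The assertSizeEquals helper checks the same big-endian layout byte by byte.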
+ assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.binaryValue().bytes); + + reader.close(); + } + + private void assertSizeEquals(int size, byte[] sizebytes) { + assertEquals((byte) (size>>>24), sizebytes[0]); + assertEquals((byte) (size>>>16), sizebytes[1]); + assertEquals((byte) (size>>> 8), sizebytes[2]); + assertEquals((byte) size , sizebytes[3]); + } +} diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribIndexReader.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribIndexReader.java new file mode 100644 index 00000000000..d2a741d2c5d --- /dev/null +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribIndexReader.java @@ -0,0 +1,169 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.BinaryField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.FieldSelectorVisitor; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.SetBasedFieldSelector; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; + +public class TestContribIndexReader extends LuceneTestCase { + private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException { + final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector); + ir.document(docID, visitor); + return visitor.getDocument(); + } + + static void addDoc(IndexWriter writer, String value) throws IOException { + Document doc = new Document(); + doc.add(newField("content", value, TextField.TYPE_UNSTORED)); + writer.addDocument(doc); + } + + static void addDocumentWithFields(IndexWriter writer) throws IOException { + Document doc = new Document(); + + FieldType customType3 = new FieldType(); + customType3.setStored(true); + doc.add(newField("keyword", "test1", StringField.TYPE_STORED)); + doc.add(newField("text", "test1", TextField.TYPE_STORED)); + doc.add(newField("unindexed", "test1", customType3)); + doc.add(new TextField("unstored","test1")); + writer.addDocument(doc); + } + + + static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException { + Document doc = new Document(); + + FieldType customType3 = new FieldType(); + customType3.setStored(true); + doc.add(newField("keyword2", "test1", 
StringField.TYPE_STORED)); + doc.add(newField("text2", "test1", TextField.TYPE_STORED)); + doc.add(newField("unindexed2", "test1", customType3)); + doc.add(new TextField("unstored2","test1")); + writer.addDocument(doc); + } + + static void addDocumentWithTermVectorFields(IndexWriter writer) throws IOException { + Document doc = new Document(); + FieldType customType5 = new FieldType(TextField.TYPE_STORED); + customType5.setStoreTermVectors(true); + FieldType customType6 = new FieldType(TextField.TYPE_STORED); + customType6.setStoreTermVectors(true); + customType6.setStoreTermVectorOffsets(true); + FieldType customType7 = new FieldType(TextField.TYPE_STORED); + customType7.setStoreTermVectors(true); + customType7.setStoreTermVectorPositions(true); + FieldType customType8 = new FieldType(TextField.TYPE_STORED); + customType8.setStoreTermVectors(true); + customType8.setStoreTermVectorOffsets(true); + customType8.setStoreTermVectorPositions(true); + doc.add(newField("tvnot","tvnot",TextField.TYPE_STORED)); + doc.add(newField("termvector","termvector",customType5)); + doc.add(newField("tvoffset","tvoffset", customType6)); + doc.add(newField("tvposition","tvposition", customType7)); + doc.add(newField("tvpositionoffset","tvpositionoffset", customType8)); + + writer.addDocument(doc); + } + + public void testBinaryFields() throws IOException { + Directory dir = newDirectory(); + byte[] bin = new byte[]{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); + + for (int i = 0; i < 10; i++) { + addDoc(writer, "document number " + (i + 1)); + addDocumentWithFields(writer); + addDocumentWithDifferentFields(writer); + addDocumentWithTermVectorFields(writer); + } + writer.close(); + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); + Document doc = new Document(); + doc.add(new BinaryField("bin1", bin)); + doc.add(new TextField("junk", "junk text")); + writer.addDocument(doc); + writer.close(); + IndexReader reader = IndexReader.open(dir, false); + Document doc2 = reader.document(reader.maxDoc() - 1); + IndexableField[] fields = doc2.getFields("bin1"); + assertNotNull(fields); + assertEquals(1, fields.length); + Field b1 = (Field) fields[0]; + assertTrue(b1.isBinary()); + BytesRef bytesRef = b1.binaryValue(); + assertEquals(bin.length, bytesRef.length); + for (int i = 0; i < bin.length; i++) { + assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); + } + Set lazyFields = new HashSet(); + lazyFields.add("bin1"); + FieldSelector sel = new SetBasedFieldSelector(new HashSet(), lazyFields); + doc2 = getDocument(reader, reader.maxDoc() - 1, sel); + fields = doc2.getFields("bin1"); + assertNotNull(fields); + assertEquals(1, fields.length); + IndexableField fb1 = fields[0]; + assertTrue(fb1.binaryValue()!=null); + bytesRef = fb1.binaryValue(); + assertEquals(bin.length, bytesRef.bytes.length); + assertEquals(bin.length, bytesRef.length); + for (int i = 0; i < bin.length; i++) { + assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); + } + reader.close(); + // force optimize + + + writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); + writer.optimize(); + writer.close(); + reader = IndexReader.open(dir, false); + doc2 = reader.document(reader.maxDoc() 
- 1); + fields = doc2.getFields("bin1"); + assertNotNull(fields); + assertEquals(1, fields.length); + b1 = (Field) fields[0]; + assertTrue(b1.isBinary()); + bytesRef = b1.binaryValue(); + assertEquals(bin.length, bytesRef.length); + for (int i = 0; i < bin.length; i++) { + assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); + } + reader.close(); + dir.close(); + } +} diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribParallelReader.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribParallelReader.java new file mode 100644 index 00000000000..dcb2c925237 --- /dev/null +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestContribParallelReader.java @@ -0,0 +1,149 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Arrays; +import java.util.Random; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.FieldSelectorVisitor; +import org.apache.lucene.document.MapFieldSelector; +import org.apache.lucene.document.TextField; +import org.apache.lucene.search.*; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.LuceneTestCase; + +public class TestContribParallelReader extends LuceneTestCase { + + private IndexSearcher parallel; + private IndexSearcher single; + private Directory dir, dir1, dir2; + + @Override + public void setUp() throws Exception { + super.setUp(); + single = single(random); + parallel = parallel(random); + } + + @Override + public void tearDown() throws Exception { + single.getIndexReader().close(); + single.close(); + parallel.getIndexReader().close(); + parallel.close(); + dir.close(); + dir1.close(); + dir2.close(); + super.tearDown(); + } + + // Fields 1-4 indexed together: + private IndexSearcher single(Random random) throws IOException { + dir = newDirectory(); + IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document d1 = new Document(); + d1.add(newField("f1", "v1", TextField.TYPE_STORED)); + d1.add(newField("f2", "v1", TextField.TYPE_STORED)); + d1.add(newField("f3", "v1", TextField.TYPE_STORED)); + d1.add(newField("f4", "v1", TextField.TYPE_STORED)); + w.addDocument(d1); + Document d2 = new Document(); + d2.add(newField("f1", "v2", TextField.TYPE_STORED)); + d2.add(newField("f2", "v2", TextField.TYPE_STORED)); + d2.add(newField("f3", "v2", TextField.TYPE_STORED)); + d2.add(newField("f4", "v2", TextField.TYPE_STORED)); + w.addDocument(d2); + w.close(); + + return new IndexSearcher(dir, false); + } + + // Fields 1 & 2 in one index, 3 & 4 in other, with ParallelReader: + 
private IndexSearcher parallel(Random random) throws IOException { + dir1 = getDir1(random); + dir2 = getDir2(random); + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + return newSearcher(pr); + } + + private Document getDocument(IndexReader ir, int docID, FieldSelector selector) throws IOException { + final FieldSelectorVisitor visitor = new FieldSelectorVisitor(selector); + ir.document(docID, visitor); + return visitor.getDocument(); + } + + public void testDocument() throws IOException { + Directory dir1 = getDir1(random); + Directory dir2 = getDir2(random); + ParallelReader pr = new ParallelReader(); + pr.add(IndexReader.open(dir1, false)); + pr.add(IndexReader.open(dir2, false)); + + Document doc11 = getDocument(pr, 0, new MapFieldSelector("f1")); + Document doc24 = getDocument(pr, 1, new MapFieldSelector(Arrays.asList("f4"))); + Document doc223 = getDocument(pr, 1, new MapFieldSelector("f2", "f3")); + + assertEquals(1, doc11.getFields().size()); + assertEquals(1, doc24.getFields().size()); + assertEquals(2, doc223.getFields().size()); + + assertEquals("v1", doc11.get("f1")); + assertEquals("v2", doc24.get("f4")); + assertEquals("v2", doc223.get("f2")); + assertEquals("v2", doc223.get("f3")); + pr.close(); + dir1.close(); + dir2.close(); + } + + private Directory getDir1(Random random) throws IOException { + Directory dir1 = newDirectory(); + IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document d1 = new Document(); + d1.add(newField("f1", "v1", TextField.TYPE_STORED)); + d1.add(newField("f2", "v1", TextField.TYPE_STORED)); + w1.addDocument(d1); + Document d2 = new Document(); + d2.add(newField("f1", "v2", TextField.TYPE_STORED)); + d2.add(newField("f2", "v2", TextField.TYPE_STORED)); + w1.addDocument(d2); + w1.close(); + return dir1; + } + + private Directory getDir2(Random random) throws IOException { + Directory dir2 = newDirectory(); + IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); + Document d3 = new Document(); + d3.add(newField("f3", "v1", TextField.TYPE_STORED)); + d3.add(newField("f4", "v1", TextField.TYPE_STORED)); + w2.addDocument(d3); + Document d4 = new Document(); + d4.add(newField("f3", "v2", TextField.TYPE_STORED)); + d4.add(newField("f4", "v2", TextField.TYPE_STORED)); + w2.addDocument(d4); + w2.close(); + return dir2; + } +} diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java index d5896a45a27..ac28283f061 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestFieldNormModifier.java @@ -22,7 +22,8 @@ import java.util.Arrays; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DefaultSimilarity; @@ -65,13 +66,15 @@ public class TestFieldNormModifier extends LuceneTestCase { for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); - d.add(newField("field", "word", Field.Store.YES, Field.Index.ANALYZED)); - 
d.add(newField("nonorm", "word", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - d.add(newField("untokfield", "20061212 20071212", Field.Store.YES, Field.Index.ANALYZED)); + + d.add(newField("field", "word", TextField.TYPE_STORED)); + + d.add(newField("nonorm", "word", StringField.TYPE_STORED)); + d.add(newField("untokfield", "20061212 20071212", TextField.TYPE_STORED)); for (int j = 1; j <= i; j++) { - d.add(newField("field", "crap", Field.Store.YES, Field.Index.ANALYZED)); - d.add(newField("nonorm", "more words", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + d.add(newField("field", "crap", TextField.TYPE_STORED)); + d.add(newField("nonorm", "more words", StringField.TYPE_STORED)); } writer.addDocument(d); } diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java index 8bbec0d2e3f..4ef088afae8 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestIndexSplitter.java @@ -21,6 +21,7 @@ import java.io.File; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -112,10 +113,10 @@ public class TestIndexSplitter extends LuceneTestCase { Directory fsDir = newFSDirectory(indexPath); IndexWriter indexWriter = new IndexWriter(fsDir, iwConfig); Document doc = new Document(); - doc.add(new Field("content", "doc 1", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)); + doc.add(new Field("content", StringField.TYPE_STORED, "doc 1")); indexWriter.addDocument(doc); doc = new Document(); - doc.add(new Field("content", "doc 2", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS)); + doc.add(new Field("content", StringField.TYPE_STORED, "doc 2")); indexWriter.addDocument(doc); indexWriter.close(); fsDir.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java similarity index 87% rename from lucene/src/test/org/apache/lucene/index/TestLazyBug.java rename to lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java index 560c424dd5e..bc0973a681f 100755 --- a/lucene/src/test/org/apache/lucene/index/TestLazyBug.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestLazyBug.java @@ -22,7 +22,11 @@ import java.util.List; import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.*; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.document.FieldSelectorResult; +import org.apache.lucene.document.FieldSelectorVisitor; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.junit.AfterClass; @@ -87,8 +91,7 @@ public class TestLazyBug extends LuceneTestCase { doc.add(newField("f"+f, data[f % data.length] + '#' + data[random.nextInt(data.length)], - Field.Store.NO, - Field.Index.ANALYZED)); + TextField.TYPE_UNSTORED)); } writer.addDocument(doc); } @@ -102,12 +105,14 @@ public class TestLazyBug extends LuceneTestCase { public void doTest(int[] docs) throws Exception { IndexReader reader = 
IndexReader.open(directory, true); for (int i = 0; i < docs.length; i++) { - Document d = reader.document(docs[i], SELECTOR); + final FieldSelectorVisitor visitor = new FieldSelectorVisitor(SELECTOR); + reader.document(docs[i], visitor); + Document d = visitor.getDocument(); d.get(MAGIC_FIELD); - List fields = d.getFields(); - for (Iterator fi = fields.iterator(); fi.hasNext(); ) { - Fieldable f=null; + List fields = d.getFields(); + for (Iterator fi = fields.iterator(); fi.hasNext(); ) { + IndexableField f=null; try { f = fi.next(); String fname = f.name(); @@ -136,5 +141,4 @@ public class TestLazyBug extends LuceneTestCase { public void testLazyBroken() throws Exception { doTest(new int[] { NUM_DOCS/2, NUM_DOCS-1 }); } - } diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java index 29b654a10f9..3f4d609d53a 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestMultiPassIndexSplitter.java @@ -18,10 +18,11 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; public class TestMultiPassIndexSplitter extends LuceneTestCase { IndexReader input; @@ -36,8 +37,8 @@ public class TestMultiPassIndexSplitter extends LuceneTestCase { Document doc; for (int i = 0; i < NUM_DOCS; i++) { doc = new Document(); - doc.add(newField("id", i + "", Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(newField("f", i + " " + i, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("id", i + "", StringField.TYPE_STORED)); + doc.add(newField("f", i + " " + i, TextField.TYPE_STORED)); w.addDocument(doc); } w.close(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestNRTManager.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestNRTManager.java index 880f869da15..d2301717f1e 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestNRTManager.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestNRTManager.java @@ -33,7 +33,8 @@ import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.PhraseQuery; @@ -77,17 +78,12 @@ public class TestNRTManager extends LuceneTestCase { // TODO: is there a pre-existing way to do this!!! private Document cloneDoc(Document doc1) { final Document doc2 = new Document(); - for(Fieldable f : doc1.getFields()) { + for(IndexableField f : doc1) { Field field1 = (Field) f; Field field2 = new Field(field1.name(), - field1.stringValue(), - field1.isStored() ? Field.Store.YES : Field.Store.NO, - field1.isIndexed() ? (field1.isTokenized() ? 
Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO); - if (field1.getOmitNorms()) { - field2.setOmitNorms(true); - } - field2.setIndexOptions(field1.getIndexOptions()); + ((Field) f).getFieldType(), + field1.stringValue()); doc2.add(field2); } @@ -240,7 +236,7 @@ public class TestNRTManager extends LuceneTestCase { final String addedField; if (random.nextBoolean()) { addedField = "extra" + random.nextInt(10); - doc.add(new Field(addedField, "a random field", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new TextField(addedField, "a random field")); } else { addedField = null; } @@ -262,7 +258,7 @@ public class TestNRTManager extends LuceneTestCase { packID = packCount.getAndIncrement() + ""; } - final Field packIDField = newField("packID", packID, Field.Store.YES, Field.Index.NOT_ANALYZED); + final Field packIDField = newField("packID", packID, StringField.TYPE_STORED); final List docIDs = new ArrayList(); final SubDocs subDocs = new SubDocs(packID, docIDs); final List docsList = new ArrayList(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java index 15815ba1c29..25dab63c430 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestPKIndexSplitter.java @@ -23,8 +23,8 @@ import java.text.NumberFormat; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -102,15 +102,15 @@ public class TestPKIndexSplitter extends LuceneTestCase { StringBuilder sb = new StringBuilder(); Document doc = new Document(); String id = format.format(n); - doc.add(newField("id", id, Store.YES, Index.NOT_ANALYZED)); - doc.add(newField("indexname", indexName, Store.YES, Index.NOT_ANALYZED)); + doc.add(newField("id", id, StringField.TYPE_STORED)); + doc.add(newField("indexname", indexName, StringField.TYPE_STORED)); sb.append("a"); sb.append(n); - doc.add(newField("field1", sb.toString(), Store.YES, Index.ANALYZED)); + doc.add(newField("field1", sb.toString(), TextField.TYPE_STORED)); sb.append(" b"); sb.append(n); for (int i = 1; i < numFields; i++) { - doc.add(newField("field" + (i + 1), sb.toString(), Store.YES, Index.ANALYZED)); + doc.add(newField("field" + (i + 1), sb.toString(), TextField.TYPE_STORED)); } return doc; } diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java index dd79d727835..120c8477aa1 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/TestTermVectorAccessor.java @@ -2,7 +2,8 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -30,33 
+31,42 @@ public class TestTermVectorAccessor extends LuceneTestCase { Document doc; doc = new Document(); - doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + doc.add(newField("a", "a b a c a d a e a f a g a h a", customType)); + doc.add(newField("b", "a b c b d b e b f b g b h b", customType)); + doc.add(newField("c", "a c b c d c e c f c g c h c", customType)); iw.addDocument(doc); doc = new Document(); - doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); - doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); - doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); + FieldType customType2 = new FieldType(TextField.TYPE_UNSTORED); + customType2.setStoreTermVectors(true); + customType2.setStoreTermVectorPositions(true); + doc.add(newField("a", "a b a c a d a e a f a g a h a", customType2)); + doc.add(newField("b", "a b c b d b e b f b g b h b", customType2)); + doc.add(newField("c", "a c b c d c e c f c g c h c", customType2)); iw.addDocument(doc); doc = new Document(); - doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); - doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); - doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); + FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED); + customType3.setStoreTermVectors(true); + doc.add(newField("a", "a b a c a d a e a f a g a h a", customType3)); + doc.add(newField("b", "a b c b d b e b f b g b h b", customType3)); + doc.add(newField("c", "a c b c d c e c f c g c h c", customType3)); iw.addDocument(doc); doc = new Document(); - doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); - doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); - doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(newField("a", "a b a c a d a e a f a g a h a", TextField.TYPE_UNSTORED)); + doc.add(newField("b", "a b c b d b e b f b g b h b", TextField.TYPE_UNSTORED)); + doc.add(newField("c", "a c b c d c e c f c g c h c", TextField.TYPE_UNSTORED)); iw.addDocument(doc); doc = new Document(); - doc.add(newField("a", "a b a c a d a e a f a g a h a", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - doc.add(newField("b", "a b c b d b e b f b g b h b", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO)); - doc.add(newField("c", "a c b c d c e c f c g c h c", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES)); + doc.add(newField("a", "a b a c a d a e a f a g a h a", 
customType)); + doc.add(newField("b", "a b c b d b e b f b g b h b", TextField.TYPE_UNSTORED)); + doc.add(newField("c", "a c b c d c e c f c g c h c", customType3)); iw.addDocument(doc); iw.close(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java index 53ee338a870..4c6c72f1b63 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/index/codecs/appending/TestAppendingCodec.java @@ -22,9 +22,8 @@ import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Fields; import org.apache.lucene.index.IndexReader; @@ -141,7 +140,11 @@ public class TestAppendingCodec extends LuceneTestCase { ((TieredMergePolicy)cfg.getMergePolicy()).setUseCompoundFile(false); IndexWriter writer = new IndexWriter(dir, cfg); Document doc = new Document(); - doc.add(newField("f", text, Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + FieldType storedTextType = new FieldType(TextField.TYPE_STORED); + storedTextType.setStoreTermVectors(true); + storedTextType.setStoreTermVectorPositions(true); + storedTextType.setStoreTermVectorOffsets(true); + doc.add(newField("f", text, storedTextType)); writer.addDocument(doc); writer.commit(); writer.addDocument(doc); @@ -149,8 +152,8 @@ public class TestAppendingCodec extends LuceneTestCase { writer.close(); IndexReader reader = IndexReader.open(dir, null, true, 1, new AppendingCodecProvider()); assertEquals(2, reader.numDocs()); - doc = reader.document(0); - assertEquals(text, doc.get("f")); + Document doc2 = reader.document(0); + assertEquals(text, doc2.get("f")); Fields fields = MultiFields.getFields(reader); Terms terms = fields.terms("f"); assertNotNull(terms); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java index 5d6eb8ad8a9..4176cb4a636 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestHighFreqTerms.java @@ -20,7 +20,7 @@ package org.apache.lucene.misc; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.Directory; @@ -203,9 +203,9 @@ public class TestHighFreqTerms extends LuceneTestCase { Document doc = new Document(); String content = getContent(i); - doc.add(newField(random, "FIELD_1", content, Field.Store.YES,Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(newField(random, "FIELD_1", content, TextField.TYPE_STORED)); //add a different field - doc.add(newField(random, "different_field", "diff", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(newField(random, "different_field", "diff", 
TextField.TYPE_STORED)); writer.addDocument(doc); } @@ -213,7 +213,7 @@ public class TestHighFreqTerms extends LuceneTestCase { //highest freq terms for a specific field. for (int i = 1; i <= 10; i++) { Document doc = new Document(); - doc.add(newField(random, "different_field", "diff", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(newField(random, "different_field", "diff", TextField.TYPE_STORED)); writer.addDocument(doc); } // add some docs where tf < df so we can see if sorting works @@ -224,7 +224,7 @@ public class TestHighFreqTerms extends LuceneTestCase { for (int i = 0; i < highTF; i++) { content += "highTF "; } - doc.add(newField(random, "FIELD_1", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(newField(random, "FIELD_1", content, TextField.TYPE_STORED)); writer.addDocument(doc); // highTF medium df =5 int medium_df = 5; @@ -235,7 +235,7 @@ public class TestHighFreqTerms extends LuceneTestCase { for (int j = 0; j < tf; j++) { newcontent += "highTFmedDF "; } - newdoc.add(newField(random, "FIELD_1", newcontent, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + newdoc.add(newField(random, "FIELD_1", newcontent, TextField.TYPE_STORED)); writer.addDocument(newdoc); } // add a doc with high tf in field different_field @@ -245,7 +245,7 @@ public class TestHighFreqTerms extends LuceneTestCase { for (int i = 0; i < targetTF; i++) { content += "TF150 "; } - doc.add(newField(random, "different_field", content, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + doc.add(newField(random, "different_field", content, TextField.TYPE_STORED)); writer.addDocument(doc); writer.close(); diff --git a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java index 1ab8e7c335a..6a18043cddc 100644 --- a/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/misc/TestLengthNormModifier.java @@ -21,11 +21,12 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.FieldNormModifier; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MultiNorms; import org.apache.lucene.index.Term; @@ -70,16 +71,12 @@ public class TestLengthNormModifier extends LuceneTestCase { for (int i = 0; i < NUM_DOCS; i++) { Document d = new Document(); - d.add(newField("field", "word", - Field.Store.YES, Field.Index.ANALYZED)); - d.add(newField("nonorm", "word", - Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + d.add(newField("field", "word", TextField.TYPE_STORED)); + d.add(newField("nonorm", "word", StringField.TYPE_STORED)); for (int j = 1; j <= i; j++) { - d.add(newField("field", "crap", - Field.Store.YES, Field.Index.ANALYZED)); - d.add(newField("nonorm", "more words", - Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + d.add(newField("field", "crap", TextField.TYPE_STORED)); + d.add(newField("nonorm", "more words", StringField.TYPE_STORED)); } writer.addDocument(d); } diff --git 
a/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java b/lucene/contrib/misc/src/test/org/apache/lucene/search/TestThreadSafe.java similarity index 69% rename from lucene/src/test/org/apache/lucene/search/TestThreadSafe.java rename to lucene/contrib/misc/src/test/org/apache/lucene/search/TestThreadSafe.java index 110bb94219f..aa307c66d93 100755 --- a/lucene/src/test/org/apache/lucene/search/TestThreadSafe.java +++ b/lucene/contrib/misc/src/test/org/apache/lucene/search/TestThreadSafe.java @@ -22,11 +22,11 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; import java.util.Random; -import java.util.List; import java.util.concurrent.atomic.AtomicBoolean; import java.io.IOException; @@ -48,16 +48,16 @@ public class TestThreadSafe extends LuceneTestCase { } @Override - public void run() { + public void run() { try { for (int i=0; i fields = doc.getFields(); - for (final Fieldable f : fields ) { + for (final IndexableField f : doc ) { validateField(f); } @@ -93,7 +98,7 @@ public class TestThreadSafe extends LuceneTestCase { } - void validateField(Fieldable f) { + void validateField(IndexableField f) { String val = f.stringValue(); if (!val.startsWith("^") || !val.endsWith("$")) { throw new RuntimeException("Invalid field:" + f.toString() + " val=" +val); @@ -104,7 +109,7 @@ public class TestThreadSafe extends LuceneTestCase { void buildDir(Directory dir, int nDocs, int maxFields, int maxFieldLen) throws IOException { IndexWriter iw = new IndexWriter(dir, new IndexWriterConfig( - TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); + TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMaxBufferedDocs(10)); for (int j=0; j - * http://www.apache.org/licenses/LICENSE-2.0 - *

    - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ public class TestParser extends LuceneTestCase { @@ -63,9 +65,9 @@ public class TestParser extends LuceneTestCase { int endOfDate = line.indexOf('\t'); String date = line.substring(0, endOfDate).trim(); String content = line.substring(endOfDate).trim(); - org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); - doc.add(newField("date", date, Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("contents", content, Field.Store.YES, Field.Index.ANALYZED)); + Document doc = new Document(); + doc.add(newField("date", date, TextField.TYPE_STORED)); + doc.add(newField("contents", content, TextField.TYPE_STORED)); NumericField numericField = new NumericField("date2"); numericField.setIntValue(Integer.valueOf(date)); doc.add(numericField); @@ -217,7 +219,7 @@ public class TestParser extends LuceneTestCase { System.out.println("=========" + qType + "============"); ScoreDoc[] scoreDocs = hits.scoreDocs; for (int i = 0; i < Math.min(numDocs, hits.totalHits); i++) { - org.apache.lucene.document.Document ldoc = searcher.doc(scoreDocs[i].doc); + Document ldoc = searcher.doc(scoreDocs[i].doc); System.out.println("[" + ldoc.get("date") + "]" + ldoc.get("contents")); } System.out.println(); diff --git a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java index f5cdcd7f111..041b493e986 100644 --- a/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java +++ b/lucene/contrib/xml-query-parser/src/test/org/apache/lucene/xmlparser/TestQueryTemplateManager.java @@ -1,24 +1,5 @@ package org.apache.lucene.xmlparser; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Constants; -import org.apache.lucene.util.LuceneTestCase; -import org.w3c.dom.Document; -import org.xml.sax.SAXException; - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; -import java.io.IOException; -import java.util.Locale; -import java.util.Properties; -import java.util.StringTokenizer; - /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with @@ -36,6 +17,26 @@ import java.util.StringTokenizer; * limitations under the License. 
*/ +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.LuceneTestCase; +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.TransformerException; +import java.io.IOException; +import java.util.Locale; +import java.util.Properties; +import java.util.StringTokenizer; + + /** * This class illustrates how form input (such as from a web page or Swing gui) can be * turned into Lucene queries using a choice of XSL templates for different styles of queries. @@ -125,7 +126,7 @@ public class TestQueryTemplateManager extends LuceneTestCase { String name = st.nextToken().trim(); if (st.hasMoreTokens()) { String value = st.nextToken().trim(); - result.add(newField(name, value, Field.Store.YES, Field.Index.ANALYZED)); + result.add(newField(name, value, TextField.TYPE_STORED)); } } return result; diff --git a/lucene/src/java/org/apache/lucene/analysis/Analyzer.java b/lucene/src/java/org/apache/lucene/analysis/Analyzer.java index d03814f3501..43cf759cadf 100644 --- a/lucene/src/java/org/apache/lucene/analysis/Analyzer.java +++ b/lucene/src/java/org/apache/lucene/analysis/Analyzer.java @@ -22,11 +22,10 @@ import java.io.IOException; import java.io.Closeable; import java.lang.reflect.Modifier; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.store.AlreadyClosedException; -import org.apache.lucene.document.Fieldable; - /** An Analyzer builds TokenStreams, which analyze text. It thus represents a * policy for extracting index terms from text. *

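The hunk below moves Analyzer's gap callbacks from Fieldable to IndexableField: getOffsetGap now receives an IndexableField and consults field.tokenized(). A minimal sketch of an analyzer overriding both callbacks against the new signature; the StandardAnalyzer delegate, the Version constant, and the gap values are illustrative assumptions, not part of this patch:

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.util.Version;

public class GapAwareAnalyzer extends Analyzer {
  // Tokenization is delegated; only the gap policy changes here.
  private final Analyzer delegate = new StandardAnalyzer(Version.LUCENE_CURRENT);

  @Override
  public TokenStream tokenStream(String fieldName, Reader reader) {
    return delegate.tokenStream(fieldName, reader);
  }

  @Override
  public int getPositionIncrementGap(String fieldName) {
    // Large gap: phrase queries will not match across values of a multi-valued field.
    return 100;
  }

  @Override
  public int getOffsetGap(IndexableField field) {
    // New signature takes IndexableField; return a constant gap so offsets stay
    // consistent for multi-valued fields whether or not the field is tokenized.
    return 1;
  }
}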
@@ -111,16 +110,16 @@ public abstract class Analyzer implements Closeable { } /** - * Invoked before indexing a Fieldable instance if + * Invoked before indexing an IndexableField instance if * terms have already been added to that field. This allows custom * analyzers to place an automatic position increment gap between - * Fieldable instances using the same field name. The default value + * IndexableField instances using the same field name. The default value * position increment gap is 0. With a 0 position increment gap and * the typical default token position increment of 1, all terms in a field, - * including across Fieldable instances, are in successive positions, allowing - * exact PhraseQuery matches, for instance, across Fieldable instance boundaries. + * including across IndexableField instances, are in successive positions, allowing + * exact PhraseQuery matches, for instance, across IndexableField instance boundaries. * - * @param fieldName Fieldable name being indexed. + * @param fieldName IndexableField name being indexed. * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} */ public int getPositionIncrementGap(String fieldName) { @@ -138,11 +137,12 @@ public abstract class Analyzer implements Closeable { * @param field the field just indexed * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)} */ - public int getOffsetGap(Fieldable field) { - if (field.isTokenized()) + public int getOffsetGap(IndexableField field) { + if (field.tokenized()) { return 1; - else + } else { return 0; + } } /** Frees persistent resources used by this Analyzer */ diff --git a/lucene/src/java/org/apache/lucene/analysis/package.html b/lucene/src/java/org/apache/lucene/analysis/package.html index 45d9d737c02..7200f4f6417 100644 --- a/lucene/src/java/org/apache/lucene/analysis/package.html +++ b/lucene/src/java/org/apache/lucene/analysis/package.html @@ -120,7 +120,7 @@ There are many post tokenization steps that can be done, including (but not limi Applications usually do not invoke analysis – Lucene does it for them:

    • At indexing, as a consequence of - {@link org.apache.lucene.index.IndexWriter#addDocument(org.apache.lucene.document.Document) addDocument(doc)}, + {@link org.apache.lucene.index.IndexWriter#addDocument(Iterable) addDocument(doc)}, the Analyzer in effect for indexing is invoked for each indexed field of the added document.
    • At search, a QueryParser may invoke the Analyzer during parsing. Note that for some queries, analysis does not @@ -170,7 +170,7 @@ the source code of any one of the many samples located in this package.

      Field Section Boundaries

      - When {@link org.apache.lucene.document.Document#add(org.apache.lucene.document.Fieldable) document.add(field)} + When {@link org.apache.lucene.document.Document#add(org.apache.lucene.index.IndexableField) document.add(field)} is called multiple times for the same field name, we could say that each such call creates a new section for that field in that document. In fact, a separate call to diff --git a/lucene/src/java/org/apache/lucene/document/AbstractField.java b/lucene/src/java/org/apache/lucene/document/AbstractField.java deleted file mode 100755 index 148e853b237..00000000000 --- a/lucene/src/java/org/apache/lucene/document/AbstractField.java +++ /dev/null @@ -1,312 +0,0 @@ -package org.apache.lucene.document; -/** - * Copyright 2006 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.search.PhraseQuery; // for javadocs -import org.apache.lucene.search.spans.SpanQuery; // for javadocs -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.FieldInvertState; // for javadocs -import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.index.values.ValueType; - - -/** - * - * - **/ -public abstract class AbstractField implements Fieldable { - - protected String name = "body"; - protected boolean storeTermVector = false; - protected boolean storeOffsetWithTermVector = false; - protected boolean storePositionWithTermVector = false; - protected boolean omitNorms = false; - protected boolean isStored = false; - protected boolean isIndexed = true; - protected boolean isTokenized = true; - protected boolean isBinary = false; - protected boolean lazy = false; - protected IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; - protected float boost = 1.0f; - // the data object for all different kind of field values - protected Object fieldsData = null; - // pre-analyzed tokenStream for indexed fields - protected TokenStream tokenStream; - // length/offset for all primitive types - protected int binaryLength; - protected int binaryOffset; - protected PerDocFieldValues docValues; - - protected AbstractField() - { - } - - protected AbstractField(String name, Field.Store store, Field.Index index, Field.TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - this.name = name; - - this.isStored = store.isStored(); - this.isIndexed = index.isIndexed(); - this.isTokenized = index.isAnalyzed(); - this.omitNorms = index.omitNorms(); - - this.isBinary = false; - - setStoreTermVector(termVector); - } - - /** Sets the boost factor hits on this field. This value will be - * multiplied into the score of all hits on this this field of this - * document. - * - *

      The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document - * containing this field. If a document has multiple fields with the same - * name, all such values are multiplied together. This product is then - * used to compute the norm factor for the field. By - * default, in the {@link - * org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied - * by the length normalization factor and then - * rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the - * index. One should attempt to ensure that this product does not overflow - * the range of that encoding. - * - * @see org.apache.lucene.document.Document#setBoost(float) - * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState) - * @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float) - */ - public void setBoost(float boost) { - this.boost = boost; - } - - /** Returns the boost factor for hits for this field. - * - *

      The default value is 1.0. - * - *

      Note: this value is not stored directly with the document in the index. - * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and - * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when - * this field was indexed. - * - * @see #setBoost(float) - */ - public float getBoost() { - return boost; - } - - /** Returns the name of the field. - * For example "date", "title", "body", ... - */ - public String name() { return name; } - - protected void setStoreTermVector(Field.TermVector termVector) { - this.storeTermVector = termVector.isStored(); - this.storePositionWithTermVector = termVector.withPositions(); - this.storeOffsetWithTermVector = termVector.withOffsets(); - } - - /** True iff the value of the field is to be stored in the index for return - with search hits. It is an error for this to be true if a field is - Reader-valued. */ - public final boolean isStored() { return isStored; } - - /** True iff the value of the field is to be indexed, so that it may be - searched on. */ - public final boolean isIndexed() { return isIndexed; } - - /** True iff the value of the field should be tokenized as text prior to - indexing. Un-tokenized fields are indexed as a single word and may not be - Reader-valued. */ - public final boolean isTokenized() { return isTokenized; } - - /** True iff the term or terms used to index this field are stored as a term - * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. - * These methods do not provide access to the original content of the field, - * only to terms used to index it. If the original content must be - * preserved, use the stored attribute instead. - * - * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) - */ - public final boolean isTermVectorStored() { return storeTermVector; } - - /** - * True iff terms are stored as term vector together with their offsets - * (start and end position in source text). - */ - public boolean isStoreOffsetWithTermVector(){ - return storeOffsetWithTermVector; - } - - /** - * True iff terms are stored as term vector together with their token positions. - */ - public boolean isStorePositionWithTermVector(){ - return storePositionWithTermVector; - } - - /** True iff the value of the filed is stored as binary */ - public final boolean isBinary() { - return isBinary; - } - - - /** - * Return the raw byte[] for the binary field. Note that - * you must also call {@link #getBinaryLength} and {@link - * #getBinaryOffset} to know which range of bytes in this - * returned array belong to the field. - * @return reference to the Field value as byte[]. 
- */ - public byte[] getBinaryValue() { - return getBinaryValue(null); - } - - public byte[] getBinaryValue(byte[] result){ - if (isBinary || fieldsData instanceof byte[]) - return (byte[]) fieldsData; - else - return null; - } - - /** - * Returns length of byte[] segment that is used as value, if Field is not binary - * returned value is undefined - * @return length of byte[] segment that represents this Field value - */ - public int getBinaryLength() { - if (isBinary) { - return binaryLength; - } else if (fieldsData instanceof byte[]) - return ((byte[]) fieldsData).length; - else - return 0; - } - - /** - * Returns offset into byte[] segment that is used as value, if Field is not binary - * returned value is undefined - * @return index of the first character in byte[] segment that represents this Field value - */ - public int getBinaryOffset() { - return binaryOffset; - } - - /** True if norms are omitted for this indexed field */ - public boolean getOmitNorms() { return omitNorms; } - - /** @see #setIndexOptions */ - public IndexOptions getIndexOptions() { return indexOptions; } - - /** Expert: - * - * If set, omit normalization factors associated with this indexed field. - * This effectively disables indexing boosts and length normalization for this field. - */ - public void setOmitNorms(boolean omitNorms) { this.omitNorms=omitNorms; } - - /** Expert: - * - * If set, omit term freq, and optionally also positions and payloads from - * postings for this field. - * - *

      NOTE: While this option reduces storage space - * required in the index, it also means any query - * requiring positional information, such as {@link - * PhraseQuery} or {@link SpanQuery} subclasses will - * silently fail to find results. - */ - public void setIndexOptions(IndexOptions indexOptions) { this.indexOptions=indexOptions; } - - public boolean isLazy() { - return lazy; - } - - /** Prints a Field for human consumption. */ - @Override - public final String toString() { - StringBuilder result = new StringBuilder(); - if (isStored) { - result.append("stored"); - } - if (isIndexed) { - if (result.length() > 0) - result.append(","); - result.append("indexed"); - } - if (isTokenized) { - if (result.length() > 0) - result.append(","); - result.append("tokenized"); - } - if (storeTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVector"); - } - if (storeOffsetWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorOffsets"); - } - if (storePositionWithTermVector) { - if (result.length() > 0) - result.append(","); - result.append("termVectorPosition"); - } - if (isBinary) { - if (result.length() > 0) - result.append(","); - result.append("binary"); - } - if (omitNorms) { - result.append(",omitNorms"); - } - if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { - result.append(",indexOptions="); - result.append(indexOptions); - } - if (lazy){ - result.append(",lazy"); - } - result.append('<'); - result.append(name); - result.append(':'); - - if (fieldsData != null && lazy == false) { - result.append(fieldsData); - } - - result.append('>'); - return result.toString(); - } - - public PerDocFieldValues getDocValues() { - return docValues; - } - - public void setDocValues(PerDocFieldValues docValues) { - this.docValues = docValues; - } - - public boolean hasDocValues() { - return docValues != null && docValues.type() != null; - } - - public ValueType docValuesType() { - return docValues == null? null : docValues.type(); - } -} diff --git a/lucene/src/java/org/apache/lucene/document/BinaryField.java b/lucene/src/java/org/apache/lucene/document/BinaryField.java new file mode 100644 index 00000000000..0776afdef3f --- /dev/null +++ b/lucene/src/java/org/apache/lucene/document/BinaryField.java @@ -0,0 +1,49 @@ +package org.apache.lucene.document; + +import org.apache.lucene.util.BytesRef; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public final class BinaryField extends Field { + + public static final FieldType TYPE_STORED = new FieldType(); + static { + TYPE_STORED.setStored(true); + TYPE_STORED.freeze(); + } + + public BinaryField(String name, byte[] value) { + super(name, BinaryField.TYPE_STORED, value); + } + + public BinaryField(String name, byte[] value, int offset, int length) { + super(name, BinaryField.TYPE_STORED, value, offset, length); + } + + public BinaryField(String name, BytesRef bytes) { + super(name, BinaryField.TYPE_STORED, bytes); + } + + public BinaryField(String name, FieldType custom, byte[] value) { + super(name, custom, value); + } + + public boolean isNumeric() { + return false; + } +} diff --git a/lucene/src/java/org/apache/lucene/document/CompressionTools.java b/lucene/src/java/org/apache/lucene/document/CompressionTools.java index 78e5949d312..52d7cf9f7e6 100644 --- a/lucene/src/java/org/apache/lucene/document/CompressionTools.java +++ b/lucene/src/java/org/apache/lucene/document/CompressionTools.java @@ -92,16 +92,24 @@ public class CompressionTools { return compress(result.bytes, 0, result.length, compressionLevel); } + public static byte[] decompress(BytesRef bytes) throws DataFormatException { + return decompress(bytes.bytes, bytes.offset, bytes.length); + } + + public static byte[] decompress(byte[] value) throws DataFormatException { + return decompress(value, 0, value.length); + } + /** Decompress the byte array previously returned by * compress */ - public static byte[] decompress(byte[] value) throws DataFormatException { + public static byte[] decompress(byte[] value, int offset, int length) throws DataFormatException { // Create an expandable byte array to hold the decompressed data - ByteArrayOutputStream bos = new ByteArrayOutputStream(value.length); + ByteArrayOutputStream bos = new ByteArrayOutputStream(length); Inflater decompressor = new Inflater(); try { - decompressor.setInput(value); + decompressor.setInput(value, offset, length); // Decompress the data final byte[] buf = new byte[1024]; @@ -119,9 +127,17 @@ public class CompressionTools { /** Decompress the byte array previously returned by * compressString back into a String */ public static String decompressString(byte[] value) throws DataFormatException { - final byte[] bytes = decompress(value); + return decompressString(value, 0, value.length); + } + + public static String decompressString(byte[] value, int offset, int length) throws DataFormatException { + final byte[] bytes = decompress(value, offset, length); CharsRef result = new CharsRef(bytes.length); UnicodeUtil.UTF8toUTF16(bytes, 0, bytes.length, result); return new String(result.chars, 0, result.length); } + + public static String decompressString(BytesRef bytes) throws DataFormatException { + return decompressString(bytes.bytes, bytes.offset, bytes.length); + } } diff --git a/lucene/src/java/org/apache/lucene/document/Document.java b/lucene/src/java/org/apache/lucene/document/Document.java index 5d8262c8940..fc806aa3765 100644 --- a/lucene/src/java/org/apache/lucene/document/Document.java +++ b/lucene/src/java/org/apache/lucene/document/Document.java @@ -17,61 +17,55 @@ package org.apache.lucene.document; * limitations under the License. 
*/ -import java.util.*; // for javadoc +import java.util.*; + +import org.apache.lucene.index.IndexReader; // for javadoc +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.IndexSearcher; // for javadoc import org.apache.lucene.search.ScoreDoc; // for javadoc -import org.apache.lucene.index.IndexReader; // for javadoc +import org.apache.lucene.util.BytesRef; /** Documents are the unit of indexing and search. * * A Document is a set of fields. Each field has a name and a textual value. - * A field may be {@link Fieldable#isStored() stored} with the document, in which + * A field may be {@link IndexableField#stored() stored} with the document, in which * case it is returned with search hits on the document. Thus each document * should typically contain one or more stored fields which uniquely identify * it. * - *

      Note that fields which are not {@link Fieldable#isStored() stored} are + *

Note that fields which are not {@link IndexableField#stored() stored} are * not available in documents retrieved from the index, e.g. with {@link * ScoreDoc#doc} or {@link IndexReader#document(int)}. */ -public final class Document { - List<Fieldable> fields = new ArrayList<Fieldable>(); - private float boost = 1.0f; +public final class Document implements Iterable<IndexableField> { + + private final List<IndexableField> fields = new ArrayList<IndexableField>(); /** Constructs a new document with no fields. */ public Document() {} + @Override + public Iterator<IndexableField> iterator() { - /** Sets a boost factor for hits on any field of this document. This value - * will be multiplied into the score of all hits on this document. - * - *

      The default value is 1.0. - * - *

Values are multiplied into the value of {@link Fieldable#getBoost()} of - * each field in this document. Thus, this method in effect sets a default - * boost for the fields of this document. - * - * @see Fieldable#setBoost(float) - */ - public void setBoost(float boost) { - this.boost = boost; - } + return new Iterator<IndexableField>() { + private int fieldUpto = 0; + + @Override + public boolean hasNext() { + return fieldUpto < fields.size(); + }

      Note that once a document is indexed this value is no longer available - * from the index. At search time, for retrieved documents, this method always - * returns 1. This however does not mean that the boost value set at indexing - * time was ignored - it was just combined with other indexing time factors and - * stored elsewhere, for better indexing and search performance. (For more - * information see the "norm(t,d)" part of the scoring formula in - * {@link org.apache.lucene.search.Similarity Similarity}.) - * - * @see #setBoost(float) - */ - public float getBoost() { - return boost; + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public IndexableField next() { + return fields.get(fieldUpto++); + } + }; } /** @@ -84,7 +78,7 @@ public final class Document { * a document has to be deleted from an index and a new changed version of that * document has to be added.

      */ - public final void add(Fieldable field) { + public final void add(IndexableField field) { fields.add(field); } @@ -99,9 +93,9 @@ public final class Document { * document has to be added.

*/ public final void removeField(String name) { - Iterator<Fieldable> it = fields.iterator(); + Iterator<IndexableField> it = fields.iterator(); while (it.hasNext()) { - Fieldable field = it.next(); + IndexableField field = it.next(); if (field.name().equals(name)) { it.remove(); return; @@ -119,148 +113,15 @@ * document has to be added.

*/ public final void removeFields(String name) { - Iterator<Fieldable> it = fields.iterator(); + Iterator<IndexableField> it = fields.iterator(); while (it.hasNext()) { - Fieldable field = it.next(); + IndexableField field = it.next(); if (field.name().equals(name)) { it.remove(); } } } - /** Returns a field with the given name if any exist in this document, or - * null. If multiple fields exists with this name, this method returns the - * first value added. - * Do not use this method with lazy loaded fields or {@link NumericField}. - * @deprecated use {@link #getFieldable} instead and cast depending on - * data type. - * @throws ClassCastException if you try to retrieve a numerical or - * lazy loaded field. - */ - @Deprecated - public final Field getField(String name) { - return (Field) getFieldable(name); - } - - - /** Returns a field with the given name if any exist in this document, or - * null. If multiple fields exists with this name, this method returns the - * first value added. - */ - public Fieldable getFieldable(String name) { - for (Fieldable field : fields) { - if (field.name().equals(name)) - return field; - } - return null; - } - - /** Returns the string value of the field with the given name if any exist in - * this document, or null. If multiple fields exist with this name, this - * method returns the first value added. If only binary fields with this name - * exist, returns null. - * For {@link NumericField} it returns the string value of the number. If you want - * the actual {@code NumericField} instance back, use {@link #getFieldable}. - */ - public final String get(String name) { - for (Fieldable field : fields) { - if (field.name().equals(name) && (!field.isBinary())) - return field.stringValue(); - } - return null; - } - - /** Returns a List of all the fields in a document. - *

      Note that fields which are not {@link Fieldable#isStored() stored} are - * not available in documents retrieved from the - * index, e.g. {@link IndexSearcher#doc(int)} or {@link - * IndexReader#document(int)}. - */ - public final List getFields() { - return fields; - } - - private final static Field[] NO_FIELDS = new Field[0]; - - /** - * Returns an array of {@link Field}s with the given name. - * This method returns an empty array when there are no - * matching fields. It never returns null. - * Do not use this method with lazy loaded fields or {@link NumericField}. - * - * @param name the name of the field - * @return a Field[] array - * @deprecated use {@link #getFieldable} instead and cast depending on - * data type. - * @throws ClassCastException if you try to retrieve a numerical or - * lazy loaded field. - */ - @Deprecated - public final Field[] getFields(String name) { - List result = new ArrayList(); - for (Fieldable field : fields) { - if (field.name().equals(name)) { - result.add((Field) field); - } - } - - if (result.size() == 0) - return NO_FIELDS; - - return result.toArray(new Field[result.size()]); - } - - - private final static Fieldable[] NO_FIELDABLES = new Fieldable[0]; - - /** - * Returns an array of {@link Fieldable}s with the given name. - * This method returns an empty array when there are no - * matching fields. It never returns null. - * - * @param name the name of the field - * @return a Fieldable[] array - */ - public Fieldable[] getFieldables(String name) { - List result = new ArrayList(); - for (Fieldable field : fields) { - if (field.name().equals(name)) { - result.add(field); - } - } - - if (result.size() == 0) - return NO_FIELDABLES; - - return result.toArray(new Fieldable[result.size()]); - } - - - private final static String[] NO_STRINGS = new String[0]; - - /** - * Returns an array of values of the field specified as the method parameter. - * This method returns an empty array when there are no - * matching fields. It never returns null. - * For {@link NumericField}s it returns the string value of the number. If you want - * the actual {@code NumericField} instances back, use {@link #getFieldables}. 
- * @param name the name of the field - * @return a String[] of field values - */ - public final String[] getValues(String name) { - List result = new ArrayList(); - for (Fieldable field : fields) { - if (field.name().equals(name) && (!field.isBinary())) - result.add(field.stringValue()); - } - - if (result.size() == 0) - return NO_STRINGS; - - return result.toArray(new String[result.size()]); - } - - private final static byte[][] NO_BYTES = new byte[0][]; /** * Returns an array of byte arrays for of the fields that have the name specified @@ -271,17 +132,18 @@ public final class Document { * @param name the name of the field * @return a byte[][] of binary field values */ - public final byte[][] getBinaryValues(String name) { - List result = new ArrayList(); - for (Fieldable field : fields) { - if (field.name().equals(name) && (field.isBinary())) - result.add(field.getBinaryValue()); + public final BytesRef[] getBinaryValues(String name) { + final List result = new ArrayList(); + for (IndexableField field : fields) { + if (field.name().equals(name)) { + final BytesRef bytes = field.binaryValue(); + if (bytes != null) { + result.add(bytes); + } + } } - if (result.size() == 0) - return NO_BYTES; - - return result.toArray(new byte[result.size()][]); + return result.toArray(new BytesRef[result.size()]); } /** @@ -293,10 +155,72 @@ public final class Document { * @param name the name of the field. * @return a byte[] containing the binary field value or null */ - public final byte[] getBinaryValue(String name) { - for (Fieldable field : fields) { - if (field.name().equals(name) && (field.isBinary())) - return field.getBinaryValue(); + public final BytesRef getBinaryValue(String name) { + for (IndexableField field : fields) { + if (field.name().equals(name)) { + final BytesRef bytes = field.binaryValue(); + if (bytes != null) { + return bytes; + } + } + } + return null; + } + + /** Returns a field with the given name if any exist in this document, or + * null. If multiple fields exists with this name, this method returns the + * first value added. + */ + public final IndexableField getField(String name) { + for (IndexableField field : fields) { + if (field.name().equals(name)) { + return field; + } + } + return null; + } + + /** + * Returns an array of {@link IndexableField}s with the given name. + * This method returns an empty array when there are no + * matching fields. It never returns null. + * + * @param name the name of the field + * @return a Fieldable[] array + */ + public IndexableField[] getFields(String name) { + List result = new ArrayList(); + for (IndexableField field : fields) { + if (field.name().equals(name)) { + result.add(field); + } + } + + return result.toArray(new IndexableField[result.size()]); + } + + /** Returns a List of all the fields in a document. + *

      Note that fields which are not stored are + * not available in documents retrieved from the + * index, e.g. {@link IndexSearcher#doc(int)} or {@link + * IndexReader#document(int)}. + */ + public final List getFields() { + return fields; + } + + /** Returns the string value of the field with the given name if any exist in + * this document, or null. If multiple fields exist with this name, this + * method returns the first value added. If only binary fields with this name + * exist, returns null. + * For {@link NumericField} it returns the string value of the number. If you want + * the actual {@code NumericField} instance back, use {@link #getField}. + */ + public final String get(String name) { + for (IndexableField field : fields) { + if (field.name().equals(name) && field.stringValue() != null) { + return field.stringValue(); + } } return null; } @@ -307,7 +231,7 @@ public final class Document { StringBuilder buffer = new StringBuilder(); buffer.append("Document<"); for (int i = 0; i < fields.size(); i++) { - Fieldable field = fields.get(i); + IndexableField field = fields.get(i); buffer.append(field.toString()); if (i != fields.size()-1) buffer.append(" "); diff --git a/lucene/src/java/org/apache/lucene/document/Field.java b/lucene/src/java/org/apache/lucene/document/Field.java index 26fe54bb8fb..542dccd34ad 100644 --- a/lucene/src/java/org/apache/lucene/document/Field.java +++ b/lucene/src/java/org/apache/lucene/document/Field.java @@ -21,514 +21,325 @@ import java.io.Reader; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.values.PerDocFieldValues; +import org.apache.lucene.index.values.ValueType; +import org.apache.lucene.util.BytesRef; /** - A field is a section of a Document. Each field has two parts, a name and a - value. Values may be free text, provided as a String or as a Reader, or they - may be atomic keywords, which are not further processed. Such keywords may - be used to represent dates, urls, etc. Fields are optionally stored in the - index, so that they may be returned with hits on the document. - */ + * A field is a section of a Document. Each field has two parts, a name and a + * value. Values may be free text, provided as a String or as a Reader, or they + * may be atomic keywords, which are not further processed. Such keywords may be + * used to represent dates, urls, etc. Fields are optionally stored in the + * index, so that they may be returned with hits on the document. + */ -public final class Field extends AbstractField implements Fieldable { +public class Field implements IndexableField { - /** Specifies whether and how a field should be stored. */ - public static enum Store { + protected FieldType type; + protected String name = "body"; + // the data object for all different kind of field values + protected Object fieldsData; + // pre-analyzed tokenStream for indexed fields + protected TokenStream tokenStream; + // length/offset for all primitive types + protected PerDocFieldValues docValues; + + protected float boost = 1.0f; - /** Store the original field value in the index. This is useful for short texts - * like a document's title which should be displayed with the results. The - * value is stored in its original form, i.e. no analyzer is used before it is - * stored. 
- */ - YES { - @Override - public boolean isStored() { return true; } - }, - - /** Do not store the field value in the index. */ - NO { - @Override - public boolean isStored() { return false; } - }; - - public abstract boolean isStored(); - } - - /** Specifies whether and how a field should be indexed. */ - public static enum Index { - - /** Do not index the field value. This field can thus not be searched, - * but one can still access its contents provided it is - * {@link Field.Store stored}. */ - NO { - @Override - public boolean isIndexed() { return false; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return true; } - }, - - /** Index the tokens produced by running the field's - * value through an Analyzer. This is useful for - * common text. */ - ANALYZED { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return true; } - @Override - public boolean omitNorms() { return false; } - }, - - /** Index the field's value without using an Analyzer, so it can be searched. - * As no analyzer is used the value will be stored as a single term. This is - * useful for unique Ids like product numbers. - */ - NOT_ANALYZED { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return false; } - }, - - /** Expert: Index the field's value without an Analyzer, - * and also disable the indexing of norms. Note that you - * can also separately enable/disable norms by calling - * {@link Field#setOmitNorms}. No norms means that - * index-time field and document boosting and field - * length normalization are disabled. The benefit is - * less memory usage as norms take up one byte of RAM - * per indexed field for every document in the index, - * during searching. Note that once you index a given - * field with norms disabled, enabling norms will - * have no effect. In other words, for this to have the - * above described effect on a field, one instance of - * that field must be indexed with NOT_ANALYZED_NO_NORMS - * at some point. */ - NOT_ANALYZED_NO_NORMS { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return false; } - @Override - public boolean omitNorms() { return true; } - }, - - /** Expert: Index the tokens produced by running the - * field's value through an Analyzer, and also - * separately disable the storing of norms. See - * {@link #NOT_ANALYZED_NO_NORMS} for what norms are - * and why you may want to disable them. */ - ANALYZED_NO_NORMS { - @Override - public boolean isIndexed() { return true; } - @Override - public boolean isAnalyzed() { return true; } - @Override - public boolean omitNorms() { return true; } - }; - - /** Get the best representation of the index given the flags. */ - public static Index toIndex(boolean indexed, boolean analyzed) { - return toIndex(indexed, analyzed, false); - } - - /** Expert: Get the best representation of the index given the flags. 
*/ - public static Index toIndex(boolean indexed, boolean analyzed, boolean omitNorms) { - - // If it is not indexed nothing else matters - if (!indexed) { - return Index.NO; - } - - // typical, non-expert - if (!omitNorms) { - if (analyzed) { - return Index.ANALYZED; - } - return Index.NOT_ANALYZED; - } - - // Expert: Norms omitted - if (analyzed) { - return Index.ANALYZED_NO_NORMS; - } - return Index.NOT_ANALYZED_NO_NORMS; - } - - public abstract boolean isIndexed(); - public abstract boolean isAnalyzed(); - public abstract boolean omitNorms(); - } - - /** Specifies whether and how a field should have term vectors. */ - public static enum TermVector { - - /** Do not store term vectors. - */ - NO { - @Override - public boolean isStored() { return false; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return false; } - }, - - /** Store the term vectors of each document. A term vector is a list - * of the document's terms and their number of occurrences in that document. */ - YES { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return false; } - }, - - /** - * Store the term vector + token position information - * - * @see #YES - */ - WITH_POSITIONS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return true; } - @Override - public boolean withOffsets() { return false; } - }, - - /** - * Store the term vector + Token offset information - * - * @see #YES - */ - WITH_OFFSETS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return false; } - @Override - public boolean withOffsets() { return true; } - }, - - /** - * Store the term vector + Token position and offset information - * - * @see #YES - * @see #WITH_POSITIONS - * @see #WITH_OFFSETS - */ - WITH_POSITIONS_OFFSETS { - @Override - public boolean isStored() { return true; } - @Override - public boolean withPositions() { return true; } - @Override - public boolean withOffsets() { return true; } - }; - - /** Get the best representation of a TermVector given the flags. */ - public static TermVector toTermVector(boolean stored, boolean withOffsets, boolean withPositions) { - - // If it is not stored, nothing else matters. - if (!stored) { - return TermVector.NO; - } - - if (withOffsets) { - if (withPositions) { - return Field.TermVector.WITH_POSITIONS_OFFSETS; - } - return Field.TermVector.WITH_OFFSETS; - } - - if (withPositions) { - return Field.TermVector.WITH_POSITIONS; - } - return Field.TermVector.YES; - } - - public abstract boolean isStored(); - public abstract boolean withPositions(); - public abstract boolean withOffsets(); + public Field(String name, FieldType type) { + this.name = name; + this.type = type; } - - /** The value of the field as a String, or null. If null, the Reader value or - * binary value is used. Exactly one of stringValue(), - * readerValue(), and getBinaryValue() must be set. */ - public String stringValue() { return fieldsData instanceof String ? (String)fieldsData : null; } - - /** The value of the field as a Reader, or null. If null, the String value or - * binary value is used. Exactly one of stringValue(), - * readerValue(), and getBinaryValue() must be set. */ - public Reader readerValue() { return fieldsData instanceof Reader ? 
(Reader)fieldsData : null; } - - /** The TokesStream for this field to be used when indexing, or null. If null, the Reader value - * or String value is analyzed to produce the indexed tokens. */ - public TokenStream tokenStreamValue() { return tokenStream; } - - - /**

      Expert: change the value of this field. This can - * be used during indexing to re-use a single Field - * instance to improve indexing speed by avoiding GC cost - * of new'ing and reclaiming Field instances. Typically - * a single {@link Document} instance is re-used as - * well. This helps most on small documents.

      - * - *

      Each Field instance should only be used once - * within a single {@link Document} instance. See ImproveIndexingSpeed - * for details.

      */ - public void setValue(String value) { - if (isBinary) { - throw new IllegalArgumentException("cannot set a String value on a binary field"); - } - fieldsData = value; - } - - /** Expert: change the value of this field. See setValue(String). */ - public void setValue(Reader value) { - if (isBinary) { - throw new IllegalArgumentException("cannot set a Reader value on a binary field"); - } - if (isStored) { - throw new IllegalArgumentException("cannot set a Reader value on a stored field"); - } - fieldsData = value; - } - - /** Expert: change the value of this field. See setValue(String). */ - public void setValue(byte[] value) { - if (!isBinary) { - throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field"); - } - fieldsData = value; - binaryLength = value.length; - binaryOffset = 0; - } - - /** Expert: change the value of this field. See setValue(String). */ - public void setValue(byte[] value, int offset, int length) { - if (!isBinary) { - throw new IllegalArgumentException("cannot set a byte[] value on a non-binary field"); - } - fieldsData = value; - binaryLength = length; - binaryOffset = offset; - } - - /** Expert: sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return true. - * May be combined with stored values from stringValue() or getBinaryValue() */ - public void setTokenStream(TokenStream tokenStream) { - this.isIndexed = true; - this.isTokenized = true; - this.tokenStream = tokenStream; - } - - /** - * Create a field by specifying its name, value and how it will - * be saved in the index. Term vectors will not be stored in the index. - * - * @param name The name of the field - * @param value The string to process - * @param store Whether value should be stored in the index - * @param index Whether the field should be indexed, and if so, if it should - * be tokenized before indexing - * @throws NullPointerException if name or value is null - * @throws IllegalArgumentException if the field is neither stored nor indexed - */ - public Field(String name, String value, Store store, Index index) { - this(name, value, store, index, TermVector.NO); - } - - /** - * Create a field by specifying its name, value and how it will - * be saved in the index. - * - * @param name The name of the field - * @param value The string to process - * @param store Whether value should be stored in the index - * @param index Whether the field should be indexed, and if so, if it should - * be tokenized before indexing - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or value is null - * @throws IllegalArgumentException in any of the following situations: - *
        - *
      • the field is neither stored nor indexed
      • - *
      • the field is not indexed but termVector is TermVector.YES
      • - *
      - */ - public Field(String name, String value, Store store, Index index, TermVector termVector) { - if (name == null) + public Field(String name, FieldType type, Reader reader) { + if (name == null) { throw new NullPointerException("name cannot be null"); - if (value == null) - throw new NullPointerException("value cannot be null"); - if (name.length() == 0 && value.length() == 0) - throw new IllegalArgumentException("name and value cannot both be empty"); - if (index == Index.NO && store == Store.NO) - throw new IllegalArgumentException("it doesn't make sense to have a field that " - + "is neither indexed nor stored"); - if (index == Index.NO && termVector != TermVector.NO) - throw new IllegalArgumentException("cannot store term vector information " - + "for a field that is not indexed"); - - this.name = name; - - this.fieldsData = value; - - this.isStored = store.isStored(); - - this.isIndexed = index.isIndexed(); - this.isTokenized = index.isAnalyzed(); - this.omitNorms = index.omitNorms(); - if (index == Index.NO) { - // note: now this reads even wierder than before - this.indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; - } - - this.isBinary = false; - - setStoreTermVector(termVector); - } - - /** - * Create a tokenized and indexed field that is not stored. Term vectors will - * not be stored. The Reader is read only when the Document is added to the index, - * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)} - * has been called. - * - * @param name The name of the field - * @param reader The reader with the content - * @throws NullPointerException if name or reader is null - */ - public Field(String name, Reader reader) { - this(name, reader, TermVector.NO); - } - - /** - * Create a tokenized and indexed field that is not stored, optionally with - * storing term vectors. The Reader is read only when the Document is added to the index, - * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)} - * has been called. - * - * @param name The name of the field - * @param reader The reader with the content - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or reader is null - */ - public Field(String name, Reader reader, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (reader == null) + } + if (reader == null) { throw new NullPointerException("reader cannot be null"); + } this.name = name; this.fieldsData = reader; - - this.isStored = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); - } - - /** - * Create a tokenized and indexed field that is not stored. Term vectors will - * not be stored. This is useful for pre-analyzed fields. - * The TokenStream is read only when the Document is added to the index, - * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} - * has been called. - * - * @param name The name of the field - * @param tokenStream The TokenStream with the content - * @throws NullPointerException if name or tokenStream is null - */ - public Field(String name, TokenStream tokenStream) { - this(name, tokenStream, TermVector.NO); + this.type = type; } - /** - * Create a tokenized and indexed field that is not stored, optionally with - * storing term vectors. This is useful for pre-analyzed fields. - * The TokenStream is read only when the Document is added to the index, - * i.e. 
you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} - * has been called. - * - * @param name The name of the field - * @param tokenStream The TokenStream with the content - * @param termVector Whether term vector should be stored - * @throws NullPointerException if name or tokenStream is null - */ - public Field(String name, TokenStream tokenStream, TermVector termVector) { - if (name == null) + public Field(String name, FieldType type, TokenStream tokenStream) { + if (name == null) { throw new NullPointerException("name cannot be null"); - if (tokenStream == null) + } + if (tokenStream == null) { throw new NullPointerException("tokenStream cannot be null"); + } this.name = name; this.fieldsData = null; this.tokenStream = tokenStream; - - this.isStored = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); + this.type = type; + } + + public Field(String name, FieldType type, byte[] value) { + this(name, type, value, 0, value.length); } + public Field(String name, FieldType type, byte[] value, int offset, int length) { + this.fieldsData = new BytesRef(value, offset, length); + this.type = type; + this.name = name; + } + + public Field(String name, FieldType type, BytesRef bytes) { + this.fieldsData = bytes; + this.type = type; + this.name = name; + } + + public Field(String name, FieldType type, String value) { + if (name == null) { + throw new IllegalArgumentException("name cannot be null"); + } + if (value == null) { + throw new IllegalArgumentException("value cannot be null"); + } + if (!type.stored() && !type.indexed()) { + throw new IllegalArgumentException("it doesn't make sense to have a field that " + + "is neither indexed nor stored"); + } + if (!type.indexed() && type.storeTermVectors()) { + throw new IllegalArgumentException("cannot store term vector information " + + "for a field that is not indexed"); + } + + this.type = type; + this.name = name; + this.fieldsData = value; + } + + /** + * The value of the field as a String, or null. If null, the Reader value or + * binary value is used. Exactly one of stringValue(), readerValue(), and + * binaryValue() must be set. + */ + public String stringValue() { + return fieldsData instanceof String ? (String) fieldsData : null; + } /** - * Create a stored field with binary value. Optionally the value may be compressed. - * - * @param name The name of the field - * @param value The binary value + * The value of the field as a Reader, or null. If null, the String value or + * binary value is used. Exactly one of stringValue(), readerValue(), and + * binaryValue() must be set. */ - public Field(String name, byte[] value) { - this(name, value, 0, value.length); + public Reader readerValue() { + return fieldsData instanceof Reader ? (Reader) fieldsData : null; } - + /** - * Create a stored field with binary value. Optionally the value may be compressed. - * - * @param name The name of the field - * @param value The binary value - * @param offset Starting offset in value where this Field's bytes are - * @param length Number of bytes to use for this Field, starting at offset + * The TokenStream for this field to be used when indexing, or null. If null, + * the Reader value or String value is analyzed to produce the indexed tokens.
*/ - public Field(String name, byte[] value, int offset, int length) { - - if (name == null) - throw new IllegalArgumentException("name cannot be null"); - if (value == null) - throw new IllegalArgumentException("value cannot be null"); - - this.name = name; + public TokenStream tokenStreamValue() { + return tokenStream; + } + + /** + *

      + * Expert: change the value of this field. This can be used during indexing to + * re-use a single Field instance to improve indexing speed by avoiding GC + * cost of new'ing and reclaiming Field instances. Typically a single + * {@link Document} instance is re-used as well. This helps most on small + * documents. + *

      + * + *

      + * Each Field instance should only be used once within a single + * {@link Document} instance. See ImproveIndexingSpeed for details. + *

+ */ + public void setValue(String value) { + if (isBinary()) { + throw new IllegalArgumentException( + "cannot set a String value on a binary field"); + } + fieldsData = value; + } + + /** + * Expert: change the value of this field. See setValue(String). + */ + public void setValue(Reader value) { + if (isBinary()) { + throw new IllegalArgumentException( + "cannot set a Reader value on a binary field"); + } + if (stored()) { + throw new IllegalArgumentException( + "cannot set a Reader value on a stored field"); + } + fieldsData = value; + } + + /** + * Expert: change the value of this field. See setValue(String). + */ + public void setValue(byte[] value) { + if (!isBinary()) { + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); + } + fieldsData = new BytesRef(value); + } + + /** + * Expert: change the value of this field. See setValue(String). + */ + /* + public void setValue(byte[] value, int offset, int length) { + if (!isBinary) { + throw new IllegalArgumentException( + "cannot set a byte[] value on a non-binary field"); + } fieldsData = value; - - isStored = true; - isIndexed = false; - isTokenized = false; - indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; - omitNorms = true; - - isBinary = true; binaryLength = length; binaryOffset = offset; - - setStoreTermVector(TermVector.NO); + } + */ + + /** + * Expert: sets the token stream to be used for indexing; the field must be + * indexed and tokenized. May be combined with stored + * values from stringValue() or binaryValue(). + */ + public void setTokenStream(TokenStream tokenStream) { + if (!indexed() || !tokenized()) { + throw new IllegalArgumentException( + "cannot set a TokenStream on a field that is not indexed and tokenized"); + } + this.tokenStream = tokenStream; + } + + public String name() { + return name; + } + + public float boost() { + return boost; + } + + /** Sets the boost factor for hits on this field. This value will be + * multiplied into the score of all hits on this field of this + * document. + * + *

      The boost is used to compute the norm factor for the field. By + * default, in the {@link org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, + * the boost value is multiplied by the length normalization factor and then + * rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the + * index. One should attempt to ensure that this product does not overflow + * the range of that encoding. + * + * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState) + * @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float) + */ + public void setBoost(float boost) { + this.boost = boost; + } + + public boolean numeric() { + return false; + } + + public Number numericValue() { + return null; + } + + public NumericField.DataType numericDataType() { + return null; + } + + public BytesRef binaryValue() { + if (!isBinary()) { + return null; + } else { + return (BytesRef) fieldsData; + } + } + + /** methods from inner FieldType */ + + public boolean isBinary() { + return fieldsData instanceof BytesRef; + } + + public boolean stored() { + return type.stored(); + } + + public boolean indexed() { + return type.indexed(); + } + + public boolean tokenized() { + return type.tokenized(); + } + + public boolean omitNorms() { + return type.omitNorms(); + } + + public IndexOptions indexOptions() { + return type.indexOptions(); + } + + public boolean storeTermVectors() { + return type.storeTermVectors(); + } + + public boolean storeTermVectorOffsets() { + return type.storeTermVectorOffsets(); + } + + public boolean storeTermVectorPositions() { + return type.storeTermVectorPositions(); + } + + /** Prints a Field for human consumption. */ + @Override + public String toString() { + StringBuilder result = new StringBuilder(); + result.append(type.toString()); + result.append('<'); + result.append(name); + result.append(':'); + + if (fieldsData != null && type.lazy() == false) { + result.append(fieldsData); + } + + result.append('>'); + return result.toString(); + } + + public void setDocValues(PerDocFieldValues docValues) { + this.docValues = docValues; + } + + @Override + public PerDocFieldValues docValues() { + return null; + } + + @Override + public ValueType docValuesType() { + return null; + } + + /** Returns FieldType for this field. */ + public FieldType getFieldType() { + return type; } } diff --git a/lucene/src/java/org/apache/lucene/document/FieldType.java b/lucene/src/java/org/apache/lucene/document/FieldType.java new file mode 100644 index 00000000000..50def54cf2c --- /dev/null +++ b/lucene/src/java/org/apache/lucene/document/FieldType.java @@ -0,0 +1,186 @@ +package org.apache.lucene.document; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import org.apache.lucene.index.FieldInfo.IndexOptions; + +public class FieldType { + + private boolean indexed; + private boolean stored; + private boolean tokenized; + private boolean storeTermVectors; + private boolean storeTermVectorOffsets; + private boolean storeTermVectorPositions; + private boolean omitNorms; + private IndexOptions indexOptions = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; + private boolean lazy; + private boolean frozen; + + public FieldType(FieldType ref) { + this.indexed = ref.indexed(); + this.stored = ref.stored(); + this.tokenized = ref.tokenized(); + this.storeTermVectors = ref.storeTermVectors(); + this.storeTermVectorOffsets = ref.storeTermVectorOffsets(); + this.storeTermVectorPositions = ref.storeTermVectorPositions(); + this.omitNorms = ref.omitNorms(); + this.indexOptions = ref.indexOptions(); + this.lazy = ref.lazy(); + } + + public FieldType() { + } + + private void checkIfFrozen() { + if (frozen) { + throw new IllegalStateException(); + } + } + + public void freeze() { + this.frozen = true; + } + + public boolean indexed() { + return this.indexed; + } + + public void setIndexed(boolean value) { + checkIfFrozen(); + this.indexed = value; + } + + public boolean stored() { + return this.stored; + } + + public void setStored(boolean value) { + checkIfFrozen(); + this.stored = value; + } + + public boolean tokenized() { + return this.tokenized; + } + + public void setTokenized(boolean value) { + checkIfFrozen(); + this.tokenized = value; + } + + public boolean storeTermVectors() { + return this.storeTermVectors; + } + + public void setStoreTermVectors(boolean value) { + checkIfFrozen(); + this.storeTermVectors = value; + } + + public boolean storeTermVectorOffsets() { + return this.storeTermVectorOffsets; + } + + public void setStoreTermVectorOffsets(boolean value) { + checkIfFrozen(); + this.storeTermVectorOffsets = value; + } + + public boolean storeTermVectorPositions() { + return this.storeTermVectorPositions; + } + + public void setStoreTermVectorPositions(boolean value) { + checkIfFrozen(); + this.storeTermVectorPositions = value; + } + + public boolean omitNorms() { + return this.omitNorms; + } + + public void setOmitNorms(boolean value) { + checkIfFrozen(); + this.omitNorms = value; + } + + public IndexOptions indexOptions() { + return this.indexOptions; + } + + public void setIndexOptions(IndexOptions value) { + checkIfFrozen(); + this.indexOptions = value; + } + + public boolean lazy() { + return this.lazy; + } + + public void setLazy(boolean value) { + checkIfFrozen(); + this.lazy = value; + } + + /** Prints a Field for human consumption. 
*/ + @Override + public final String toString() { + StringBuilder result = new StringBuilder(); + if (stored()) { + result.append("stored"); + } + if (indexed()) { + if (result.length() > 0) + result.append(","); + result.append("indexed"); + } + if (tokenized()) { + if (result.length() > 0) + result.append(","); + result.append("tokenized"); + } + if (storeTermVectors()) { + if (result.length() > 0) + result.append(","); + result.append("termVector"); + } + if (storeTermVectorOffsets()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorOffsets"); + } + if (storeTermVectorPositions()) { + if (result.length() > 0) + result.append(","); + result.append("termVectorPosition"); + } + if (omitNorms()) { + result.append(",omitNorms"); + } + if (indexOptions != IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { + result.append(",indexOptions="); + result.append(indexOptions); + } + if (lazy()){ + result.append(",lazy"); + } + + return result.toString(); + } +} diff --git a/lucene/src/java/org/apache/lucene/document/Fieldable.java b/lucene/src/java/org/apache/lucene/document/Fieldable.java deleted file mode 100755 index f15ef54866a..00000000000 --- a/lucene/src/java/org/apache/lucene/document/Fieldable.java +++ /dev/null @@ -1,238 +0,0 @@ -package org.apache.lucene.document; - -/** - * Copyright 2004 The Apache Software Foundation - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.index.FieldInfo.IndexOptions; -import org.apache.lucene.index.FieldInvertState; // for javadocs -import org.apache.lucene.index.values.IndexDocValues; -import org.apache.lucene.index.values.PerDocFieldValues; -import org.apache.lucene.index.values.ValueType; -import org.apache.lucene.search.PhraseQuery; // for javadocs -import org.apache.lucene.search.spans.SpanQuery; // for javadocs - -import java.io.Reader; - -/** - * Synonymous with {@link Field}. - * - *

      WARNING: This interface may change within minor versions, despite Lucene's backward compatibility requirements. - * This means new methods may be added from version to version. This change only affects the Fieldable API; other backwards - * compatibility promises remain intact. For example, Lucene can still - * read and write indices created within the same major version. - *

      - * - **/ -public interface Fieldable { - /** Sets the boost factor hits on this field. This value will be - * multiplied into the score of all hits on this this field of this - * document. - * - *

      The boost is multiplied by {@link org.apache.lucene.document.Document#getBoost()} of the document - * containing this field. If a document has multiple fields with the same - * name, all such values are multiplied together. This product is then - * used to compute the norm factor for the field. By - * default, in the {@link - * org.apache.lucene.search.Similarity#computeNorm(FieldInvertState)} method, the boost value is multiplied - * by the length normalization factor - * and then rounded by {@link org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float)} before it is stored in the - * index. One should attempt to ensure that this product does not overflow - * the range of that encoding. - * - * @see org.apache.lucene.document.Document#setBoost(float) - * @see org.apache.lucene.search.Similarity#computeNorm(FieldInvertState) - * @see org.apache.lucene.search.DefaultSimilarity#encodeNormValue(float) - */ - void setBoost(float boost); - - /** Returns the boost factor for hits for this field. - * - *

      The default value is 1.0. - * - *

      Note: this value is not stored directly with the document in the index. - * Documents returned from {@link org.apache.lucene.index.IndexReader#document(int)} and - * {@link org.apache.lucene.search.IndexSearcher#doc(int)} may thus not have the same value present as when - * this field was indexed. - * - * @see #setBoost(float) - */ - float getBoost(); - - /** Returns the name of the field. - * For example "date", "title", "body", ... - */ - String name(); - - /** The value of the field as a String, or null. - *

      - * For indexing, if isStored()==true, the stringValue() will be used as the stored field value - * unless isBinary()==true, in which case getBinaryValue() will be used. - * - * If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. - * If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate indexed tokens if not null, - * else readerValue() will be used to generate indexed tokens if not null, else stringValue() will be used to generate tokens. - */ - public String stringValue(); - - /** The value of the field as a Reader, which can be used at index time to generate indexed tokens. - * @see #stringValue() - */ - public Reader readerValue(); - - /** The TokenStream for this field to be used when indexing, or null. - * @see #stringValue() - */ - public TokenStream tokenStreamValue(); - - /** True if the value of the field is to be stored in the index for return - with search hits. */ - boolean isStored(); - - /** True if the value of the field is to be indexed, so that it may be - searched on. */ - boolean isIndexed(); - - /** True if the value of the field should be tokenized as text prior to - indexing. Un-tokenized fields are indexed as a single word and may not be - Reader-valued. */ - boolean isTokenized(); - - /** True if the term or terms used to index this field are stored as a term - * vector, available from {@link org.apache.lucene.index.IndexReader#getTermFreqVector(int,String)}. - * These methods do not provide access to the original content of the field, - * only to terms used to index it. If the original content must be - * preserved, use the stored attribute instead. - * - * @see org.apache.lucene.index.IndexReader#getTermFreqVector(int, String) - */ - boolean isTermVectorStored(); - - /** - * True if terms are stored as term vector together with their offsets - * (start and end positon in source text). - */ - boolean isStoreOffsetWithTermVector(); - - /** - * True if terms are stored as term vector together with their token positions. - */ - boolean isStorePositionWithTermVector(); - - /** True if the value of the field is stored as binary */ - boolean isBinary(); - - /** True if norms are omitted for this indexed field */ - boolean getOmitNorms(); - - /** Expert: - * - * If set, omit normalization factors associated with this indexed field. - * This effectively disables indexing boosts and length normalization for this field. - */ - void setOmitNorms(boolean omitNorms); - - /** - * Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field is lazily loaded, retrieving - * it's values via {@link #stringValue()} or {@link #getBinaryValue()} is only valid as long as the {@link org.apache.lucene.index.IndexReader} that - * retrieved the {@link Document} is still open. - * - * @return true if this field can be loaded lazily - */ - boolean isLazy(); - - /** - * Returns offset into byte[] segment that is used as value, if Field is not binary - * returned value is undefined - * @return index of the first character in byte[] segment that represents this Field value - */ - abstract int getBinaryOffset(); - - /** - * Returns length of byte[] segment that is used as value, if Field is not binary - * returned value is undefined - * @return length of byte[] segment that represents this Field value - */ - abstract int getBinaryLength(); - - /** - * Return the raw byte[] for the binary field. 
Note that - * you must also call {@link #getBinaryLength} and {@link - * #getBinaryOffset} to know which range of bytes in this - * returned array belong to the field. - * @return reference to the Field value as byte[]. - */ - abstract byte[] getBinaryValue(); - - /** - * Return the raw byte[] for the binary field. Note that - * you must also call {@link #getBinaryLength} and {@link - * #getBinaryOffset} to know which range of bytes in this - * returned array belong to the field.

      - * About reuse: if you pass in the result byte[] and it is - * used, likely the underlying implementation will hold - * onto this byte[] and return it in future calls to - * {@link #getBinaryValue()}. - * So if you subsequently re-use the same byte[] elsewhere - * it will alter this Fieldable's value. - * @param result User defined buffer that will be used if - * possible. If this is null or not large enough, a new - * buffer is allocated - * @return reference to the Field value as byte[]. - */ - abstract byte[] getBinaryValue(byte[] result); - - /** @see #setIndexOptions */ - IndexOptions getIndexOptions(); - - /** Expert: - * - * If set, omit term freq, and optionally positions and payloads from - * postings for this field. - * - *

      NOTE: While this option reduces storage space - * required in the index, it also means any query - * requiring positional information, such as {@link - * PhraseQuery} or {@link SpanQuery} subclasses will - * fail with an exception. - */ - void setIndexOptions(IndexOptions indexOptions); - - /** - * Returns the {@link PerDocFieldValues} - */ - public PerDocFieldValues getDocValues(); - - /** - * Sets the {@link PerDocFieldValues} for this field. If - * {@link PerDocFieldValues} is set this field will store per-document values - * - * @see IndexDocValues - */ - public void setDocValues(PerDocFieldValues docValues); - - /** - * Returns true iff {@link PerDocFieldValues} are set on this - * field. - */ - public boolean hasDocValues(); - - /** - * Returns the {@link ValueType} of the set {@link PerDocFieldValues} or - * null if not set. - */ - public ValueType docValuesType(); -} diff --git a/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java b/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java index e7984af610b..53288ca321f 100644 --- a/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java +++ b/lucene/src/java/org/apache/lucene/document/IndexDocValuesField.java @@ -20,16 +20,13 @@ import java.io.Reader; import java.util.Comparator; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.index.values.ValueType; import org.apache.lucene.util.BytesRef; /** *

      - * This class provides a {@link AbstractField} that enables storing of typed + * This class provides a {@link Field} that enables storing of typed * per-document values for scoring, sorting or value retrieval. Here's an * example usage, adding an int value: * @@ -54,16 +51,14 @@ import org.apache.lucene.util.BytesRef; *

    * *

    - * If doc values are stored in addition to an indexed ({@link Index}) or stored - * ({@link Store}) value it's recommended to use the {@link IndexDocValuesField}'s - * {@link #set(AbstractField)} API: + * If doc values are stored in addition to an indexed ({@link FieldType#setIndexed(boolean)}) or stored + * ({@link FieldType#setStored(boolean)}) value it's recommended to pass the appropriate {@link FieldType} + * when creating the field: * *

    - *  IndexDocValuesField field = new IndexDocValuesField(name);
    - *  Field indexedField = new Field(name, stringValue, Stored.NO, Indexed.ANALYZED);
    + *  IndexDocValuesField field = new IndexDocValuesField(name, StringField.TYPE_STORED);
      *  Document document = new Document();
    - *  document.add(indexedField);
    - *  field.set(indexedField);
    + *  document.add(field);
      *  for(all documents) {
      *    ...
      *    field.setInt(value)
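(For reference, the re-use pattern in the javadoc above, spelled out as compilable code. A sketch only, assuming the usual org.apache.lucene.document imports; the field name "rank", the ranks array and the writer are illustrative, not part of this patch:

    IndexDocValuesField field = new IndexDocValuesField("rank", StringField.TYPE_STORED);
    Document document = new Document();
    document.add(field);                       // one Field + Document pair, re-used per document
    for (int i = 0; i < ranks.length; i++) {   // ranks is a hypothetical int[]
      field.setInt(ranks[i]);                  // stores as ValueType.VAR_INTS unless a type was set
      writer.addDocument(document);            // writer is a hypothetical IndexWriter
    }
)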
    @@ -73,7 +68,8 @@ import org.apache.lucene.util.BytesRef;
      * 
    * * */ -public class IndexDocValuesField extends AbstractField implements PerDocFieldValues { +// TODO: maybe rename to DocValuesField? +public class IndexDocValuesField extends Field implements PerDocFieldValues { protected BytesRef bytes; protected double doubleValue; @@ -85,21 +81,27 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal * Creates a new {@link IndexDocValuesField} with the given name. */ public IndexDocValuesField(String name) { - super(name, Store.NO, Index.NO, TermVector.NO); - setDocValues(this); + this(name, new FieldType()); } - /** - * Creates a {@link IndexDocValuesField} prototype - */ - IndexDocValuesField() { - this(""); + public IndexDocValuesField(String name, FieldType type) { + this(name, type, null); + } + + public IndexDocValuesField(String name, FieldType type, String value) { + super(name, type); + fieldsData = value; + } + + @Override + public PerDocFieldValues docValues() { + return this; } /** * Sets the given long value and sets the field's {@link ValueType} to * {@link ValueType#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setType(ValueType)}. + * default type use {@link #setDocValuesType(ValueType)}. */ public void setInt(long value) { setInt(value, false); @@ -124,7 +126,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal /** * Sets the given int value and sets the field's {@link ValueType} to * {@link ValueType#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setType(ValueType)}. + * default type use {@link #setDocValuesType(ValueType)}. */ public void setInt(int value) { setInt(value, false); @@ -149,7 +151,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal /** * Sets the given short value and sets the field's {@link ValueType} to * {@link ValueType#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setType(ValueType)}. + * default type use {@link #setDocValuesType(ValueType)}. */ public void setInt(short value) { setInt(value, false); @@ -174,11 +176,12 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal /** * Sets the given byte value and sets the field's {@link ValueType} to * {@link ValueType#VAR_INTS} unless already set. If you want to change the - * default type use {@link #setType(ValueType)}. + * default type use {@link #setDocValuesType(ValueType)}. */ public void setInt(byte value) { setInt(value, false); } + /** * Sets the given byte value as a 8 bit signed integer. * @@ -198,7 +201,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal /** * Sets the given float value and sets the field's {@link ValueType} * to {@link ValueType#FLOAT_32} unless already set. If you want to - * change the type use {@link #setType(ValueType)}. + * change the type use {@link #setDocValuesType(ValueType)}. */ public void setFloat(float value) { if (type == null) { @@ -210,7 +213,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal /** * Sets the given double value and sets the field's {@link ValueType} * to {@link ValueType#FLOAT_64} unless already set. If you want to - * change the default type use {@link #setType(ValueType)}. + * change the default type use {@link #setDocValuesType(ValueType)}. 
*/ public void setFloat(double value) { if (type == null) { @@ -241,7 +244,7 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal if (value == null) { throw new IllegalArgumentException("value must not be null"); } - setType(type); + setDocValuesType(type); if (bytes == null) { bytes = new BytesRef(value); } else { @@ -289,20 +292,13 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal /** * Sets the {@link ValueType} for this field. */ - public void setType(ValueType type) { + public void setDocValuesType(ValueType type) { if (type == null) { throw new IllegalArgumentException("Type must not be null"); } this.type = type; } - /** - * Returns the field's {@link ValueType} - */ - public ValueType type() { - return type; - } - /** * Returns always null */ @@ -310,13 +306,6 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal return null; } - /** - * Returns always null - */ - public String stringValue() { - return null; - } - /** * Returns always null */ @@ -324,25 +313,14 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal return null; } - /** - * Sets this {@link IndexDocValuesField} to the given {@link AbstractField} and - * returns the given field. Any modifications to this instance will be visible - * to the given field. - */ - public T set(T field) { - field.setDocValues(this); - return field; + @Override + public ValueType docValuesType() { + return type; } - /** - * Sets a new {@link PerDocFieldValues} instance on the given field with the - * given type and returns it. - * - */ - public static T set(T field, ValueType type) { - if (field instanceof IndexDocValuesField) - return field; - final IndexDocValuesField valField = new IndexDocValuesField(); + @Override + public String toString() { + final String value; switch (type) { case BYTES_FIXED_DEREF: case BYTES_FIXED_SORTED: @@ -350,9 +328,43 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal case BYTES_VAR_DEREF: case BYTES_VAR_SORTED: case BYTES_VAR_STRAIGHT: - BytesRef ref = field.isBinary() ? new BytesRef(field.getBinaryValue(), - field.getBinaryOffset(), field.getBinaryLength()) : new BytesRef( - field.stringValue()); + value = "bytes:" + bytes.utf8ToString(); + break; + case VAR_INTS: + value = "int:" + longValue; + break; + case FLOAT_32: + value = "float32:" + doubleValue; + break; + case FLOAT_64: + value = "float64:" + doubleValue; + break; + default: + throw new IllegalArgumentException("unknown type: " + type); + } + return "<" + name() + ": IndexDocValuesField " + value + ">"; + } + + /** + * Returns an IndexDocValuesField holding the value from + * the provided string field, as the specified type. The + * incoming field must have a string value. The name, {@link + * FieldType} and string value are carried over from the + * incoming Field. + */ + public static IndexDocValuesField build(Field field, ValueType type) { + if (field instanceof IndexDocValuesField) { + return (IndexDocValuesField) field; + } + final IndexDocValuesField valField = new IndexDocValuesField(field.name(), field.getFieldType(), field.stringValue()); + switch (type) { + case BYTES_FIXED_DEREF: + case BYTES_FIXED_SORTED: + case BYTES_FIXED_STRAIGHT: + case BYTES_VAR_DEREF: + case BYTES_VAR_SORTED: + case BYTES_VAR_STRAIGHT: + BytesRef ref = field.isBinary() ?
field.binaryValue() : new BytesRef(field.stringValue()); valField.setBytes(ref, type); break; case VAR_INTS: @@ -367,7 +379,6 @@ public class IndexDocValuesField extends AbstractField implements PerDocFieldVal default: throw new IllegalArgumentException("unknown type: " + type); } - return valField.set(field); + return valField; } - } diff --git a/lucene/src/java/org/apache/lucene/document/NumericField.java b/lucene/src/java/org/apache/lucene/document/NumericField.java index ff92463b4dd..4b97f40c490 100644 --- a/lucene/src/java/org/apache/lucene/document/NumericField.java +++ b/lucene/src/java/org/apache/lucene/document/NumericField.java @@ -22,28 +22,30 @@ import java.io.Reader; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.NumericTokenStream; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.NumericField.DataType; import org.apache.lucene.util.NumericUtils; import org.apache.lucene.search.NumericRangeQuery; // javadocs import org.apache.lucene.search.NumericRangeFilter; // javadocs import org.apache.lucene.search.FieldCache; // javadocs /** - *

    This class provides a {@link Field} that enables indexing - * of numeric values for efficient range filtering and - * sorting. Here's an example usage, adding an int value: + *

    + * This class provides a {@link Field} that enables indexing of numeric values + * for efficient range filtering and sorting. Here's an example usage, adding an + * int value: + * *

    - *  document.add(new NumericField(name).setIntValue(value));
    + * document.add(new NumericField(name).setIntValue(value));
      * 
    - * - * For optimal performance, re-use the - * NumericField and {@link Document} instance for more than - * one document: - * + * + * For optimal performance, re-use the NumericField and + * {@link Document} instance for more than one document: + * *
      *  NumericField field = new NumericField(name);
      *  Document document = new Document();
      *  document.add(field);
    - *
    + * 
      *  for(all documents) {
      *    ...
      *    field.setIntValue(value)
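(The same re-use idiom spelled out for NumericField. A sketch only, assuming the usual imports; "price", the prices array and the writer are illustrative:

    NumericField field = new NumericField("price");  // indexed with TYPE_UNSTORED by default
    Document document = new Document();
    document.add(field);
    for (int i = 0; i < prices.length; i++) {  // prices is a hypothetical int[]
      field.setIntValue(prices[i]);
      writer.addDocument(document);            // writer is a hypothetical IndexWriter
    }
)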
    @@ -74,7 +76,7 @@ import org.apache.lucene.search.FieldCache; // javadocs
      *
      * 

    By default, a NumericField's value is not stored but * is indexed for range filtering and sorting. You can use - * the {@link #NumericField(String,Field.Store,boolean)} + * the {@link #NumericField(String, FieldType)} * constructor if you need to change these defaults.

    * *

    You may add the same field name as a NumericField to @@ -102,7 +104,7 @@ import org.apache.lucene.search.FieldCache; // javadocs * default value, 4, was selected for a reasonable tradeoff * of disk space consumption versus performance. You can * use the expert constructor {@link - * #NumericField(String,int,Field.Store,boolean)} if you'd + * #NumericField(String,int,FieldType)} if you'd * like to change the value. Note that you must also * specify a congruent value when creating {@link * NumericRangeQuery} or {@link NumericRangeFilter}. @@ -129,94 +131,136 @@ import org.apache.lucene.search.FieldCache; // javadocs * * @since 2.9 */ -public final class NumericField extends AbstractField { - +public final class NumericField extends Field { + /** Data type of the value in {@link NumericField}. * @since 3.2 */ public static enum DataType { INT, LONG, FLOAT, DOUBLE } - private transient NumericTokenStream numericTS; - private DataType type; - private final int precisionStep; + public static final FieldType TYPE_UNSTORED = new FieldType(); + public static final FieldType TYPE_STORED = new FieldType(); + static { + TYPE_UNSTORED.setIndexed(true); + TYPE_UNSTORED.setTokenized(true); + TYPE_UNSTORED.setOmitNorms(true); + TYPE_UNSTORED.setIndexOptions(IndexOptions.DOCS_ONLY); + TYPE_UNSTORED.freeze(); + TYPE_STORED.setIndexed(true); + TYPE_STORED.setStored(true); + TYPE_STORED.setTokenized(true); + TYPE_STORED.setOmitNorms(true); + TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); + TYPE_STORED.freeze(); + } + + //public static enum DataType { INT, LONG, FLOAT, DOUBLE } + + private DataType dataType; + private transient NumericTokenStream numericTS; + private final int precisionStep; + /** - * Creates a field for numeric values using the default precisionStep - * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * This constructor creates an indexed, but not stored field. - * @param name the field name + * Creates a field for numeric values using the default + * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * The instance is not yet initialized with a numeric value, before indexing a + * document containing this field, set a value using the various set + * ???Value() methods. This constructor creates an indexed, but not + * stored field. + * + * @param name + * the field name */ public NumericField(String name) { - this(name, NumericUtils.PRECISION_STEP_DEFAULT, Field.Store.NO, true); + this(name, NumericUtils.PRECISION_STEP_DEFAULT, NumericField.TYPE_UNSTORED); } /** - * Creates a field for numeric values using the default precisionStep - * {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * @param name the field name - * @param store if the field should be stored, {@link Document#getFieldable} - * then returns {@code NumericField} instances on search results. - * @param index if the field should be indexed using {@link NumericTokenStream} + * Creates a field for numeric values using the default + * precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + * The instance is not yet initialized with a numeric value, before indexing a + * document containing this field, set a value using the various set + * ???Value() methods. 
+ * + * @param name + * the field name + * @param type + * if the default field settings should be altered, e.g. stored, + * {@link Document#getField} then returns {@code NumericField} + * instances on search results, or indexed using + * {@link NumericTokenStream} */ - public NumericField(String name, Field.Store store, boolean index) { - this(name, NumericUtils.PRECISION_STEP_DEFAULT, store, index); + public NumericField(String name, FieldType type) { + this(name, NumericUtils.PRECISION_STEP_DEFAULT, type); } /** * Creates a field for numeric values with the specified - * precisionStep. The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * This constructor creates an indexed, but not stored field. - * @param name the field name - * @param precisionStep the used precision step + * precisionStep. The instance is not yet initialized with a + * numeric value, before indexing a document containing this field, set a + * value using the various set???Value() methods. This constructor + * creates an indexed, but not stored field. + * + * @param name + * the field name + * @param precisionStep + * the used precision step */ public NumericField(String name, int precisionStep) { - this(name, precisionStep, Field.Store.NO, true); + this(name, precisionStep, NumericField.TYPE_UNSTORED); } - + /** * Creates a field for numeric values with the specified - * precisionStep. The instance is not yet initialized with - * a numeric value, before indexing a document containing this field, - * set a value using the various set???Value() methods. - * @param name the field name - * @param precisionStep the used precision step - * @param store if the field should be stored, {@link Document#getFieldable} - * then returns {@code NumericField} instances on search results. - * @param index if the field should be indexed using {@link NumericTokenStream} + * precisionStep. The instance is not yet initialized with a + * numeric value, before indexing a document containing this field, set a + * value using the various set???Value() methods. + * + * @param name + * the field name + * @param precisionStep + * the used precision step + * @param type + * if the default field settings should be altered, e.g. stored, + * {@link Document#getField} then returns {@code NumericField} + * instances on search results, or indexed using + * {@link NumericTokenStream} */ - public NumericField(String name, int precisionStep, Field.Store store, boolean index) { - super(name, store, index ? Field.Index.ANALYZED_NO_NORMS : Field.Index.NO, Field.TermVector.NO); + public NumericField(String name, int precisionStep, FieldType type) { + super(name, type); this.precisionStep = precisionStep; - setIndexOptions(IndexOptions.DOCS_ONLY); } - + /** Returns a {@link NumericTokenStream} for indexing the numeric value.
*/ - public TokenStream tokenStreamValue() { - if (!isIndexed()) - return null; + public TokenStream tokenStreamValue() { + if (!indexed()) return null; if (numericTS == null) { - // lazy init the TokenStream as it is heavy to instantiate (attributes,...), + // lazy init the TokenStream as it is heavy to instantiate + // (attributes,...), // if not needed (stored field loading) numericTS = new NumericTokenStream(precisionStep); // initialize value in TokenStream if (fieldsData != null) { - assert type != null; + assert dataType != null; final Number val = (Number) fieldsData; - switch (type) { + switch (dataType) { case INT: - numericTS.setIntValue(val.intValue()); break; + numericTS.setIntValue(val.intValue()); + break; case LONG: - numericTS.setLongValue(val.longValue()); break; + numericTS.setLongValue(val.longValue()); + break; case FLOAT: - numericTS.setFloatValue(val.floatValue()); break; + numericTS.setFloatValue(val.floatValue()); + break; case DOUBLE: - numericTS.setDoubleValue(val.doubleValue()); break; + numericTS.setDoubleValue(val.doubleValue()); + break; default: assert false : "Should never get here"; } @@ -225,27 +269,28 @@ public final class NumericField extends AbstractField { return numericTS; } - /** Returns always null for numeric fields */ - @Override - public byte[] getBinaryValue(byte[] result){ - return null; - } - /** Returns always null for numeric fields */ public Reader readerValue() { return null; } - - /** Returns the numeric value as a string. This format is also returned if you call {@link Document#get(String)} - * on search results. It is recommended to use {@link Document#getFieldable} instead - * that returns {@code NumericField} instances. You can then use {@link #getNumericValue} - * to return the stored value. */ - public String stringValue() { + + /** + * Returns the numeric value as a string. It is recommended to + * use {@link Document#getField} instead that returns {@code NumericField} + * instances. You can then use {@link #numericValue} to return the stored + * value. + */ + @Override + public String stringValue() { return (fieldsData == null) ? null : fieldsData.toString(); } - /** Returns the current numeric value as a subclass of {@link Number}, null if not yet initialized. */ - public Number getNumericValue() { + /** + * Returns the current numeric value as a subclass of {@link Number}, + * null if not yet initialized. + */ + @Override + public Number numericValue() { return (Number) fieldsData; } @@ -254,63 +299,79 @@ public final class NumericField extends AbstractField { return precisionStep; } - /** Returns the data type of the current value, {@code null} if not yet set. + /** + * Returns the data type of the current value, {@code null} if not yet set. + * * @since 3.2 */ - public DataType getDataType() { - return type; + @Override + public DataType numericDataType() { + return dataType; } - + + @Override + public boolean numeric() { + return true; + } + /** * Initializes the field with the supplied long value. 
- * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setLongValue(value)) + * document.add(new NumericField(name, precisionStep).setLongValue(value)) */ public NumericField setLongValue(final long value) { if (numericTS != null) numericTS.setLongValue(value); fieldsData = Long.valueOf(value); - type = DataType.LONG; + dataType = DataType.LONG; return this; } /** * Initializes the field with the supplied int value. - * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setIntValue(value)) + * document.add(new NumericField(name, precisionStep).setIntValue(value)) */ public NumericField setIntValue(final int value) { if (numericTS != null) numericTS.setIntValue(value); fieldsData = Integer.valueOf(value); - type = DataType.INT; + dataType = DataType.INT; return this; } /** * Initializes the field with the supplied double value. - * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setDoubleValue(value)) + * document.add(new NumericField(name, precisionStep).setDoubleValue(value)) */ public NumericField setDoubleValue(final double value) { if (numericTS != null) numericTS.setDoubleValue(value); fieldsData = Double.valueOf(value); - type = DataType.DOUBLE; + dataType = DataType.DOUBLE; return this; } /** * Initializes the field with the supplied float value. - * @param value the numeric value + * + * @param value + * the numeric value * @return this instance, because of this you can use it the following way: - * document.add(new NumericField(name, precisionStep).setFloatValue(value)) + * document.add(new NumericField(name, precisionStep).setFloatValue(value)) */ public NumericField setFloatValue(final float value) { if (numericTS != null) numericTS.setFloatValue(value); fieldsData = Float.valueOf(value); - type = DataType.FLOAT; + dataType = DataType.FLOAT; return this; } - + } diff --git a/lucene/src/java/org/apache/lucene/document/StringField.java b/lucene/src/java/org/apache/lucene/document/StringField.java new file mode 100644 index 00000000000..a1b71239072 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/document/StringField.java @@ -0,0 +1,51 @@ +package org.apache.lucene.document; + +import org.apache.lucene.index.FieldInfo.IndexOptions; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +public final class StringField extends Field { + + public static final FieldType TYPE_UNSTORED = new FieldType(); + public static final FieldType TYPE_STORED = new FieldType(); + static { + TYPE_UNSTORED.setIndexed(true); + TYPE_UNSTORED.setOmitNorms(true); + TYPE_UNSTORED.setIndexOptions(IndexOptions.DOCS_ONLY); + TYPE_UNSTORED.freeze(); + + TYPE_STORED.setIndexed(true); + TYPE_STORED.setStored(true); + TYPE_STORED.setOmitNorms(true); + TYPE_STORED.setIndexOptions(IndexOptions.DOCS_ONLY); + TYPE_STORED.freeze(); + } + + public StringField(String name, String value) { + super(name, TYPE_UNSTORED, value); + } + + @Override + public String stringValue() { + return (fieldsData == null) ? null : fieldsData.toString(); + } + + public boolean isNumeric() { + return false; + } +} diff --git a/lucene/src/java/org/apache/lucene/document/TextField.java b/lucene/src/java/org/apache/lucene/document/TextField.java new file mode 100644 index 00000000000..302905243a9 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/document/TextField.java @@ -0,0 +1,54 @@ +package org.apache.lucene.document; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; + +import org.apache.lucene.analysis.TokenStream; + +public final class TextField extends Field { + + public static final FieldType TYPE_UNSTORED = new FieldType(); + public static final FieldType TYPE_STORED = new FieldType(); + static { + TYPE_UNSTORED.setIndexed(true); + TYPE_UNSTORED.setTokenized(true); + TYPE_UNSTORED.freeze(); + + TYPE_STORED.setIndexed(true); + TYPE_STORED.setStored(true); + TYPE_STORED.setTokenized(true); + TYPE_STORED.freeze(); + } + + public TextField(String name, Reader reader) { + super(name, TextField.TYPE_UNSTORED, reader); + } + + public TextField(String name, String value) { + super(name, TextField.TYPE_UNSTORED, value); + } + + public TextField(String name, TokenStream stream) { + super(name, TextField.TYPE_UNSTORED, stream); + } + + public boolean isNumeric() { + return false; + } +} diff --git a/lucene/src/java/org/apache/lucene/document/package.html b/lucene/src/java/org/apache/lucene/document/package.html index e497184519e..57580f1f0bc 100644 --- a/lucene/src/java/org/apache/lucene/document/package.html +++ b/lucene/src/java/org/apache/lucene/document/package.html @@ -22,16 +22,16 @@

    The logical representation of a {@link org.apache.lucene.document.Document} for indexing and searching.

The document package provides the user level logical representation of content to be indexed and searched. The
-package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.document.Fieldable}s.
-
-Document and Fieldable
-
-A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.document.Fieldable}s. A
-{@link org.apache.lucene.document.Fieldable} is a logical representation of a user's content that needs to be indexed or stored.
-{@link org.apache.lucene.document.Fieldable}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized,
-stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.document.Fieldable}
+package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.index.IndexableField}s.
+
+Document and IndexableField
+
+A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.index.IndexableField}s. A
+{@link org.apache.lucene.index.IndexableField} is a logical representation of a user's content that needs to be indexed or stored.
+{@link org.apache.lucene.index.IndexableField}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized,
+stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.index.IndexableField}
 for specifics on these properties.

Note: it is common to refer to {@link org.apache.lucene.document.Document}s having {@link org.apache.lucene.document.Field}s, even though technically they have
-{@link org.apache.lucene.document.Fieldable}s.
+{@link org.apache.lucene.index.IndexableField}s.

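(To make the new field API concrete, a minimal sketch of building a Document with the classes this patch introduces, assuming the usual org.apache.lucene.document imports plus java.io.StringReader; the field names and values are illustrative:

    Document doc = new Document();
    doc.add(new StringField("id", "doc-42"));                           // indexed as a single token, not stored
    doc.add(new TextField("body", new StringReader("hello world")));    // analyzed, not stored
    doc.add(new Field("title", TextField.TYPE_STORED, "Hello World"));  // analyzed and stored

    // Non-default properties go through a FieldType; freeze() then makes it immutable:
    FieldType withVectors = new FieldType(TextField.TYPE_STORED);  // copy an existing type
    withVectors.setStoreTermVectors(true);
    withVectors.freeze();  // subsequent setters throw IllegalStateException
    doc.add(new Field("summary", withVectors, "a short summary"));
)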
    Working with Documents

    First and foremost, a {@link org.apache.lucene.document.Document} is something created by the user application. It is your job to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.) @@ -45,7 +45,7 @@ package also provides utilities for working with {@link org.apache.lucene.docume to simplify indexing of numeric values (and also dates) for fast range range queries with {@link org.apache.lucene.search.NumericRangeQuery} (using a special sortable string representation of numeric values).

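(For example, a value indexed with NumericField is matched at search time with NumericRangeQuery. A sketch; the field name and bounds are illustrative:

    document.add(new NumericField("price").setIntValue(15));                // index time
    Query q = NumericRangeQuery.newIntRange("price", 10, 100, true, true);  // matches 10 <= price <= 100
)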
    The {@link org.apache.lucene.document.FieldSelector} class provides a mechanism to tell Lucene how to load Documents from -storage. If no FieldSelector is used, all Fieldables on a Document will be loaded. As an example of the FieldSelector usage, consider +storage. If no FieldSelector is used, all IndexableFields on a Document will be loaded. As an example of the FieldSelector usage, consider the common use case of displaying search results on a web page and then having users click through to see the full document. In this scenario, it is often the case that there are many small fields and one or two large fields (containing the contents of the original file). Before the FieldSelector, diff --git a/lucene/src/java/org/apache/lucene/index/CheckIndex.java b/lucene/src/java/org/apache/lucene/index/CheckIndex.java index 011d85b9ad4..875381d4281 100644 --- a/lucene/src/java/org/apache/lucene/index/CheckIndex.java +++ b/lucene/src/java/org/apache/lucene/index/CheckIndex.java @@ -17,6 +17,16 @@ package org.apache.lucene.index; * limitations under the License. */ +import org.apache.lucene.document.FieldType; // for javadocs +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.codecs.CodecProvider; +import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; import java.io.File; import java.io.IOException; import java.io.PrintStream; @@ -28,21 +38,11 @@ import java.util.HashMap; import java.util.List; import java.util.Map; -import org.apache.lucene.document.AbstractField; // for javadocs -import org.apache.lucene.document.Document; import org.apache.lucene.index.codecs.BlockTreeTermsReader; -import org.apache.lucene.index.codecs.CodecProvider; -import org.apache.lucene.index.codecs.DefaultSegmentInfosWriter; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.index.values.ValuesEnum; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.StringHelper; @@ -189,7 +189,7 @@ public class CheckIndex { /** True if at least one of the fields in this segment * has position data - * @see AbstractField#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */ + * @see FieldType#setIndexOptions(org.apache.lucene.index.FieldInfo.IndexOptions) */ public boolean hasProx; /** Map that includes certain diff --git a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java index 106b2f0b870..cd930332305 100644 --- a/lucene/src/java/org/apache/lucene/index/DirectoryReader.java +++ b/lucene/src/java/org/apache/lucene/index/DirectoryReader.java @@ -29,8 +29,6 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; import org.apache.lucene.store.Directory; import 
org.apache.lucene.store.IOContext; import org.apache.lucene.store.Lock; @@ -559,12 +557,11 @@ class DirectoryReader extends IndexReader implements Cloneable { return maxDoc; } - // inherit javadoc @Override - public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { ensureOpen(); - int i = readerIndex(n); // find segment num - return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader + int i = readerIndex(docID); // find segment num + subReaders[i].document(docID - starts[i], visitor); // dispatch to segment reader } @Override diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java index 960ea59eae8..38be3f84d16 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumerPerField.java @@ -18,11 +18,10 @@ package org.apache.lucene.index; */ import java.io.IOException; -import org.apache.lucene.document.Fieldable; abstract class DocFieldConsumerPerField { /** Processes all occurrences of a single field */ - abstract void processFields(Fieldable[] fields, int count) throws IOException; + abstract void processFields(IndexableField[] fields, int count) throws IOException; abstract void abort(); abstract FieldInfo getFieldInfo(); } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java index 5abf003d5a1..03dd6150857 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldConsumersPerField.java @@ -18,7 +18,6 @@ package org.apache.lucene.index; */ import java.io.IOException; -import org.apache.lucene.document.Fieldable; final class DocFieldConsumersPerField extends DocFieldConsumerPerField { @@ -35,7 +34,7 @@ final class DocFieldConsumersPerField extends DocFieldConsumerPerField { } @Override - public void processFields(Fieldable[] fields, int count) throws IOException { + public void processFields(IndexableField[] fields, int count) throws IOException { one.processFields(fields, count); two.processFields(fields, count); } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java index b3da4970a4e..90e2802a6cd 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessor.java @@ -22,15 +22,13 @@ import java.util.Collection; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; -import java.util.List; import java.util.Map; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.DocumentsWriterPerThread.DocState; import org.apache.lucene.index.codecs.Codec; -import org.apache.lucene.index.codecs.PerDocConsumer; import org.apache.lucene.index.codecs.DocValuesConsumer; +import org.apache.lucene.index.codecs.PerDocConsumer; +import org.apache.lucene.index.values.PerDocFieldValues; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.IOUtils; @@ -199,22 +197,16 @@ final class DocFieldProcessor extends DocConsumer { consumer.startDocument(); fieldsWriter.startDocument(); - final Document doc = 
docState.doc; - fieldCount = 0; final int thisFieldGen = fieldGen++; - final List docFields = doc.getFields(); - final int numDocFields = docFields.size(); - // Absorb any new fields first seen in this document. // Also absorb any changes to fields we had already // seen before (eg suddenly turning on norms or // vectors, etc.): - for(int i=0;i= fieldHash.length/2) + if (totalFieldCount >= fieldHash.length/2) { rehash(); + } } else { - fieldInfos.addOrUpdate(fp.fieldInfo.name, field.isIndexed(), field.isTermVectorStored(), - field.isStorePositionWithTermVector(), field.isStoreOffsetWithTermVector(), - field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType()); + fieldInfos.addOrUpdate(fp.fieldInfo.name, field.indexed(), field.storeTermVectors(), + field.storeTermVectorPositions(), field.storeTermVectorOffsets(), + field.omitNorms(), false, field.indexOptions(), field.docValuesType()); } if (thisFieldGen != fp.lastGen) { @@ -266,12 +259,12 @@ final class DocFieldProcessor extends DocConsumer { fp.addField(field); - if (field.isStored()) { + if (field.stored()) { fieldsWriter.addField(field, fp.fieldInfo); } - if (field.hasDocValues()) { - final DocValuesConsumer docValuesConsumer = docValuesConsumer(docState, fp.fieldInfo); - docValuesConsumer.add(docState.docID, field.getDocValues()); + final PerDocFieldValues docValues = field.docValues(); + if (docValues != null) { + docValuesConsumer(docState, fp.fieldInfo).add(docState.docID, docValues); } } @@ -339,5 +332,4 @@ final class DocFieldProcessor extends DocConsumer { docValues.put(fieldInfo.name, docValuesConsumer); return docValuesConsumer; } - } diff --git a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java index 36b1908f6d3..f0d3e3027ab 100644 --- a/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocFieldProcessorPerField.java @@ -17,7 +17,6 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.document.Fieldable; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -34,17 +33,17 @@ final class DocFieldProcessorPerField { int lastGen = -1; int fieldCount; - Fieldable[] fields = new Fieldable[1]; + IndexableField[] fields = new IndexableField[1]; public DocFieldProcessorPerField(final DocFieldProcessor docFieldProcessor, final FieldInfo fieldInfo) { this.consumer = docFieldProcessor.consumer.addField(fieldInfo); this.fieldInfo = fieldInfo; } - public void addField(Fieldable field) { + public void addField(IndexableField field) { if (fieldCount == fields.length) { int newSize = ArrayUtil.oversize(fieldCount + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); - Fieldable[] newArray = new Fieldable[newSize]; + IndexableField[] newArray = new IndexableField[newSize]; System.arraycopy(fields, 0, newArray, 0, fieldCount); fields = newArray; } diff --git a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java index 2c0294ee08a..f4a97e25935 100644 --- a/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/DocInverterPerField.java @@ -19,7 +19,6 @@ package org.apache.lucene.index; import java.io.IOException; import java.io.Reader; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; @@ -61,27 +60,32 @@ final class DocInverterPerField extends DocFieldConsumerPerField { } @Override - public void processFields(final Fieldable[] fields, + public void processFields(final IndexableField[] fields, final int count) throws IOException { - fieldState.reset(docState.doc.getBoost()); + fieldState.reset(); final boolean doInvert = consumer.start(fields, count); for(int i=0;i 0) fieldState.position += docState.analyzer == null ? 0 : docState.analyzer.getPositionIncrementGap(fieldInfo.name); - if (!field.isTokenized()) { // un-tokenized field - String stringValue = field.stringValue(); + // TODO (LUCENE-2309): this analysis logic should be + // outside of indexer -- field should simply give us + // a TokenStream, even for multi-valued fields + + if (!field.tokenized()) { // un-tokenized field + final String stringValue = field.stringValue(); + assert stringValue != null; final int valueLength = stringValue.length(); parent.singleToken.reinit(stringValue, 0, valueLength); fieldState.attributeSource = parent.singleToken; @@ -103,17 +107,17 @@ final class DocInverterPerField extends DocFieldConsumerPerField { final TokenStream stream; final TokenStream streamValue = field.tokenStreamValue(); - if (streamValue != null) + if (streamValue != null) { stream = streamValue; - else { + } else { // the field does not have a TokenStream, // so we have to obtain one from the analyzer final Reader reader; // find or make Reader final Reader readerValue = field.readerValue(); - if (readerValue != null) + if (readerValue != null) { reader = readerValue; - else { + } else { String stringValue = field.stringValue(); if (stringValue == null) { throw new IllegalArgumentException("field must have either TokenStream, String or Reader value"); @@ -189,7 +193,7 @@ final class DocInverterPerField extends DocFieldConsumerPerField { } fieldState.offset += docState.analyzer == null ? 
0 : docState.analyzer.getOffsetGap(field); - fieldState.boost *= field.getBoost(); + fieldState.boost *= field.boost(); } // LUCENE-2387: don't hang onto the field, so GC can diff --git a/lucene/src/java/org/apache/lucene/index/DocumentStoredFieldVisitor.java b/lucene/src/java/org/apache/lucene/index/DocumentStoredFieldVisitor.java new file mode 100644 index 00000000000..0b689376aaa --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/DocumentStoredFieldVisitor.java @@ -0,0 +1,142 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Set; +import java.util.HashSet; + +import org.apache.lucene.document.BinaryField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.store.IndexInput; + +/** A {@link StoredFieldVisitor} that creates a {@link + * Document} containing all stored fields, or only specific + * requested fields provided to {@link #DocumentStoredFieldVisitor(Set)} + * This is used by {@link IndexReader#document(int)} to load a + * document. + * + * @lucene.experimental */ + +public class DocumentStoredFieldVisitor extends StoredFieldVisitor { + private final Document doc = new Document(); + private final Set fieldsToAdd; + + /** Load only fields named in the provided Set<String>. */ + public DocumentStoredFieldVisitor(Set fieldsToAdd) { + this.fieldsToAdd = fieldsToAdd; + } + + /** Load only fields named in the provided Set<String>. */ + public DocumentStoredFieldVisitor(String... fields) { + fieldsToAdd = new HashSet(fields.length); + for(String field : fields) { + fieldsToAdd.add(field); + } + } + + /** Load all stored fields. 
*/ + public DocumentStoredFieldVisitor() { + this.fieldsToAdd = null; + } + + @Override + public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException { + if (accept(fieldInfo)) { + final byte[] b = new byte[numBytes]; + in.readBytes(b, 0, b.length); + doc.add(new BinaryField(fieldInfo.name, b)); + } else { + in.seek(in.getFilePointer() + numBytes); + } + return false; + } + + @Override + public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException { + if (accept(fieldInfo)) { + final byte[] b = new byte[numUTF8Bytes]; + in.readBytes(b, 0, b.length); + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setStoreTermVectors(fieldInfo.storeTermVector); + ft.setStoreTermVectorPositions(fieldInfo.storePositionWithTermVector); + ft.setStoreTermVectorOffsets(fieldInfo.storeOffsetWithTermVector); + ft.setStoreTermVectors(fieldInfo.storeTermVector); + ft.setOmitNorms(fieldInfo.omitNorms); + ft.setIndexOptions(fieldInfo.indexOptions); + doc.add(new Field(fieldInfo.name, + ft, + new String(b, "UTF-8"))); + } else { + in.seek(in.getFilePointer() + numUTF8Bytes); + } + return false; + } + + @Override + public boolean intField(FieldInfo fieldInfo, int value) { + if (accept(fieldInfo)) { + FieldType ft = new FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + doc.add(new NumericField(fieldInfo.name, ft).setIntValue(value)); + } + return false; + } + + @Override + public boolean longField(FieldInfo fieldInfo, long value) { + if (accept(fieldInfo)) { + FieldType ft = new FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + doc.add(new NumericField(fieldInfo.name, ft).setLongValue(value)); + } + return false; + } + + @Override + public boolean floatField(FieldInfo fieldInfo, float value) { + if (accept(fieldInfo)) { + FieldType ft = new FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + doc.add(new NumericField(fieldInfo.name, ft).setFloatValue(value)); + } + return false; + } + + @Override + public boolean doubleField(FieldInfo fieldInfo, double value) { + if (accept(fieldInfo)) { + FieldType ft = new FieldType(NumericField.TYPE_STORED); + ft.setIndexed(fieldInfo.isIndexed); + doc.add(new NumericField(fieldInfo.name, ft).setDoubleValue(value)); + } + return false; + } + + private boolean accept(FieldInfo fieldInfo) { + return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name); + } + + public Document getDocument() { + return doc; + } +} diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java index d4589f246d2..945503a6310 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriter.java @@ -27,7 +27,6 @@ import java.util.Queue; import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.DocumentsWriterPerThread.IndexingChain; import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState; @@ -320,7 +319,7 @@ final class DocumentsWriter { return maybeMerge; } - boolean updateDocuments(final Iterable docs, final Analyzer analyzer, + boolean updateDocuments(final Iterable> docs, final Analyzer analyzer, final Term delTerm) throws CorruptIndexException, IOException { boolean maybeMerge = preUpdate(); @@ 
-351,7 +350,7 @@ final class DocumentsWriter { return postUpdate(flushingDWPT, maybeMerge); } - boolean updateDocument(final Document doc, final Analyzer analyzer, + boolean updateDocument(final Iterable doc, final Analyzer analyzer, final Term delTerm) throws CorruptIndexException, IOException { boolean maybeMerge = preUpdate(); diff --git a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java index 34460159839..226abbb7002 100644 --- a/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java +++ b/lucene/src/java/org/apache/lucene/index/DocumentsWriterPerThread.java @@ -26,7 +26,6 @@ import java.text.NumberFormat; import java.util.concurrent.atomic.AtomicLong; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; import org.apache.lucene.index.DocumentsWriterDeleteQueue.DeleteSlice; import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; @@ -90,7 +89,7 @@ public class DocumentsWriterPerThread { PrintStream infoStream; SimilarityProvider similarityProvider; int docID; - Document doc; + Iterable doc; String maxTermPrefix; DocState(DocumentsWriterPerThread docWriter) { @@ -213,7 +212,7 @@ public class DocumentsWriterPerThread { return retval; } - public void updateDocument(Document doc, Analyzer analyzer, Term delTerm) throws IOException { + public void updateDocument(Iterable doc, Analyzer analyzer, Term delTerm) throws IOException { assert writer.testPoint("DocumentsWriterPerThread addDocument start"); assert deleteQueue != null; docState.doc = doc; @@ -263,7 +262,7 @@ public class DocumentsWriterPerThread { finishDocument(delTerm); } - public int updateDocuments(Iterable docs, Analyzer analyzer, Term delTerm) throws IOException { + public int updateDocuments(Iterable> docs, Analyzer analyzer, Term delTerm) throws IOException { assert writer.testPoint("DocumentsWriterPerThread addDocuments start"); assert deleteQueue != null; docState.analyzer = analyzer; @@ -280,7 +279,7 @@ public class DocumentsWriterPerThread { } int docCount = 0; try { - for(Document doc : docs) { + for(Iterable doc : docs) { docState.doc = doc; docState.docID = numDocsInRAM; docCount++; diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfo.java b/lucene/src/java/org/apache/lucene/index/FieldInfo.java index 264ecb8414b..d02b85f5a90 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfo.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfo.java @@ -30,9 +30,9 @@ public final class FieldInfo { // true if term vector for this field should be stored - boolean storeTermVector; - boolean storeOffsetWithTermVector; - boolean storePositionWithTermVector; + public boolean storeTermVector; + public boolean storeOffsetWithTermVector; + public boolean storePositionWithTermVector; public boolean omitNorms; // omit norms associated with indexed fields public IndexOptions indexOptions; diff --git a/lucene/src/java/org/apache/lucene/index/FieldInfos.java b/lucene/src/java/org/apache/lucene/index/FieldInfos.java index 681f912e44b..cfb73b820a4 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInfos.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInfos.java @@ -39,8 +39,8 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.CodecUtil; -/** Access to the Fieldable Info file that describes document fields and whether or - * not they are indexed. 
Each segment has a separate Fieldable Info file. Objects +/** Access to the Field Info file that describes document fields and whether or + * not they are indexed. Each segment has a separate Field Info file. Objects * of this class are thread-safe for multiple readers, but only one thread can * be adding documents at a time, with no other reader or writer threads * accessing this object. @@ -381,7 +381,7 @@ public final class FieldInfos implements Iterable { /** * Calls 5 parameter add with false for all TermVector parameters. * - * @param name The name of the Fieldable + * @param name The name of the IndexableField * @param isIndexed true if the field is indexed * @see #addOrUpdate(String, boolean, boolean, boolean, boolean) */ diff --git a/lucene/src/java/org/apache/lucene/index/FieldInvertState.java b/lucene/src/java/org/apache/lucene/index/FieldInvertState.java index 0b54500c71d..2172bc38392 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldInvertState.java +++ b/lucene/src/java/org/apache/lucene/index/FieldInvertState.java @@ -50,14 +50,14 @@ public final class FieldInvertState { * Re-initialize the state, using this boost value. * @param docBoost boost value to use. */ - void reset(float docBoost) { + void reset() { position = 0; length = 0; numOverlap = 0; offset = 0; maxTermFrequency = 0; uniqueTermCount = 0; - boost = docBoost; + boost = 1.0f; attributeSource = null; } diff --git a/lucene/src/java/org/apache/lucene/index/FieldsReader.java b/lucene/src/java/org/apache/lucene/index/FieldsReader.java index f0fcbfc121f..d40a5fd6f66 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsReader.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsReader.java @@ -17,16 +17,9 @@ package org.apache.lucene.index; * limitations under the License. */ -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.AbstractField; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.FieldSelectorResult; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.NumericField; +import java.io.IOException; + import org.apache.lucene.store.AlreadyClosedException; -import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -34,9 +27,6 @@ import org.apache.lucene.util.CloseableThreadLocal; import org.apache.lucene.util.IOUtils; import java.io.Closeable; -import java.io.IOException; -import java.io.Reader; -import java.util.ArrayList; /** * Class responsible for access to stored document fields. @@ -49,7 +39,8 @@ public final class FieldsReader implements Cloneable, Closeable { private final static int FORMAT_SIZE = 4; private final FieldInfos fieldInfos; - + private CloseableThreadLocal fieldsStreamTL = new CloseableThreadLocal(); + // The main fieldStream, used only for cloning. private final IndexInput cloneableFieldsStream; @@ -68,7 +59,6 @@ public final class FieldsReader implements Cloneable, Closeable { // file. This will be 0 if we have our own private file. 
private int docStoreOffset; - private CloseableThreadLocal fieldsStreamTL = new CloseableThreadLocal(); private boolean isOriginal = false; /** Returns a cloned FieldsReader that shares open @@ -200,50 +190,52 @@ public final class FieldsReader implements Cloneable, Closeable { indexStream.seek(FORMAT_SIZE + (docID + docStoreOffset) * 8L); } - public final Document doc(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public final void visitDocument(int n, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { seekIndex(n); - long position = indexStream.readLong(); - fieldsStream.seek(position); + fieldsStream.seek(indexStream.readLong()); - Document doc = new Document(); - int numFields = fieldsStream.readVInt(); - out: for (int i = 0; i < numFields; i++) { + final int numFields = fieldsStream.readVInt(); + for (int fieldIDX = 0; fieldIDX < numFields; fieldIDX++) { int fieldNumber = fieldsStream.readVInt(); - FieldInfo fi = fieldInfos.fieldInfo(fieldNumber); - FieldSelectorResult acceptField = fieldSelector == null ? FieldSelectorResult.LOAD : fieldSelector.accept(fi.name); + FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); int bits = fieldsStream.readByte() & 0xFF; - assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_TOKENIZED | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits); + assert bits <= (FieldsWriter.FIELD_IS_NUMERIC_MASK | FieldsWriter.FIELD_IS_BINARY): "bits=" + Integer.toHexString(bits); - boolean tokenize = (bits & FieldsWriter.FIELD_IS_TOKENIZED) != 0; - boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; + final boolean binary = (bits & FieldsWriter.FIELD_IS_BINARY) != 0; final int numeric = bits & FieldsWriter.FIELD_IS_NUMERIC_MASK; - switch (acceptField) { - case LOAD: - addField(doc, fi, binary, tokenize, numeric); + final boolean doStop; + if (binary) { + final int numBytes = fieldsStream.readVInt(); + doStop = visitor.binaryField(fieldInfo, fieldsStream, numBytes); + } else if (numeric != 0) { + switch(numeric) { + case FieldsWriter.FIELD_IS_NUMERIC_INT: + doStop = visitor.intField(fieldInfo, fieldsStream.readInt()); break; - case LOAD_AND_BREAK: - addField(doc, fi, binary, tokenize, numeric); - break out; //Get out of this loop - case LAZY_LOAD: - addFieldLazy(doc, fi, binary, tokenize, true, numeric); + case FieldsWriter.FIELD_IS_NUMERIC_LONG: + doStop = visitor.longField(fieldInfo, fieldsStream.readLong()); break; - case LATENT: - addFieldLazy(doc, fi, binary, tokenize, false, numeric); + case FieldsWriter.FIELD_IS_NUMERIC_FLOAT: + doStop = visitor.floatField(fieldInfo, Float.intBitsToFloat(fieldsStream.readInt())); break; - case SIZE: - skipFieldBytes(addFieldSize(doc, fi, binary, numeric)); + case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE: + doStop = visitor.doubleField(fieldInfo, Double.longBitsToDouble(fieldsStream.readLong())); break; - case SIZE_AND_BREAK: - addFieldSize(doc, fi, binary, numeric); - break out; //Get out of this loop default: - skipField(numeric); + throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric)); + } + } else { + // Text: + final int numUTF8Bytes = fieldsStream.readVInt(); + doStop = visitor.stringField(fieldInfo, fieldsStream, numUTF8Bytes); + } + + if (doStop) { + return; } } - - return doc; } /** Returns the length in bytes of each raw document in a @@ -300,225 +292,4 @@ public final class FieldsReader implements Cloneable, Closeable { private void skipFieldBytes(int toRead) throws IOException { 
fieldsStream.seek(fieldsStream.getFilePointer() + toRead); } - - private NumericField loadNumericField(FieldInfo fi, int numeric) throws IOException { - assert numeric != 0; - switch(numeric) { - case FieldsWriter.FIELD_IS_NUMERIC_INT: - return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setIntValue(fieldsStream.readInt()); - case FieldsWriter.FIELD_IS_NUMERIC_LONG: - return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setLongValue(fieldsStream.readLong()); - case FieldsWriter.FIELD_IS_NUMERIC_FLOAT: - return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setFloatValue(Float.intBitsToFloat(fieldsStream.readInt())); - case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE: - return new NumericField(fi.name, Field.Store.YES, fi.isIndexed).setDoubleValue(Double.longBitsToDouble(fieldsStream.readLong())); - default: - throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric)); - } - } - - private void addFieldLazy(Document doc, FieldInfo fi, boolean binary, boolean tokenize, boolean cacheResult, int numeric) throws IOException { - final AbstractField f; - if (binary) { - int toRead = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - f = new LazyField(fi.name, Field.Store.YES, toRead, pointer, binary, cacheResult); - //Need to move the pointer ahead by toRead positions - fieldsStream.seek(pointer + toRead); - } else if (numeric != 0) { - f = loadNumericField(fi, numeric); - } else { - Field.Store store = Field.Store.YES; - Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize); - Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector); - - int length = fieldsStream.readVInt(); - long pointer = fieldsStream.getFilePointer(); - //Skip ahead of where we are by the length of what is stored - fieldsStream.seek(pointer+length); - f = new LazyField(fi.name, store, index, termVector, length, pointer, binary, cacheResult); - } - - f.setOmitNorms(fi.omitNorms); - f.setIndexOptions(fi.indexOptions); - doc.add(f); - } - - private void addField(Document doc, FieldInfo fi, boolean binary, boolean tokenize, int numeric) throws CorruptIndexException, IOException { - final AbstractField f; - - if (binary) { - int toRead = fieldsStream.readVInt(); - final byte[] b = new byte[toRead]; - fieldsStream.readBytes(b, 0, b.length); - f = new Field(fi.name, b); - } else if (numeric != 0) { - f = loadNumericField(fi, numeric); - } else { - Field.Index index = Field.Index.toIndex(fi.isIndexed, tokenize); - Field.TermVector termVector = Field.TermVector.toTermVector(fi.storeTermVector, fi.storeOffsetWithTermVector, fi.storePositionWithTermVector); - f = new Field(fi.name, // name - fieldsStream.readString(), // read value - Field.Store.YES, - index, - termVector); - } - - f.setIndexOptions(fi.indexOptions); - f.setOmitNorms(fi.omitNorms); - doc.add(f); - } - - // Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes) - // Read just the size -- caller must skip the field content to continue reading fields - // Return the size in bytes or chars, depending on field type - private int addFieldSize(Document doc, FieldInfo fi, boolean binary, int numeric) throws IOException { - final int bytesize, size; - switch(numeric) { - case 0: - size = fieldsStream.readVInt(); - bytesize = binary ? 
size : 2*size; - break; - case FieldsWriter.FIELD_IS_NUMERIC_INT: - case FieldsWriter.FIELD_IS_NUMERIC_FLOAT: - size = bytesize = 4; - break; - case FieldsWriter.FIELD_IS_NUMERIC_LONG: - case FieldsWriter.FIELD_IS_NUMERIC_DOUBLE: - size = bytesize = 8; - break; - default: - throw new FieldReaderException("Invalid numeric type: " + Integer.toHexString(numeric)); - } - byte[] sizebytes = new byte[4]; - sizebytes[0] = (byte) (bytesize>>>24); - sizebytes[1] = (byte) (bytesize>>>16); - sizebytes[2] = (byte) (bytesize>>> 8); - sizebytes[3] = (byte) bytesize ; - doc.add(new Field(fi.name, sizebytes)); - return size; - } - - /** - * A Lazy implementation of Fieldable that defers loading of fields until asked for, instead of when the Document is - * loaded. - */ - private class LazyField extends AbstractField implements Fieldable { - private int toRead; - private long pointer; - private final boolean cacheResult; - - public LazyField(String name, Field.Store store, int toRead, long pointer, boolean isBinary, boolean cacheResult) { - super(name, store, Field.Index.NO, Field.TermVector.NO); - this.toRead = toRead; - this.pointer = pointer; - this.isBinary = isBinary; - this.cacheResult = cacheResult; - if (isBinary) - binaryLength = toRead; - lazy = true; - } - - public LazyField(String name, Field.Store store, Field.Index index, Field.TermVector termVector, int toRead, long pointer, boolean isBinary, boolean cacheResult) { - super(name, store, index, termVector); - this.toRead = toRead; - this.pointer = pointer; - this.isBinary = isBinary; - this.cacheResult = cacheResult; - if (isBinary) - binaryLength = toRead; - lazy = true; - } - - private IndexInput getFieldStream() { - IndexInput localFieldsStream = fieldsStreamTL.get(); - if (localFieldsStream == null) { - localFieldsStream = (IndexInput) cloneableFieldsStream.clone(); - fieldsStreamTL.set(localFieldsStream); - } - return localFieldsStream; - } - - /** The value of the field as a Reader, or null. If null, the String value, - * binary value, or TokenStream value is used. Exactly one of stringValue(), - * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */ - public Reader readerValue() { - ensureOpen(); - return null; - } - - /** The value of the field as a TokenStream, or null. If null, the Reader value, - * String value, or binary value is used. Exactly one of stringValue(), - * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. */ - public TokenStream tokenStreamValue() { - ensureOpen(); - return null; - } - - /** The value of the field as a String, or null. If null, the Reader value, - * binary value, or TokenStream value is used. Exactly one of stringValue(), - * readerValue(), getBinaryValue(), and tokenStreamValue() must be set. 
*/ - public String stringValue() { - ensureOpen(); - if (isBinary) - return null; - else { - if (fieldsData == null) { - String result = null; - IndexInput localFieldsStream = getFieldStream(); - try { - localFieldsStream.seek(pointer); - byte[] bytes = new byte[toRead]; - localFieldsStream.readBytes(bytes, 0, toRead); - result = new String(bytes, "UTF-8"); - } catch (IOException e) { - throw new FieldReaderException(e); - } - if (cacheResult == true){ - fieldsData = result; - } - return result; - } else { - return (String) fieldsData; - } - } - } - - @Override - public byte[] getBinaryValue(byte[] result) { - ensureOpen(); - - if (isBinary) { - if (fieldsData == null) { - // Allocate new buffer if result is null or too small - final byte[] b; - if (result == null || result.length < toRead) - b = new byte[toRead]; - else - b = result; - - IndexInput localFieldsStream = getFieldStream(); - - // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a change for people - // since they are already handling this exception when getting the document - try { - localFieldsStream.seek(pointer); - localFieldsStream.readBytes(b, 0, toRead); - } catch (IOException e) { - throw new FieldReaderException(e); - } - - binaryOffset = 0; - binaryLength = toRead; - if (cacheResult == true){ - fieldsData = b; - } - return b; - } else { - return (byte[]) fieldsData; - } - } else - return null; - } - } } diff --git a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java index e44cfd13834..74800075a35 100644 --- a/lucene/src/java/org/apache/lucene/index/FieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/FieldsWriter.java @@ -17,19 +17,16 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.util.List; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.NumericField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; final class FieldsWriter { - static final int FIELD_IS_TOKENIZED = 1 << 0; + // NOTE: bit 0 is free here! You can steal it! static final int FIELD_IS_BINARY = 1 << 1; // the old bit 1 << 2 was compressed, is now left out @@ -138,15 +135,17 @@ final class FieldsWriter { } } - final void writeField(int fieldNumber, Fieldable field) throws IOException { + final void writeField(int fieldNumber, IndexableField field) throws IOException { fieldsStream.writeVInt(fieldNumber); int bits = 0; - if (field.isTokenized()) - bits |= FIELD_IS_TOKENIZED; - if (field.isBinary()) - bits |= FIELD_IS_BINARY; - if (field instanceof NumericField) { - switch (((NumericField) field).getDataType()) { + final BytesRef bytes; + final String string; + // TODO: maybe a field should serialize itself? + // this way we don't bake into indexer all these + // specific encodings for different fields? and apps + // can customize... 
+ if (field.numeric()) { + switch (field.numericDataType()) { case INT: bits |= FIELD_IS_NUMERIC_INT; break; case LONG: @@ -158,23 +157,31 @@ final class FieldsWriter { default: assert false : "Should never get here"; } + string = null; + bytes = null; + } else { + bytes = field.binaryValue(); + if (bytes != null) { + bits |= FIELD_IS_BINARY; + string = null; + } else { + string = field.stringValue(); + } } + fieldsStream.writeByte((byte) bits); - if (field.isBinary()) { - final byte[] data; - final int len; - final int offset; - data = field.getBinaryValue(); - len = field.getBinaryLength(); - offset = field.getBinaryOffset(); - - fieldsStream.writeVInt(len); - fieldsStream.writeBytes(data, offset, len); - } else if (field instanceof NumericField) { - final NumericField nf = (NumericField) field; - final Number n = nf.getNumericValue(); - switch (nf.getDataType()) { + if (bytes != null) { + fieldsStream.writeVInt(bytes.length); + fieldsStream.writeBytes(bytes.bytes, bytes.offset, bytes.length); + } else if (string != null) { + fieldsStream.writeString(field.stringValue()); + } else { + final Number n = field.numericValue(); + if (n == null) { + throw new IllegalArgumentException("field " + field.name() + " is stored but does not have binaryValue, stringValue nor numericValue"); + } + switch (field.numericDataType()) { case INT: fieldsStream.writeInt(n.intValue()); break; case LONG: @@ -186,8 +193,6 @@ final class FieldsWriter { default: assert false : "Should never get here"; } - } else { - fieldsStream.writeString(field.stringValue()); } } @@ -207,21 +212,21 @@ final class FieldsWriter { assert fieldsStream.getFilePointer() == position; } - final void addDocument(Document doc, FieldInfos fieldInfos) throws IOException { + final void addDocument(Iterable doc, FieldInfos fieldInfos) throws IOException { indexStream.writeLong(fieldsStream.getFilePointer()); int storedCount = 0; - List fields = doc.getFields(); - for (Fieldable field : fields) { - if (field.isStored()) - storedCount++; + for (IndexableField field : doc) { + if (field.stored()) { + storedCount++; + } } fieldsStream.writeVInt(storedCount); - - for (Fieldable field : fields) { - if (field.isStored()) + for (IndexableField field : doc) { + if (field.stored()) { writeField(fieldInfos.fieldNumber(field.name()), field); + } } } } diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index d57591a6b15..5d7f0a50270 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -17,9 +17,8 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.codecs.PerDocValues; +import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -351,9 +350,9 @@ public class FilterIndexReader extends IndexReader { } @Override - public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { ensureOpen(); - return in.document(n, fieldSelector); + in.document(docID, visitor); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java index 409becaf3c2..aa4f49f953e 100644 --- a/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/FreqProxTermsWriterPerField.java @@ -22,7 +22,6 @@ import java.util.Comparator; import java.util.Map; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.codecs.FieldsConsumer; import org.apache.lucene.index.codecs.PostingsConsumer; @@ -82,15 +81,17 @@ final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implem } @Override - boolean start(Fieldable[] fields, int count) { - for(int i=0;inth - * Document in this index. + * Document in this index. This is just + * sugar for using {@link DocumentStoredFieldVisitor}. *

    * NOTE: for performance reasons, this method does not check if the * requested document is deleted, and therefore asking for a deleted document * may yield unspecified results. Usually this is not required, however you * can test if the doc is deleted by checking the {@link * Bits} returned from {@link MultiFields#getLiveDocs}. + * + * NOTE: only the content of a field is returned, + * if that field was stored during indexing. Metadata + * like boost, omitNorm, IndexOptions, tokenized, etc., + * are not preserved. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public Document document(int n) throws CorruptIndexException, IOException { + // TODO: we need a separate StoredField, so that the + // Document returned here contains that class not + // IndexableField + public Document document(int docID) throws CorruptIndexException, IOException { ensureOpen(); - return document(n, null); + final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); + document(docID, visitor); + return visitor.getDocument(); } - /** - * Get the {@link org.apache.lucene.document.Document} at the n - * th position. The {@link FieldSelector} may be used to determine - * what {@link org.apache.lucene.document.Field}s to load and how they should - * be loaded. NOTE: If this Reader (more specifically, the underlying - * FieldsReader) is closed before the lazy - * {@link org.apache.lucene.document.Field} is loaded an exception may be - * thrown. If you want the value of a lazy - * {@link org.apache.lucene.document.Field} to be available after closing you - * must explicitly load it or fetch the Document again with a new loader. - *

    - * NOTE: for performance reasons, this method does not check if the - * requested document is deleted, and therefore asking for a deleted document - * may yield unspecified results. Usually this is not required, however you - * can test if the doc is deleted by checking the {@link - * Bits} returned from {@link MultiFields#getLiveDocs}. - * - * @param n Get the document at the nth position - * @param fieldSelector The {@link FieldSelector} to use to determine what - * Fields should be loaded on the Document. May be null, in which case - * all Fields will be loaded. - * @return The stored fields of the - * {@link org.apache.lucene.document.Document} at the nth position - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * @see org.apache.lucene.document.Fieldable - * @see org.apache.lucene.document.FieldSelector - * @see org.apache.lucene.document.SetBasedFieldSelector - * @see org.apache.lucene.document.LoadFirstFieldSelector - */ - // TODO (1.5): When we convert to JDK 1.5 make this Set - public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException; - /** Returns true if any documents have been deleted */ public abstract boolean hasDeletions(); @@ -1017,8 +999,8 @@ public abstract class IndexReader implements Cloneable,Closeable { public abstract byte[] norms(String field) throws IOException; /** Expert: Resets the normalization factor for the named field of the named - * document. By default, The norm represents the product of the field's {@link - * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its + * document. By default, the norm represents the product of the field's {@link + * org.apache.lucene.document.Field#setBoost(float) boost} and its * length normalization}. Thus, to preserve the length normalization * values when resetting this, one should base the new value upon the old. * diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java index c85157b8147..77e3449c66e 100644 --- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java +++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java @@ -36,7 +36,6 @@ import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment; import org.apache.lucene.index.FieldInfos.FieldNumberBiMap; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -74,10 +73,10 @@ import org.apache.lucene.util.TwoPhaseCommit; new index if there is not already an index at the provided path and otherwise open the existing index.

-    <p>In either case, documents are added with {@link #addDocument(Document)
+    <p>In either case, documents are added with {@link #addDocument(Iterable)
     addDocument} and removed with {@link #deleteDocuments(Term)} or {@link
     #deleteDocuments(Query)}. A document can be updated with {@link
-    #updateDocument(Term, Document) updateDocument} (which just deletes
+    #updateDocument(Term, Iterable) updateDocument} (which just deletes
     and then adds the entire document).  When finished adding, deleting
     and updating documents, {@link #close() close} should be called.
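For illustration: since Document itself now implements Iterable<IndexableField>, most callers of the reworked addDocument/updateDocument signatures below need no changes. A minimal sketch under that assumption (field names and values are invented; assumes an already-open IndexWriter named writer, plus the Field(String, FieldType, String) constructor and the StringField/TextField type constants introduced elsewhere in this patch):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.Term;

    Document doc = new Document();   // Document is an Iterable<IndexableField>
    doc.add(new Field("id", StringField.TYPE_STORED, "42"));          // indexed, not tokenized, stored
    doc.add(new Field("body", TextField.TYPE_UNSTORED, "some text")); // tokenized, not stored
    writer.addDocument(doc);                          // accepted as an Iterable of fields
    writer.updateDocument(new Term("id", "42"), doc); // delete-by-term, then add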

    @@ -1281,7 +1280,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc) throws CorruptIndexException, IOException { + public void addDocument(Iterable doc) throws CorruptIndexException, IOException { addDocument(doc, analyzer); } @@ -1289,7 +1288,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * Adds a document to this index, using the provided analyzer instead of the * value of {@link #getAnalyzer()}. * - *

<p>See {@link #addDocument(Document)} for details on
+   * <p>See {@link #addDocument(Iterable)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.
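Because the writer now consumes a plain Iterable rather than a concrete Document, an application object can expose its fields directly. A hypothetical sketch (the ProductDoc class and its field names are invented for illustration):

    import java.util.Arrays;
    import java.util.Iterator;

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexableField;

    // Adapter that acts as a "document" without materializing a Document:
    class ProductDoc implements Iterable<IndexableField> {
      private final String sku, description;
      ProductDoc(String sku, String description) {
        this.sku = sku;
        this.description = description;
      }
      @Override
      public Iterator<IndexableField> iterator() {
        return Arrays.<IndexableField>asList(
            new Field("sku", StringField.TYPE_STORED, sku),
            new Field("description", TextField.TYPE_UNSTORED, description)).iterator();
      }
    }

    // usage: writer.addDocument(new ProductDoc("X-1", "left-handed widget"));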

    * @@ -1300,7 +1299,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { + public void addDocument(Iterable doc, Analyzer analyzer) throws CorruptIndexException, IOException { updateDocument(null, doc, analyzer); } @@ -1318,7 +1317,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * compression), in which case you may need to fully * re-index your documents at that time. * - *

<p>See {@link #addDocument(Document)} for details on
+   * <p>See {@link #addDocument(Iterable)} for details on
   * index and IndexWriter state after an Exception, and
   * flushing/merging temporary free space requirements.
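The block APIs in the hunks just below (addDocuments/updateDocuments) change the same way, accepting an Iterable of such field iterables so that a group of documents is flushed atomically with contiguous doc IDs. A sketch, assuming the wildcarded generics admit a List<Document> (field names illustrative):

    import java.util.ArrayList;
    import java.util.List;

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;

    List<Document> block = new ArrayList<Document>();
    for (String color : new String[] {"red", "blue"}) {
      Document child = new Document();
      child.add(new Field("color", StringField.TYPE_STORED, color));
      block.add(child);
    }
    Document parent = new Document();
    parent.add(new Field("type", StringField.TYPE_STORED, "parent"));
    block.add(parent);           // parent last, per the usual block convention
    writer.addDocuments(block);  // indexed atomically, contiguous doc IDs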

    * @@ -1338,7 +1337,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * * @lucene.experimental */ - public void addDocuments(Iterable docs) throws CorruptIndexException, IOException { + public void addDocuments(Iterable> docs) throws CorruptIndexException, IOException { addDocuments(docs, analyzer); } @@ -1353,7 +1352,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * * @lucene.experimental */ - public void addDocuments(Iterable docs, Analyzer analyzer) throws CorruptIndexException, IOException { + public void addDocuments(Iterable> docs, Analyzer analyzer) throws CorruptIndexException, IOException { updateDocuments(null, docs, analyzer); } @@ -1370,7 +1369,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * * @lucene.experimental */ - public void updateDocuments(Term delTerm, Iterable docs) throws CorruptIndexException, IOException { + public void updateDocuments(Term delTerm, Iterable> docs) throws CorruptIndexException, IOException { updateDocuments(delTerm, docs, analyzer); } @@ -1388,7 +1387,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * * @lucene.experimental */ - public void updateDocuments(Term delTerm, Iterable docs, Analyzer analyzer) throws CorruptIndexException, IOException { + public void updateDocuments(Term delTerm, Iterable> docs, Analyzer analyzer) throws CorruptIndexException, IOException { ensureOpen(); try { boolean success = false; @@ -1511,7 +1510,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException { + public void updateDocument(Term term, Iterable doc) throws CorruptIndexException, IOException { ensureOpen(); updateDocument(term, doc, getAnalyzer()); } @@ -1534,7 +1533,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void updateDocument(Term term, Document doc, Analyzer analyzer) + public void updateDocument(Term term, Iterable doc, Analyzer analyzer) throws CorruptIndexException, IOException { ensureOpen(); try { @@ -3034,7 +3033,7 @@ public class IndexWriter implements Closeable, TwoPhaseCommit { DocumentsWriter getDocsWriter() { boolean test = false; assert test = true; - return test?docWriter: null; + return test ? docWriter : null; } /** Expert: Return the number of documents currently diff --git a/lucene/src/java/org/apache/lucene/index/IndexableField.java b/lucene/src/java/org/apache/lucene/index/IndexableField.java new file mode 100644 index 00000000000..90879f23e46 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/IndexableField.java @@ -0,0 +1,104 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.Reader; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.values.PerDocFieldValues; +import org.apache.lucene.index.values.ValueType; +import org.apache.lucene.util.BytesRef; + +// TODO: how to handle versioning here...? + +// TODO: we need to break out separate StoredField... + +/** Represents a single field for indexing. IndexWriter + * consumes Iterable as a document. + * + * @lucene.experimental */ + +public interface IndexableField { + + // TODO: add attrs to this API? + + /* Field name */ + public String name(); + + // NOTE: if doc/field impl has the notion of "doc level boost" + // it must be multiplied in w/ this field's boost + + /** Field boost (you must pre-multiply in any doc boost). */ + public float boost(); + + /* True if the field's value should be stored */ + public boolean stored(); + + /* Non-null if this field has a binary value */ + public BytesRef binaryValue(); + + /* Non-null if this field has a string value */ + public String stringValue(); + + /* Non-null if this field has a Reader value */ + public Reader readerValue(); + + /* Non-null if this field has a pre-tokenized ({@link TokenStream}) value */ + public TokenStream tokenStreamValue(); + + // Numeric field: + /* True if this field is numeric */ + public boolean numeric(); + + /* Numeric {@link NumericField.DataType}; only used if + * the field is numeric */ + public NumericField.DataType numericDataType(); + + /* Numeric value; only used if the field is numeric */ + public Number numericValue(); + + /* True if this field should be indexed (inverted) */ + public boolean indexed(); + + /* True if this field's value should be analyzed */ + public boolean tokenized(); + + /* True if norms should not be indexed */ + public boolean omitNorms(); + + /* {@link IndexOptions}, describing what should be + * recorded into the inverted index */ + public IndexOptions indexOptions(); + + /* True if term vectors should be indexed */ + public boolean storeTermVectors(); + + /* True if term vector offsets should be indexed */ + public boolean storeTermVectorOffsets(); + + /* True if term vector positions should be indexed */ + public boolean storeTermVectorPositions(); + + /* Non-null if doc values should be indexed */ + public PerDocFieldValues docValues(); + + /* DocValues type; only used if docValues is non-null */ + public ValueType docValuesType(); +} diff --git a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerField.java b/lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerField.java index cb7a333d878..20f10fbccb2 100644 --- a/lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerField.java +++ b/lucene/src/java/org/apache/lucene/index/InvertedDocConsumerPerField.java @@ -19,24 +19,22 @@ package org.apache.lucene.index; import java.io.IOException; -import org.apache.lucene.document.Fieldable; - abstract class InvertedDocConsumerPerField { - // Called once per field, and is given all Fieldable + // Called once per 
field, and is given all IndexableField // occurrences for this field in the document. Return // true if you wish to see inverted tokens for these // fields: - abstract boolean start(Fieldable[] fields, int count) throws IOException; + abstract boolean start(IndexableField[] fields, int count) throws IOException; // Called before a field instance is being processed - abstract void start(Fieldable field); + abstract void start(IndexableField field); // Called once per inverted token abstract void add() throws IOException; - // Called once per field per document, after all Fieldable - // occurrences are inverted + // Called once per field per document, after all IndexableFields + // are inverted abstract void finish() throws IOException; // Called on hitting an aborting exception diff --git a/lucene/src/java/org/apache/lucene/index/MultiReader.java b/lucene/src/java/org/apache/lucene/index/MultiReader.java index aa0df651c99..72062cf9567 100644 --- a/lucene/src/java/org/apache/lucene/index/MultiReader.java +++ b/lucene/src/java/org/apache/lucene/index/MultiReader.java @@ -22,8 +22,6 @@ import java.util.Collection; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -258,12 +256,11 @@ public class MultiReader extends IndexReader implements Cloneable { return maxDoc; } - // inherit javadoc @Override - public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { ensureOpen(); - int i = readerIndex(n); // find segment num - return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader + int i = readerIndex(docID); // find segment num + subReaders[i].document(docID - starts[i], visitor); // dispatch to segment reader } @Override diff --git a/lucene/src/java/org/apache/lucene/index/ParallelReader.java b/lucene/src/java/org/apache/lucene/index/ParallelReader.java index a281abb8e43..22c006cde62 100644 --- a/lucene/src/java/org/apache/lucene/index/ParallelReader.java +++ b/lucene/src/java/org/apache/lucene/index/ParallelReader.java @@ -17,10 +17,6 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.FieldSelectorResult; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.values.IndexDocValues; import org.apache.lucene.util.Bits; @@ -351,30 +347,12 @@ public class ParallelReader extends IndexReader { hasDeletions = false; } - // append fields from storedFieldReaders @Override - public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { ensureOpen(); - Document result = new Document(); for (final IndexReader reader: storedFieldReaders) { - - boolean include = (fieldSelector==null); - if (!include) { - Collection fields = readerToFields.get(reader); - for (final String field : fields) - if (fieldSelector.accept(field) != FieldSelectorResult.NO_LOAD) { - include = true; - break; - } - } - if (include) { - List fields = reader.document(n, fieldSelector).getFields(); - for (Fieldable field : fields) { - result.add(field); - } - } + reader.document(docID, visitor); } - return result; } // get all vectors diff --git a/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java b/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java index d33e7a3f032..e693d8ff013 100644 --- a/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java +++ b/lucene/src/java/org/apache/lucene/index/PersistentSnapshotDeletionPolicy.java @@ -25,9 +25,7 @@ import java.util.Map.Entry; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; @@ -71,12 +69,11 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy { // index is allowed to have exactly one document or 0. 
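The hunk continues below; note that reading the snapshot metadata now iterates the loaded Document directly, since Document implements Iterable<IndexableField>. The same pattern replaces doc.getFields() in general, e.g. (a sketch, assuming an open IndexReader named reader and a valid docID):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexableField;

    Document doc = reader.document(docID);
    for (IndexableField f : doc) {
      System.out.println(f.name() + " = " + f.stringValue()); // null for binary fields
    }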
if (numDocs == 1) { Document doc = r.document(r.maxDoc() - 1); - Field sid = doc.getField(SNAPSHOTS_ID); - if (sid == null) { + if (doc.getField(SNAPSHOTS_ID) == null) { throw new IllegalStateException("directory is not a valid snapshots store!"); } doc.removeField(SNAPSHOTS_ID); - for (Fieldable f : doc.getFields()) { + for (IndexableField f : doc) { snapshots.put(f.name(), f.stringValue()); } } else if (numDocs != 0) { @@ -189,12 +186,14 @@ public class PersistentSnapshotDeletionPolicy extends SnapshotDeletionPolicy { private void persistSnapshotInfos(String id, String segment) throws IOException { writer.deleteAll(); Document d = new Document(); - d.add(new Field(SNAPSHOTS_ID, "", Store.YES, Index.NO)); + FieldType ft = new FieldType(); + ft.setStored(true); + d.add(new Field(SNAPSHOTS_ID, ft, "")); for (Entry e : super.getSnapshots().entrySet()) { - d.add(new Field(e.getKey(), e.getValue(), Store.YES, Index.NO)); + d.add(new Field(e.getKey(), ft, e.getValue())); } if (id != null) { - d.add(new Field(id, segment, Store.YES, Index.NO)); + d.add(new Field(id, ft, segment)); } writer.addDocument(d); writer.commit(); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java index 0abb9e7958d..76c0cac2d5d 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java @@ -335,6 +335,10 @@ final class SegmentMerger { // skip deleted docs continue; } + // TODO: this could be more efficient using + // FieldVisitor instead of loading/writing entire + // doc; ie we just have to renumber the field number + // on the fly? // NOTE: it's very important to first assign to doc then pass it to // termVectorsWriter.addAllDocVectors; see LUCENE-1282 Document doc = reader.reader.document(j); diff --git a/lucene/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/src/java/org/apache/lucene/index/SegmentReader.java index 89a0ba688ce..36679fd2a2d 100644 --- a/lucene/src/java/org/apache/lucene/index/SegmentReader.java +++ b/lucene/src/java/org/apache/lucene/index/SegmentReader.java @@ -27,13 +27,11 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicInteger; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IndexInput; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.BitVector; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -455,10 +453,9 @@ public class SegmentReader extends IndexReader implements Cloneable { return core.fieldInfos; } - @Override - public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException { + public void document(int docID, StoredFieldVisitor visitor) throws CorruptIndexException, IOException { ensureOpen(); - return getFieldsReader().doc(n, fieldSelector); + getFieldsReader().visitDocument(docID, visitor); } @Override diff --git a/lucene/src/java/org/apache/lucene/index/StoredFieldVisitor.java b/lucene/src/java/org/apache/lucene/index/StoredFieldVisitor.java new file mode 100644 index 00000000000..9149dfc6660 --- /dev/null +++ b/lucene/src/java/org/apache/lucene/index/StoredFieldVisitor.java @@ 
-0,0 +1,87 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; + +import org.apache.lucene.document.Document; +import org.apache.lucene.store.IndexInput; +
+/** + * Expert: provides a low-level means of accessing the stored field + * values in an index. See {@link IndexReader#document(int, + * StoredFieldVisitor)}. + * + * See {@link DocumentStoredFieldVisitor}, which is a + * StoredFieldVisitor that builds the + * {@link Document} containing all stored fields. This is + * used by {@link IndexReader#document(int)}. + * + * @lucene.experimental */ + +public class StoredFieldVisitor {
+ /** Process a binary field. Note that if you want to + * skip the field you must seek the IndexInput past the value + * (e.g., call in.seek(in.getFilePointer() + numBytes)). + * + * Return true to stop loading fields. */ + public boolean binaryField(FieldInfo fieldInfo, IndexInput in, int numBytes) throws IOException { + in.seek(in.getFilePointer() + numBytes); + return false; + } +
+ /** Process a string field by reading numUTF8Bytes. + * Note that if you want to skip the field you must + * seek the IndexInput past the value + * (e.g., call in.seek(in.getFilePointer() + numUTF8Bytes)). + * + * Return true to stop loading fields. */ + public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException { + in.seek(in.getFilePointer() + numUTF8Bytes); + return false; + } +
+ /** Process an int numeric field. + * + * Return true to stop loading fields. */ + public boolean intField(FieldInfo fieldInfo, int value) throws IOException { + return false; + } +
+ /** Process a long numeric field. + * + * Return true to stop loading fields. */ + public boolean longField(FieldInfo fieldInfo, long value) throws IOException { + return false; + } +
+ /** Process a float numeric field. + * + * Return true to stop loading fields. */ + public boolean floatField(FieldInfo fieldInfo, float value) throws IOException { + return false; + } +
+ /** Process a double numeric field. + * + *
    Return true to stop loading fields. */ + public boolean doubleField(FieldInfo fieldInfo, double value) throws IOException { + return false; + } +} + diff --git a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java index de441d25d77..9a8536adb34 100644 --- a/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java +++ b/lucene/src/java/org/apache/lucene/index/StoredFieldsWriter.java @@ -19,7 +19,6 @@ package org.apache.lucene.index; import java.io.IOException; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.IOContext; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.RamUsageEstimator; @@ -41,12 +40,12 @@ final class StoredFieldsWriter { } private int numStoredFields; - private Fieldable[] storedFields; + private IndexableField[] storedFields; private int[] fieldNumbers; public void reset() { numStoredFields = 0; - storedFields = new Fieldable[1]; + storedFields = new IndexableField[1]; fieldNumbers = new int[1]; } @@ -123,10 +122,10 @@ final class StoredFieldsWriter { assert docWriter.writer.testPoint("StoredFieldsWriter.finishDocument end"); } - public void addField(Fieldable field, FieldInfo fieldInfo) throws IOException { + public void addField(IndexableField field, FieldInfo fieldInfo) throws IOException { if (numStoredFields == storedFields.length) { int newSize = ArrayUtil.oversize(numStoredFields + 1, RamUsageEstimator.NUM_BYTES_OBJECT_REF); - Fieldable[] newArray = new Fieldable[newSize]; + IndexableField[] newArray = new IndexableField[newSize]; System.arraycopy(storedFields, 0, newArray, 0, numStoredFields); storedFields = newArray; } diff --git a/lucene/src/java/org/apache/lucene/index/TermFreqVector.java b/lucene/src/java/org/apache/lucene/index/TermFreqVector.java index 29c695a0933..23f01ac11f2 100644 --- a/lucene/src/java/org/apache/lucene/index/TermFreqVector.java +++ b/lucene/src/java/org/apache/lucene/index/TermFreqVector.java @@ -26,7 +26,7 @@ import org.apache.lucene.util.BytesRef; */ public interface TermFreqVector { /** - * The {@link org.apache.lucene.document.Fieldable} name. + * The {@link org.apache.lucene.index.IndexableField} name. * @return The name of the field this vector is associated with. 
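Since the base StoredFieldVisitor above is a no-op that skips every field, callers subclass it and override only the callbacks they care about. A minimal sketch (the field name "title" and the class name are hypothetical) of a visitor that decodes a single string field and stops early:

    import java.io.IOException;
    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.StoredFieldVisitor;
    import org.apache.lucene.store.IndexInput;

    // Hypothetical visitor: loads only the "title" field, skips everything else.
    class TitleVisitor extends StoredFieldVisitor {
      String title;

      @Override
      public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException {
        if ("title".equals(fieldInfo.name)) {
          byte[] bytes = new byte[numUTF8Bytes];
          in.readBytes(bytes, 0, numUTF8Bytes);        // consume the value
          title = new String(bytes, "UTF-8");
          return true;                                 // stop visiting further fields
        }
        in.seek(in.getFilePointer() + numUTF8Bytes);   // skip, per the contract above
        return false;
      }
    }

A caller would pass this through the new API, e.g. reader.document(docID, visitor), then read visitor.title afterwards.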
* */ diff --git a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java index 731df7ff926..e96abf0e255 100644 --- a/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java +++ b/lucene/src/java/org/apache/lucene/index/TermVectorsTermsWriterPerField.java @@ -20,7 +20,6 @@ package org.apache.lucene.index; import java.io.IOException; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.ByteBlockPool; import org.apache.lucene.util.BytesRef; @@ -55,17 +54,17 @@ final class TermVectorsTermsWriterPerField extends TermsHashConsumerPerField { } @Override - boolean start(Fieldable[] fields, int count) { + boolean start(IndexableField[] fields, int count) { doVectors = false; doVectorPositions = false; doVectorOffsets = false; for(int i=0;i.getIndexReader().document(docID) */ public Document doc(int docID) throws CorruptIndexException, IOException { return reader.document(docID); } - - /* Sugar for .getIndexReader().document(docID, fieldSelector) */ - public Document doc(int docID, FieldSelector fieldSelector) throws CorruptIndexException, IOException { - return reader.document(docID, fieldSelector); + + /* Sugar for .getIndexReader().document(docID, fieldVisitor) */ + public void doc(int docID, StoredFieldVisitor fieldVisitor) throws CorruptIndexException, IOException { + reader.document(docID, fieldVisitor); } - + /** Expert: Set the SimilarityProvider implementation used by this Searcher. * */ diff --git a/lucene/src/java/org/apache/lucene/search/TFIDFSimilarity.java b/lucene/src/java/org/apache/lucene/search/TFIDFSimilarity.java index 3ae85bc4b47..4209dd490cf 100644 --- a/lucene/src/java/org/apache/lucene/search/TFIDFSimilarity.java +++ b/lucene/src/java/org/apache/lucene/search/TFIDFSimilarity.java @@ -21,7 +21,6 @@ package org.apache.lucene.search; import java.io.IOException; import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.TermContext; import org.apache.lucene.util.SmallFloat; @@ -454,12 +453,8 @@ import org.apache.lucene.util.SmallFloat; * norm(t,d) encapsulates a few (indexing time) boost and length factors: * *
- *    • Document boost - set by calling
- *      {@link org.apache.lucene.document.Document#setBoost(float) doc.setBoost()}
- *      before adding the document to the index.
- *
 *    • Field boost - set by calling
- *      {@link org.apache.lucene.document.Fieldable#setBoost(float) field.setBoost()}
+ *      {@link org.apache.lucene.document.Field#setBoost(float) field.setBoost()}
 *      before adding the field to a document.
 *
    • lengthNorm - computed @@ -480,8 +475,6 @@ import org.apache.lucene.util.SmallFloat; * * * norm(t,d)   =   - * {@link org.apache.lucene.document.Document#getBoost() doc.getBoost()} - *  ·  * lengthNorm *  ·  * @@ -489,7 +482,7 @@ import org.apache.lucene.util.SmallFloat; * * * - * {@link org.apache.lucene.document.Fieldable#getBoost() f.getBoost}() + * {@link org.apache.lucene.index.IndexableField#boost() f.boost}() * * * diff --git a/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java b/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java index d0cd139f330..041a1074a97 100644 --- a/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java +++ b/lucene/src/test-framework/org/apache/lucene/analysis/CollationTestBase.java @@ -18,14 +18,11 @@ package org.apache.lucene.analysis; */ -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenizer; -import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.ScoreDoc; @@ -36,8 +33,11 @@ import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; -import org.apache.lucene.document.Field; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IndexableBinaryStringTools; import org.apache.lucene.util.LuceneTestCase; @@ -81,10 +81,8 @@ public abstract class CollationTestBase extends LuceneTestCase { IndexWriter writer = new IndexWriter(ramDir, new IndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); - doc.add(new Field("content", "\u0633\u0627\u0628", - Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("body", "body", - Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("content", TextField.TYPE_STORED, "\u0633\u0627\u0628")); + doc.add(new Field("body", StringField.TYPE_STORED, "body")); writer.addDocument(doc); writer.close(); IndexSearcher searcher = new IndexSearcher(ramDir, true); @@ -118,8 +116,7 @@ public abstract class CollationTestBase extends LuceneTestCase { // orders the U+0698 character before the U+0633 character, so the single // index Term below should NOT be returned by a TermRangeQuery with a Farsi // Collator (or an Arabic one for the case when Farsi is not supported). 
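This and the remaining test conversions all apply the same replacement idiom: the old Field.Store/Field.Index/Field.TermVector constructor flags become either a TextField/StringField preset or a FieldType copied from a preset and then customized. A condensed sketch of that idiom, using only APIs this patch introduces (the class and field names are hypothetical):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;

    class FieldTypeIdiom {
      static Document example() {
        Document doc = new Document();
        // presets replace the Field.Store/Field.Index combinations:
        doc.add(new Field("content", TextField.TYPE_STORED, "analyzed and stored"));
        doc.add(new Field("id", StringField.TYPE_STORED, "not analyzed, stored"));

        // copy a preset, then customize it, replacing Field.TermVector flags:
        FieldType withVectors = new FieldType(TextField.TYPE_STORED);
        withVectors.setStoreTermVectors(true);
        withVectors.setStoreTermVectorPositions(true);
        withVectors.setStoreTermVectorOffsets(true);
        doc.add(new Field("body", withVectors, "text with term vectors"));
        return doc;
      }
    }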
- doc.add(new Field("content", "\u0633\u0627\u0628", - Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("content", TextField.TYPE_STORED, "\u0633\u0627\u0628")); writer.addDocument(doc); writer.close(); IndexSearcher searcher = new IndexSearcher(ramDir, true); @@ -141,10 +138,8 @@ public abstract class CollationTestBase extends LuceneTestCase { IndexWriter writer = new IndexWriter(farsiIndex, new IndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); - doc.add(new Field("content", "\u0633\u0627\u0628", - Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("body", "body", - Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("content", TextField.TYPE_STORED, "\u0633\u0627\u0628")); + doc.add(new Field("body", StringField.TYPE_STORED, "body")); writer.addDocument(doc); writer.close(); @@ -204,20 +199,21 @@ public abstract class CollationTestBase extends LuceneTestCase { { "J", "y", "HOT", "HOT", "HOT", "HOT" }, }; + FieldType customType = new FieldType(); + customType.setStored(true); + for (int i = 0 ; i < sortData.length ; ++i) { Document doc = new Document(); - doc.add(new Field("tracer", sortData[i][0], - Field.Store.YES, Field.Index.NO)); - doc.add(new Field("contents", sortData[i][1], - Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new Field("tracer", customType, sortData[i][0])); + doc.add(new TextField("contents", sortData[i][1])); if (sortData[i][2] != null) - doc.add(new Field("US", usAnalyzer.reusableTokenStream("US", new StringReader(sortData[i][2])))); + doc.add(new TextField("US", usAnalyzer.reusableTokenStream("US", new StringReader(sortData[i][2])))); if (sortData[i][3] != null) - doc.add(new Field("France", franceAnalyzer.reusableTokenStream("France", new StringReader(sortData[i][3])))); + doc.add(new TextField("France", franceAnalyzer.reusableTokenStream("France", new StringReader(sortData[i][3])))); if (sortData[i][4] != null) - doc.add(new Field("Sweden", swedenAnalyzer.reusableTokenStream("Sweden", new StringReader(sortData[i][4])))); + doc.add(new TextField("Sweden", swedenAnalyzer.reusableTokenStream("Sweden", new StringReader(sortData[i][4])))); if (sortData[i][5] != null) - doc.add(new Field("Denmark", denmarkAnalyzer.reusableTokenStream("Denmark", new StringReader(sortData[i][5])))); + doc.add(new TextField("Denmark", denmarkAnalyzer.reusableTokenStream("Denmark", new StringReader(sortData[i][5])))); writer.addDocument(doc); } writer.optimize(); @@ -250,9 +246,9 @@ public abstract class CollationTestBase extends LuceneTestCase { int n = result.length; for (int i = 0 ; i < n ; ++i) { Document doc = searcher.doc(result[i].doc); - String[] v = doc.getValues("tracer"); + IndexableField[] v = doc.getFields("tracer"); for (int j = 0 ; j < v.length ; ++j) { - buff.append(v[j]); + buff.append(v[j].stringValue()); } } assertEquals(expectedResult, buff.toString()); diff --git a/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java b/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java index 6f95a916a64..1ca5054a7c5 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java +++ b/lucene/src/test-framework/org/apache/lucene/index/DocHelper.java @@ -26,77 +26,114 @@ import java.util.Random; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.document.BinaryField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import 
org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.LuceneTestCase; import static org.apache.lucene.util.LuceneTestCase.TEST_VERSION_CURRENT; class DocHelper { + + public static final FieldType customType; public static final String FIELD_1_TEXT = "field one text"; public static final String TEXT_FIELD_1_KEY = "textField1"; - public static Field textField1 = new Field(TEXT_FIELD_1_KEY, FIELD_1_TEXT, - Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); - + public static Field textField1; + static { + customType = new FieldType(TextField.TYPE_STORED); + textField1 = new Field(TEXT_FIELD_1_KEY, customType, FIELD_1_TEXT); + } + + public static final FieldType customType2; public static final String FIELD_2_TEXT = "field field field two text"; //Fields will be lexicographically sorted. So, the order is: field, text, two public static final int [] FIELD_2_FREQS = {3, 1, 1}; public static final String TEXT_FIELD_2_KEY = "textField2"; - public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + public static Field textField2; + static { + customType2 = new FieldType(TextField.TYPE_STORED); + customType2.setStoreTermVectors(true); + customType2.setStoreTermVectorPositions(true); + customType2.setStoreTermVectorOffsets(true); + textField2 = new Field(TEXT_FIELD_2_KEY, customType2, FIELD_2_TEXT); + } + public static final FieldType customType3; public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms"; public static final String TEXT_FIELD_3_KEY = "textField3"; - public static Field textField3 = new Field(TEXT_FIELD_3_KEY, FIELD_3_TEXT, Field.Store.YES, Field.Index.ANALYZED); - static { textField3.setOmitNorms(true); } + public static Field textField3; + + static { + customType3 = new FieldType(TextField.TYPE_STORED); + customType3.setOmitNorms(true); + textField3 = new Field(TEXT_FIELD_3_KEY, customType3, FIELD_3_TEXT); + } public static final String KEYWORD_TEXT = "Keyword"; public static final String KEYWORD_FIELD_KEY = "keyField"; - public static Field keyField = new Field(KEYWORD_FIELD_KEY, KEYWORD_TEXT, - Field.Store.YES, Field.Index.NOT_ANALYZED); - - public static final String NO_NORMS_TEXT = "omitNormsText"; - public static final String NO_NORMS_KEY = "omitNorms"; - public static Field noNormsField = new Field(NO_NORMS_KEY, NO_NORMS_TEXT, - Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); - - public static final String NO_TF_TEXT = "analyzed with no tf and positions"; - public static final String NO_TF_KEY = "omitTermFreqAndPositions"; - public static Field noTFField = new Field(NO_TF_KEY, NO_TF_TEXT, - Field.Store.YES, Field.Index.ANALYZED); + public static Field keyField; static { - noTFField.setIndexOptions(IndexOptions.DOCS_ONLY); + keyField = new Field(KEYWORD_FIELD_KEY, StringField.TYPE_STORED, KEYWORD_TEXT); } + public static final FieldType customType5; + public static final String NO_NORMS_TEXT = "omitNormsText"; + public static final String NO_NORMS_KEY = "omitNorms"; + public static Field noNormsField; + 
static { + customType5 = new FieldType(TextField.TYPE_STORED); + customType5.setOmitNorms(true); + customType5.setTokenized(false); + noNormsField = new Field(NO_NORMS_KEY, customType5, NO_NORMS_TEXT); + } + + public static final FieldType customType6; + public static final String NO_TF_TEXT = "analyzed with no tf and positions"; + public static final String NO_TF_KEY = "omitTermFreqAndPositions"; + public static Field noTFField; + static { + customType6 = new FieldType(TextField.TYPE_STORED); + customType6.setIndexOptions(IndexOptions.DOCS_ONLY); + noTFField = new Field(NO_TF_KEY, customType6, NO_TF_TEXT); + } + + public static final FieldType customType7; public static final String UNINDEXED_FIELD_TEXT = "unindexed field text"; public static final String UNINDEXED_FIELD_KEY = "unIndField"; - public static Field unIndField = new Field(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT, - Field.Store.YES, Field.Index.NO); + public static Field unIndField; + static { + customType7 = new FieldType(); + customType7.setStored(true); + unIndField = new Field(UNINDEXED_FIELD_KEY, customType7, UNINDEXED_FIELD_TEXT); + } public static final String UNSTORED_1_FIELD_TEXT = "unstored field text"; public static final String UNSTORED_FIELD_1_KEY = "unStoredField1"; - public static Field unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT, - Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.NO); + public static Field unStoredField1 = new Field(UNSTORED_FIELD_1_KEY, TextField.TYPE_UNSTORED, UNSTORED_1_FIELD_TEXT); + public static final FieldType customType8; public static final String UNSTORED_2_FIELD_TEXT = "unstored field text"; public static final String UNSTORED_FIELD_2_KEY = "unStoredField2"; - public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT, - Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); + public static Field unStoredField2; + static { + customType8 = new FieldType(TextField.TYPE_UNSTORED); + customType8.setStoreTermVectors(true); + unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, customType8, UNSTORED_2_FIELD_TEXT); + } public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary"; public static byte [] LAZY_FIELD_BINARY_BYTES; public static Field lazyFieldBinary; - + public static final String LAZY_FIELD_KEY = "lazyField"; public static final String LAZY_FIELD_TEXT = "These are some field bytes"; - public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED); + public static Field lazyField = new Field(LAZY_FIELD_KEY, customType, LAZY_FIELD_TEXT); public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField"; public static String LARGE_LAZY_FIELD_TEXT; @@ -105,15 +142,13 @@ class DocHelper { //From Issue 509 public static final String FIELD_UTF1_TEXT = "field one \u4e00text"; public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8"; - public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT, - Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); + public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, customType, FIELD_UTF1_TEXT); public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text"; //Fields will be lexicographically sorted. 
So, the order is: field, text, two public static final int [] FIELD_UTF2_FREQS = {3, 1, 1}; public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8"; - public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, customType2, FIELD_UTF2_TEXT); @@ -139,16 +174,16 @@ class DocHelper { largeLazyField//placeholder for large field, since this is null. It must always be last }; - public static Map all =new HashMap(); - public static Map indexed =new HashMap(); - public static Map stored =new HashMap(); - public static Map unstored=new HashMap(); - public static Map unindexed=new HashMap(); - public static Map termvector=new HashMap(); - public static Map notermvector=new HashMap(); - public static Map lazy= new HashMap(); - public static Map noNorms=new HashMap(); - public static Map noTf=new HashMap(); + public static Map all =new HashMap(); + public static Map indexed =new HashMap(); + public static Map stored =new HashMap(); + public static Map unstored=new HashMap(); + public static Map unindexed=new HashMap(); + public static Map termvector=new HashMap(); + public static Map notermvector=new HashMap(); + public static Map lazy= new HashMap(); + public static Map noNorms=new HashMap(); + public static Map noTf=new HashMap(); static { //Initialize the large Lazy Field @@ -162,28 +197,29 @@ class DocHelper { LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8"); } catch (UnsupportedEncodingException e) { } - lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES); + lazyFieldBinary = new BinaryField(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES); fields[fields.length - 2] = lazyFieldBinary; LARGE_LAZY_FIELD_TEXT = buffer.toString(); - largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.ANALYZED); + largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, customType, LARGE_LAZY_FIELD_TEXT); fields[fields.length - 1] = largeLazyField; for (int i=0; i map, Fieldable field) { + private static void add(Map map, IndexableField field) { map.put(field.name(), field); } @@ -257,17 +293,26 @@ class DocHelper { public static Document createDocument(int n, String indexName, int numFields) { StringBuilder sb = new StringBuilder(); - Document doc = new Document(); - doc.add(new Field("id", Integer.toString(n), Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); - doc.add(new Field("indexname", indexName, Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + + FieldType customType1 = new FieldType(StringField.TYPE_STORED); + customType1.setStoreTermVectors(true); + customType1.setStoreTermVectorPositions(true); + customType1.setStoreTermVectorOffsets(true); + + final Document doc = new Document(); + doc.add(new Field("id", customType1, Integer.toString(n))); + doc.add(new Field("indexname", customType1, indexName)); sb.append("a"); sb.append(n); - doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("field1", customType, sb.toString())); sb.append(" b"); sb.append(n); for (int i = 1; i < numFields; i++) { - doc.add(new Field("field" + (i + 1), 
sb.toString(), Store.YES, - Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("field" + (i + 1), customType, sb.toString())); } return doc; } diff --git a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java index 77896070ad2..cbf27280b09 100644 --- a/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java +++ b/lucene/src/test-framework/org/apache/lucene/index/RandomIndexWriter.java @@ -120,23 +120,22 @@ public class RandomIndexWriter implements Closeable { /** * Adds a Document. - * @see IndexWriter#addDocument(Document) + * @see IndexWriter#addDocument(Iterable) */ - public void addDocument(final Document doc) throws IOException { - if (doDocValues) { - randomPerDocFieldValues(r, doc); + public void addDocument(final Iterable doc) throws IOException { + if (doDocValues && doc instanceof Document) { + randomPerDocFieldValues(r, (Document) doc); } - if (r.nextInt(5) == 3) { // TODO: maybe, we should simply buffer up added docs // (but we need to clone them), and only when // getReader, commit, etc. are called, we do an // addDocuments? Would be better testing. - w.addDocuments(new Iterable() { + w.addDocuments(new Iterable>() { @Override - public Iterator iterator() { - return new Iterator() { + public Iterator> iterator() { + return new Iterator>() { boolean done; @Override @@ -150,7 +149,7 @@ public class RandomIndexWriter implements Closeable { } @Override - public Document next() { + public Iterable next() { if (done) { throw new IllegalStateException(); } @@ -172,7 +171,7 @@ public class RandomIndexWriter implements Closeable { ValueType[] values = ValueType.values(); ValueType type = values[random.nextInt(values.length)]; String name = "random_" + type.name() + "" + docValuesFieldPrefix; - if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getFieldable(name) != null) + if ("PreFlex".equals(codecProvider.getFieldCodec(name)) || doc.getField(name) != null) return; IndexDocValuesField docValuesField = new IndexDocValuesField(name); switch (type) { @@ -238,31 +237,30 @@ public class RandomIndexWriter implements Closeable { } } - public void addDocuments(Iterable docs) throws IOException { + public void addDocuments(Iterable> docs) throws IOException { w.addDocuments(docs); maybeCommit(); } - public void updateDocuments(Term delTerm, Iterable docs) throws IOException { + public void updateDocuments(Term delTerm, Iterable> docs) throws IOException { w.updateDocuments(delTerm, docs); maybeCommit(); } /** * Updates a document. 
- * @see IndexWriter#updateDocument(Term, Document) + * @see IndexWriter#updateDocument(Term, Iterable) */ - public void updateDocument(final Term t, final Document doc) throws IOException { + public void updateDocument(Term t, final Iterable doc) throws IOException { if (doDocValues) { - randomPerDocFieldValues(r, doc); + randomPerDocFieldValues(r, (Document) doc); } - if (r.nextInt(5) == 3) { - w.updateDocuments(t, new Iterable() { + w.updateDocuments(t, new Iterable>() { @Override - public Iterator iterator() { - return new Iterator() { + public Iterator> iterator() { + return new Iterator>() { boolean done; @Override @@ -276,7 +274,7 @@ public class RandomIndexWriter implements Closeable { } @Override - public Document next() { + public Iterable next() { if (done) { throw new IllegalStateException(); } diff --git a/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java b/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java index a4cd41f05c9..00d3a93e456 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LineFileDocs.java @@ -30,6 +30,9 @@ import java.util.Random; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; /** Minimal port of contrib/benchmark's LneDocSource + * DocMaker, so tests can enum docs from a line file created @@ -117,19 +120,24 @@ public class LineFileDocs implements Closeable { public DocState() { doc = new Document(); - title = new Field("title", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + title = new StringField("title", ""); doc.add(title); - titleTokenized = new Field("titleTokenized", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setStoreTermVectors(true); + ft.setStoreTermVectorOffsets(true); + ft.setStoreTermVectorPositions(true); + + titleTokenized = new Field("titleTokenized", ft, ""); doc.add(titleTokenized); - body = new Field("body", "", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + body = new Field("body", ft, ""); doc.add(body); - id = new Field("docid", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + id = new Field("docid", StringField.TYPE_STORED, ""); doc.add(id); - date = new Field("date", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + date = new Field("date", StringField.TYPE_STORED, ""); doc.add(date); } } diff --git a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java index db9c6b03d69..2073fb4053a 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java +++ b/lucene/src/test-framework/org/apache/lucene/util/LuceneTestCase.java @@ -35,9 +35,7 @@ import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.*; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; @@ -1131,85 +1129,43 @@ public abstract class LuceneTestCase extends Assert { return dir; } - /** Returns a 
new field instance. - * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ - public static Field newField(String name, String value, Index index) { - return newField(random, name, value, index); + public static Field newField(String name, String value, FieldType type) { + return newField(random, name, value, type); } - /** Returns a new field instance. - * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ - public static Field newField(String name, String value, Store store, Index index) { - return newField(random, name, value, store, index); - } - - /** - * Returns a new Field instance. Use this when the test does not - * care about some specific field settings (most tests) - *
- *    • If the store value is set to Store.NO, sometimes the field will be randomly stored.
- *    • More term vector data than you ask for might be indexed, for example if you choose YES
- *      it might index term vectors with offsets too.
- *
      - */ - public static Field newField(String name, String value, Store store, Index index, TermVector tv) { - return newField(random, name, value, store, index, tv); - } - - /** Returns a new field instance, using the specified random. - * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ - public static Field newField(Random random, String name, String value, Index index) { - return newField(random, name, value, Store.NO, index); - } - - /** Returns a new field instance, using the specified random. - * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ - public static Field newField(Random random, String name, String value, Store store, Index index) { - return newField(random, name, value, store, index, TermVector.NO); - } - - /** Returns a new field instance, using the specified random. - * See {@link #newField(String, String, Field.Store, Field.Index, Field.TermVector)} for more information */ - public static Field newField(Random random, String name, String value, Store store, Index index, TermVector tv) { - - if (usually(random)) { + public static Field newField(Random random, String name, String value, FieldType type) { + if (usually(random) || !type.indexed()) { // most of the time, don't modify the params - return new Field(name, value, store, index, tv); + return new Field(name, type, value); } - if (random.nextBoolean()) { - // tickle any code still relying on field names being interned: - name = new String(name); + FieldType newType = new FieldType(type); + if (!newType.stored() && random.nextBoolean()) { + newType.setStored(true); // randomly store it } - if (!index.isIndexed()) - return new Field(name, value, store, index, tv); + if (!newType.storeTermVectors() && random.nextBoolean()) { + newType.setStoreTermVectors(true); + if (!newType.storeTermVectorOffsets()) { + newType.setStoreTermVectorOffsets(random.nextBoolean()); + } + if (!newType.storeTermVectorPositions()) { + newType.setStoreTermVectorPositions(random.nextBoolean()); + } + } - if (!store.isStored() && random.nextBoolean()) - store = Store.YES; // randomly store it - - tv = randomTVSetting(random, tv); - - return new Field(name, value, store, index, tv); + // TODO: we need to do this, but smarter, ie, most of + // the time we set the same value for a given field but + // sometimes (rarely) we change it up: + /* + if (newType.omitNorms()) { + newType.setOmitNorms(random.nextBoolean()); + } + */ + + return new Field(name, newType, value); } - - static final TermVector tvSettings[] = { - TermVector.NO, TermVector.YES, TermVector.WITH_OFFSETS, - TermVector.WITH_POSITIONS, TermVector.WITH_POSITIONS_OFFSETS - }; - - private static TermVector randomTVSetting(Random random, TermVector minimum) { - switch(minimum) { - case NO: return tvSettings[_TestUtil.nextInt(random, 0, tvSettings.length-1)]; - case YES: return tvSettings[_TestUtil.nextInt(random, 1, tvSettings.length-1)]; - case WITH_OFFSETS: return random.nextBoolean() ? TermVector.WITH_OFFSETS - : TermVector.WITH_POSITIONS_OFFSETS; - case WITH_POSITIONS: return random.nextBoolean() ? 
TermVector.WITH_POSITIONS - : TermVector.WITH_POSITIONS_OFFSETS; - default: return TermVector.WITH_POSITIONS_OFFSETS; - } - } - + /** return a random Locale from the available locales on the system */ public static Locale randomLocale(Random random) { Locale locales[] = Locale.getAvailableLocales(); diff --git a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java index 4d33c86e0ab..9eb6c89ef7a 100644 --- a/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java +++ b/lucene/src/test-framework/org/apache/lucene/util/_TestUtil.java @@ -28,7 +28,6 @@ import java.io.PrintStream; import java.lang.reflect.Method; import java.util.Enumeration; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Random; import java.util.zip.ZipEntry; @@ -36,11 +35,11 @@ import java.util.zip.ZipFile; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.CheckIndex; import org.apache.lucene.index.ConcurrentMergeScheduler; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LogMergePolicy; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeScheduler; @@ -424,10 +423,9 @@ public class _TestUtil { /** Adds field info for a Document. */ public static void add(Document doc, FieldInfos fieldInfos) { - List fields = doc.getFields(); - for (Fieldable field : fields) { - fieldInfos.addOrUpdate(field.name(), field.isIndexed(), field.isTermVectorStored(), field.isStorePositionWithTermVector(), - field.isStoreOffsetWithTermVector(), field.getOmitNorms(), false, field.getIndexOptions(), field.docValuesType()); + for (IndexableField field : doc) { + fieldInfos.addOrUpdate(field.name(), field.indexed(), field.storeTermVectors(), field.storeTermVectorPositions(), + field.storeTermVectorOffsets(), field.omitNorms(), false, field.indexOptions(), field.docValuesType()); } } @@ -504,15 +502,13 @@ public class _TestUtil { // TODO: is there a pre-existing way to do this!!! public static Document cloneDocument(Document doc1) { final Document doc2 = new Document(); - for(Fieldable f : doc1.getFields()) { + for(IndexableField f : doc1) { Field field1 = (Field) f; Field field2 = new Field(field1.name(), - field1.stringValue(), - field1.isStored() ? Field.Store.YES : Field.Store.NO, - field1.isIndexed() ? (field1.isTokenized() ? 
Field.Index.ANALYZED : Field.Index.NOT_ANALYZED) : Field.Index.NO); - field2.setOmitNorms(field1.getOmitNorms()); - field2.setIndexOptions(field1.getIndexOptions()); + field1.getFieldType(), + field1.stringValue() + ); doc2.add(field2); } diff --git a/lucene/src/test/org/apache/lucene/TestDemo.java b/lucene/src/test/org/apache/lucene/TestDemo.java index eb537b5e995..fcdda624b4e 100644 --- a/lucene/src/test/org/apache/lucene/TestDemo.java +++ b/lucene/src/test/org/apache/lucene/TestDemo.java @@ -22,7 +22,7 @@ import java.io.IOException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.Term; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.*; @@ -49,8 +49,7 @@ public class TestDemo extends LuceneTestCase { Document doc = new Document(); String longTerm = "longtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongtermlongterm"; String text = "This is the text to be indexed. " + longTerm; - doc.add(newField("fieldname", text, Field.Store.YES, - Field.Index.ANALYZED)); + doc.add(newField("fieldname", text, TextField.TYPE_STORED)); iwriter.addDocument(doc); iwriter.close(); diff --git a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java index de6b730f8a6..7029c425346 100644 --- a/lucene/src/test/org/apache/lucene/TestExternalCodecs.java +++ b/lucene/src/test/org/apache/lucene/TestExternalCodecs.java @@ -529,13 +529,13 @@ public class TestExternalCodecs extends LuceneTestCase { w.setInfoStream(VERBOSE ? 
System.out : null); Document doc = new Document(); // uses default codec: - doc.add(newField("field1", "this field uses the standard codec as the test", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("field1", "this field uses the standard codec as the test", TextField.TYPE_UNSTORED)); // uses pulsing codec: - Field field2 = newField("field2", "this field uses the pulsing codec as the test", Field.Store.NO, Field.Index.ANALYZED); + Field field2 = newField("field2", "this field uses the pulsing codec as the test", TextField.TYPE_UNSTORED); provider.setFieldCodec(field2.name(), "Pulsing"); doc.add(field2); - Field idField = newField("id", "", Field.Store.NO, Field.Index.NOT_ANALYZED); + Field idField = newField("id", "", StringField.TYPE_UNSTORED); provider.setFieldCodec(idField.name(), "Pulsing"); doc.add(idField); diff --git a/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java b/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java index e477ae97b67..08b1e6dc5ea 100644 --- a/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java +++ b/lucene/src/test/org/apache/lucene/TestMergeSchedulerExternal.java @@ -33,6 +33,7 @@ import org.apache.lucene.index.MergePolicy.OneMerge; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; /** * Holds tests cases to verify external APIs are accessible @@ -90,7 +91,7 @@ public class TestMergeSchedulerExternal extends LuceneTestCase { dir.failOn(new FailOnlyOnMerge()); Document doc = new Document(); - Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); + Field idField = newField("id", "", StringField.TYPE_STORED); doc.add(idField); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( diff --git a/lucene/src/test/org/apache/lucene/TestSearch.java b/lucene/src/test/org/apache/lucene/TestSearch.java index 957ec17d78b..1e664a48b1b 100644 --- a/lucene/src/test/org/apache/lucene/TestSearch.java +++ b/lucene/src/test/org/apache/lucene/TestSearch.java @@ -93,8 +93,8 @@ public class TestSearch extends LuceneTestCase { }; for (int j = 0; j < docs.length; j++) { Document d = new Document(); - d.add(newField("contents", docs[j], Field.Store.YES, Field.Index.ANALYZED)); - d.add(newField("id", ""+j, Field.Index.NOT_ANALYZED_NO_NORMS)); + d.add(newField("contents", docs[j], TextField.TYPE_STORED)); + d.add(newField("id", ""+j, StringField.TYPE_UNSTORED)); writer.addDocument(d); } writer.close(); diff --git a/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java b/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java index 00e9c96ae88..f81b25dc790 100644 --- a/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java +++ b/lucene/src/test/org/apache/lucene/TestSearchForDuplicates.java @@ -92,8 +92,8 @@ public class TestSearchForDuplicates extends LuceneTestCase { for (int j = 0; j < MAX_DOCS; j++) { Document d = new Document(); - d.add(newField(PRIORITY_FIELD, HIGH_PRIORITY, Field.Store.YES, Field.Index.ANALYZED)); - d.add(newField(ID_FIELD, Integer.toString(j), Field.Store.YES, Field.Index.ANALYZED)); + d.add(newField(PRIORITY_FIELD, HIGH_PRIORITY, TextField.TYPE_STORED)); + d.add(newField(ID_FIELD, Integer.toString(j), TextField.TYPE_STORED)); writer.addDocument(d); } writer.close(); diff --git a/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java b/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java 
index 3f68400f7d4..d218d29ee14 100644 --- a/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java +++ b/lucene/src/test/org/apache/lucene/analysis/TestCachingTokenFilter.java @@ -23,8 +23,7 @@ import java.io.IOException; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.DocsAndPositionsEnum; @@ -60,7 +59,7 @@ public class TestCachingTokenFilter extends BaseTokenStreamTestCase { stream = new CachingTokenFilter(stream); - doc.add(new Field("preanalyzed", stream, TermVector.NO)); + doc.add(new TextField("preanalyzed", stream)); // 1) we consume all tokens twice before we add the doc to the index checkTokens(stream); diff --git a/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java b/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java index d11d817bc65..a97ab496f5f 100644 --- a/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java +++ b/lucene/src/test/org/apache/lucene/document/TestBinaryDocument.java @@ -1,10 +1,11 @@ package org.apache.lucene.document; -import org.apache.lucene.util.LuceneTestCase; - import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -34,8 +35,10 @@ public class TestBinaryDocument extends LuceneTestCase { public void testBinaryFieldInIndex() throws Exception { - Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes()); - Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO); + FieldType ft = new FieldType(); + ft.setStored(true); + IndexableField binaryFldStored = new BinaryField("binaryStored", binaryValStored.getBytes()); + IndexableField stringFldStored = new Field("stringStored", ft, binaryValStored); Document doc = new Document(); @@ -44,7 +47,7 @@ public class TestBinaryDocument extends LuceneTestCase { doc.add(stringFldStored); /** test for field count */ - assertEquals(2, doc.fields.size()); + assertEquals(2, doc.getFields().size()); /** add the doc to a ram index */ Directory dir = newDirectory(); @@ -57,7 +60,9 @@ public class TestBinaryDocument extends LuceneTestCase { assertTrue(docFromReader != null); /** fetch the binary stored field and compare it's content with the original one */ - String binaryFldStoredTest = new String(docFromReader.getBinaryValue("binaryStored")); + BytesRef bytes = docFromReader.getBinaryValue("binaryStored"); + assertNotNull(bytes); + String binaryFldStoredTest = new String(bytes.bytes, bytes.offset, bytes.length); assertTrue(binaryFldStoredTest.equals(binaryValStored)); /** fetch the string field and compare it's content with the original one */ @@ -77,8 +82,8 @@ public class TestBinaryDocument extends LuceneTestCase { } public void testCompressionTools() throws Exception { - Fieldable binaryFldCompressed = new Field("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes())); - Fieldable 
stringFldCompressed = new Field("stringCompressed", CompressionTools.compressString(binaryValCompressed)); + IndexableField binaryFldCompressed = new BinaryField("binaryCompressed", CompressionTools.compress(binaryValCompressed.getBytes())); + IndexableField stringFldCompressed = new BinaryField("stringCompressed", CompressionTools.compressString(binaryValCompressed)); Document doc = new Document(); diff --git a/lucene/src/test/org/apache/lucene/document/TestDateTools.java b/lucene/src/test/org/apache/lucene/document/TestDateTools.java index b0f7c82752f..be6fb931477 100644 --- a/lucene/src/test/org/apache/lucene/document/TestDateTools.java +++ b/lucene/src/test/org/apache/lucene/document/TestDateTools.java @@ -196,4 +196,4 @@ public class TestDateTools extends LuceneTestCase { } } -} \ No newline at end of file +} diff --git a/lucene/src/test/org/apache/lucene/document/TestDocument.java b/lucene/src/test/org/apache/lucene/document/TestDocument.java index c505df68c94..cc292e957bb 100644 --- a/lucene/src/test/org/apache/lucene/document/TestDocument.java +++ b/lucene/src/test/org/apache/lucene/document/TestDocument.java @@ -1,6 +1,7 @@ package org.apache.lucene.document; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; @@ -8,6 +9,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; /** @@ -37,22 +39,24 @@ public class TestDocument extends LuceneTestCase { public void testBinaryField() throws Exception { Document doc = new Document(); - Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, - Field.Index.NO); - Fieldable binaryFld = new Field("binary", binaryVal.getBytes()); - Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes()); + + FieldType ft = new FieldType(); + ft.setStored(true); + IndexableField stringFld = new Field("string", ft, binaryVal); + IndexableField binaryFld = new BinaryField("binary", binaryVal.getBytes()); + IndexableField binaryFld2 = new BinaryField("binary", binaryVal2.getBytes()); doc.add(stringFld); doc.add(binaryFld); - assertEquals(2, doc.fields.size()); + assertEquals(2, doc.getFields().size()); - assertTrue(binaryFld.isBinary()); - assertTrue(binaryFld.isStored()); - assertFalse(binaryFld.isIndexed()); - assertFalse(binaryFld.isTokenized()); + assertTrue(binaryFld.binaryValue() != null); + assertTrue(binaryFld.stored()); + assertFalse(binaryFld.indexed()); + assertFalse(binaryFld.tokenized()); - String binaryTest = new String(doc.getBinaryValue("binary")); + String binaryTest = doc.getBinaryValue("binary").utf8ToString(); assertTrue(binaryTest.equals(binaryVal)); String stringTest = doc.get("string"); @@ -60,14 +64,14 @@ public class TestDocument extends LuceneTestCase { doc.add(binaryFld2); - assertEquals(3, doc.fields.size()); + assertEquals(3, doc.getFields().size()); - byte[][] binaryTests = doc.getBinaryValues("binary"); + BytesRef[] binaryTests = doc.getBinaryValues("binary"); assertEquals(2, binaryTests.length); - binaryTest = new String(binaryTests[0]); - String binaryTest2 = new String(binaryTests[1]); + binaryTest = binaryTests[0].utf8ToString(); + String binaryTest2 = binaryTests[1].utf8ToString(); assertFalse(binaryTest.equals(binaryTest2)); @@ 
-75,10 +79,10 @@ public class TestDocument extends LuceneTestCase { assertTrue(binaryTest2.equals(binaryVal2)); doc.removeField("string"); - assertEquals(2, doc.fields.size()); + assertEquals(2, doc.getFields().size()); doc.removeFields("binary"); - assertEquals(0, doc.fields.size()); + assertEquals(0, doc.getFields().size()); } /** @@ -89,45 +93,48 @@ public class TestDocument extends LuceneTestCase { */ public void testRemoveForNewDocument() throws Exception { Document doc = makeDocumentWithFields(); - assertEquals(8, doc.fields.size()); + assertEquals(8, doc.getFields().size()); doc.removeFields("keyword"); - assertEquals(6, doc.fields.size()); + assertEquals(6, doc.getFields().size()); doc.removeFields("doesnotexists"); // removing non-existing fields is // siltenlty ignored doc.removeFields("keyword"); // removing a field more than once - assertEquals(6, doc.fields.size()); + assertEquals(6, doc.getFields().size()); doc.removeField("text"); - assertEquals(5, doc.fields.size()); + assertEquals(5, doc.getFields().size()); doc.removeField("text"); - assertEquals(4, doc.fields.size()); + assertEquals(4, doc.getFields().size()); doc.removeField("text"); - assertEquals(4, doc.fields.size()); + assertEquals(4, doc.getFields().size()); doc.removeField("doesnotexists"); // removing non-existing fields is // siltenlty ignored - assertEquals(4, doc.fields.size()); + assertEquals(4, doc.getFields().size()); doc.removeFields("unindexed"); - assertEquals(2, doc.fields.size()); + assertEquals(2, doc.getFields().size()); doc.removeFields("unstored"); - assertEquals(0, doc.fields.size()); + assertEquals(0, doc.getFields().size()); doc.removeFields("doesnotexists"); // removing non-existing fields is // siltenlty ignored - assertEquals(0, doc.fields.size()); + assertEquals(0, doc.getFields().size()); } public void testConstructorExceptions() { - new Field("name", "value", Field.Store.YES, Field.Index.NO); // okay - new Field("name", "value", Field.Store.NO, Field.Index.NOT_ANALYZED); // okay + FieldType ft = new FieldType(); + ft.setStored(true); + new Field("name", ft, "value"); // okay + new StringField("name", "value"); // okay try { - new Field("name", "value", Field.Store.NO, Field.Index.NO); + new Field("name", new FieldType(), "value"); fail(); } catch (IllegalArgumentException e) { // expected exception } - new Field("name", "value", Field.Store.YES, Field.Index.NO, - Field.TermVector.NO); // okay + new Field("name", ft, "value"); // okay try { - new Field("name", "value", Field.Store.YES, Field.Index.NO, - Field.TermVector.YES); + FieldType ft2 = new FieldType(); + ft2.setStored(true); + ft2.setStoreTermVectors(true); + new Field("name", ft2, "value"); fail(); } catch (IllegalArgumentException e) { // expected exception @@ -174,28 +181,26 @@ public class TestDocument extends LuceneTestCase { private Document makeDocumentWithFields() { Document doc = new Document(); - doc.add(new Field("keyword", "test1", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc.add(new Field("keyword", "test2", Field.Store.YES, - Field.Index.NOT_ANALYZED)); - doc.add(new Field("text", "test1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("text", "test2", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("unindexed", "test1", Field.Store.YES, Field.Index.NO)); - doc.add(new Field("unindexed", "test2", Field.Store.YES, Field.Index.NO)); + FieldType stored = new FieldType(); + stored.setStored(true); + doc.add(new Field("keyword", StringField.TYPE_STORED, "test1")); + doc.add(new 
Field("keyword", StringField.TYPE_STORED, "test2")); + doc.add(new Field("text", TextField.TYPE_STORED, "test1")); + doc.add(new Field("text", TextField.TYPE_STORED, "test2")); + doc.add(new Field("unindexed", stored, "test1")); + doc.add(new Field("unindexed", stored, "test2")); doc - .add(new Field("unstored", "test1", Field.Store.NO, - Field.Index.ANALYZED)); + .add(new TextField("unstored", "test1")); doc - .add(new Field("unstored", "test2", Field.Store.NO, - Field.Index.ANALYZED)); + .add(new TextField("unstored", "test2")); return doc; } private void doAssert(Document doc, boolean fromIndex) { - String[] keywordFieldValues = doc.getValues("keyword"); - String[] textFieldValues = doc.getValues("text"); - String[] unindexedFieldValues = doc.getValues("unindexed"); - String[] unstoredFieldValues = doc.getValues("unstored"); + IndexableField[] keywordFieldValues = doc.getFields("keyword"); + IndexableField[] textFieldValues = doc.getFields("text"); + IndexableField[] unindexedFieldValues = doc.getFields("unindexed"); + IndexableField[] unstoredFieldValues = doc.getFields("unstored"); assertTrue(keywordFieldValues.length == 2); assertTrue(textFieldValues.length == 2); @@ -206,28 +211,26 @@ public class TestDocument extends LuceneTestCase { assertTrue(unstoredFieldValues.length == 2); } - assertTrue(keywordFieldValues[0].equals("test1")); - assertTrue(keywordFieldValues[1].equals("test2")); - assertTrue(textFieldValues[0].equals("test1")); - assertTrue(textFieldValues[1].equals("test2")); - assertTrue(unindexedFieldValues[0].equals("test1")); - assertTrue(unindexedFieldValues[1].equals("test2")); + assertTrue(keywordFieldValues[0].stringValue().equals("test1")); + assertTrue(keywordFieldValues[1].stringValue().equals("test2")); + assertTrue(textFieldValues[0].stringValue().equals("test1")); + assertTrue(textFieldValues[1].stringValue().equals("test2")); + assertTrue(unindexedFieldValues[0].stringValue().equals("test1")); + assertTrue(unindexedFieldValues[1].stringValue().equals("test2")); // this test cannot work for documents retrieved from the index // since unstored fields will obviously not be returned if (!fromIndex) { - assertTrue(unstoredFieldValues[0].equals("test1")); - assertTrue(unstoredFieldValues[1].equals("test2")); + assertTrue(unstoredFieldValues[0].stringValue().equals("test1")); + assertTrue(unstoredFieldValues[1].stringValue().equals("test2")); } } public void testFieldSetValue() throws Exception { - Field field = new Field("id", "id1", Field.Store.YES, - Field.Index.NOT_ANALYZED); + Field field = new Field("id", StringField.TYPE_STORED, "id1"); Document doc = new Document(); doc.add(field); - doc.add(new Field("keyword", "test", Field.Store.YES, - Field.Index.NOT_ANALYZED)); + doc.add(new Field("keyword", StringField.TYPE_STORED, "test")); Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); @@ -248,7 +251,7 @@ public class TestDocument extends LuceneTestCase { int result = 0; for (int i = 0; i < 3; i++) { Document doc2 = searcher.doc(hits[i].doc); - Field f = doc2.getField("id"); + Field f = (Field) doc2.getField("id"); if (f.stringValue().equals("id1")) result |= 1; else if (f.stringValue().equals("id2")) result |= 2; else if (f.stringValue().equals("id3")) result |= 4; @@ -262,9 +265,8 @@ public class TestDocument extends LuceneTestCase { } public void testFieldSetValueChangeBinary() { - Field field1 = new Field("field1", new byte[0]); - Field field2 = new Field("field2", "", Field.Store.YES, - Field.Index.ANALYZED); + Field 
field1 = new BinaryField("field1", new byte[0]); + Field field2 = new Field("field2", TextField.TYPE_STORED, ""); try { field1.setValue("abc"); fail("did not hit expected exception"); diff --git a/lucene/src/test/org/apache/lucene/index/Test2BPostings.java b/lucene/src/test/org/apache/lucene/index/Test2BPostings.java index 006cbb473dd..c6c0fbf636a 100644 --- a/lucene/src/test/org/apache/lucene/index/Test2BPostings.java +++ b/lucene/src/test/org/apache/lucene/index/Test2BPostings.java @@ -24,12 +24,13 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; -import org.junit.Ignore; /** * Test indexes ~82M docs with 26 terms each, so you get > Integer.MAX_VALUE terms/docs pairs @@ -62,9 +63,10 @@ public class Test2BPostings extends LuceneTestCase { } Document doc = new Document(); - Field field = new Field("field", new MyTokenStream()); - field.setIndexOptions(IndexOptions.DOCS_ONLY); - field.setOmitNorms(true); + FieldType ft = new FieldType(TextField.TYPE_UNSTORED); + ft.setOmitNorms(true); + ft.setIndexOptions(IndexOptions.DOCS_ONLY); + Field field = new Field("field", ft, new MyTokenStream()); doc.add(field); final int numDocs = (Integer.MAX_VALUE / 26) + 1; diff --git a/lucene/src/test/org/apache/lucene/index/Test2BTerms.java b/lucene/src/test/org/apache/lucene/index/Test2BTerms.java index 346c6158b28..3f27884887e 100644 --- a/lucene/src/test/org/apache/lucene/index/Test2BTerms.java +++ b/lucene/src/test/org/apache/lucene/index/Test2BTerms.java @@ -25,9 +25,7 @@ import org.apache.lucene.analysis.tokenattributes.*; import org.apache.lucene.document.*; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.codecs.CodecProvider; -import java.io.File; import java.io.IOException; -import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -178,9 +176,11 @@ public class Test2BTerms extends LuceneTestCase { Document doc = new Document(); final MyTokenStream ts = new MyTokenStream(random, TERMS_PER_DOC); - Field field = new Field("field", ts); - field.setIndexOptions(IndexOptions.DOCS_ONLY); - field.setOmitNorms(true); + + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setIndexOptions(IndexOptions.DOCS_ONLY); + customType.setOmitNorms(true); + Field field = new Field("field", customType, ts); doc.add(field); //w.setInfoStream(System.out); final int numDocs = (int) (TERM_COUNT/TERMS_PER_DOC); diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java index a2c4ec45711..1efa7e97348 100755 --- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java +++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java @@ -25,9 +25,9 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import 
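The hunks above show the two halves of the new field API that this patch migrates every test to: sugar classes (StringField, TextField, BinaryField) for the common cases, and an explicit FieldType for everything else. A minimal before/after sketch, assuming the trunk API used in this patch; the class and value names below are illustrative only:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;

    public class NewFieldApiSketch {
      static Document makeDoc() {
        Document doc = new Document();
        // 3.x: new Field("id", "42", Field.Store.YES, Field.Index.NOT_ANALYZED)
        doc.add(new Field("id", StringField.TYPE_STORED, "42"));
        // 3.x: new Field("body", "some text", Field.Store.NO, Field.Index.ANALYZED)
        doc.add(new TextField("body", "some text"));
        return doc;
      }
    }

Note the argument order in this snapshot of the API: the FieldType comes before the value.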
diff --git a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
index a2c4ec45711..1efa7e97348 100755
--- a/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
+++ b/lucene/src/test/org/apache/lucene/index/TestAddIndexes.java
@@ -25,9 +25,9 @@ import java.util.List;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.mocksep.MockSepCodec;
@@ -38,7 +38,6 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -167,9 +166,8 @@ public class TestAddIndexes extends LuceneTestCase {
     // docs, so 10 pending deletes:
     for (int i = 0; i < 20; i++) {
       Document doc = new Document();
-      doc.add(newField("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
-      doc.add(newField("content", "bbb " + i, Field.Store.NO,
-          Field.Index.ANALYZED));
+      doc.add(newField("id", "" + (i % 10), StringField.TYPE_UNSTORED));
+      doc.add(newField("content", "bbb " + i, TextField.TYPE_UNSTORED));
       writer.updateDocument(new Term("id", "" + (i%10)), doc);
     }
     // Deletes one of the 10 added docs, leaving 9:
@@ -203,8 +201,8 @@
     // docs, so 10 pending deletes:
     for (int i = 0; i < 20; i++) {
       Document doc = new Document();
-      doc.add(newField("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
-      doc.add(newField("content", "bbb " + i, Field.Store.NO, Field.Index.ANALYZED));
+      doc.add(newField("id", "" + (i % 10), StringField.TYPE_UNSTORED));
+      doc.add(newField("content", "bbb " + i, TextField.TYPE_UNSTORED));
       writer.updateDocument(new Term("id", "" + (i%10)), doc);
     }
@@ -241,9 +239,8 @@
     // docs, so 10 pending deletes:
     for (int i = 0; i < 20; i++) {
       Document doc = new Document();
-      doc.add(newField("id", "" + (i % 10), Field.Store.NO, Field.Index.NOT_ANALYZED));
-      doc.add(newField("content", "bbb " + i, Field.Store.NO,
-          Field.Index.ANALYZED));
+      doc.add(newField("id", "" + (i % 10), StringField.TYPE_UNSTORED));
+      doc.add(newField("content", "bbb " + i, TextField.TYPE_UNSTORED));
       writer.updateDocument(new Term("id", "" + (i%10)), doc);
     }
@@ -503,8 +500,7 @@
   private void addDocs(IndexWriter writer, int numDocs) throws IOException {
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
-      doc.add(newField("content", "aaa", Field.Store.NO,
-          Field.Index.ANALYZED));
+      doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED));
       writer.addDocument(doc);
     }
   }
@@ -512,8 +508,7 @@
   private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
-      doc.add(newField("content", "bbb", Field.Store.NO,
-          Field.Index.ANALYZED));
+      doc.add(newField("content", "bbb", TextField.TYPE_UNSTORED));
       writer.addDocument(doc);
     }
   }
@@ -582,20 +577,21 @@
         .setMaxBufferedDocs(5).setMergePolicy(lmp));
     Document doc = new Document();
-    doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
-        Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+    customType.setStoreTermVectors(true);
+    customType.setStoreTermVectorPositions(true);
+    customType.setStoreTermVectorOffsets(true);
+    doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType));
     for(int i=0;i<60;i++)
       writer.addDocument(doc);
     Document doc2 = new Document();
-    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
-        Field.Index.NO));
-    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
-        Field.Index.NO));
-    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
-        Field.Index.NO));
-    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES,
-        Field.Index.NO));
+    FieldType customType2 = new FieldType();
+    customType2.setStored(true);
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
+    doc2.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType2));
     for(int i=0;i<10;i++)
       writer.addDocument(doc2);
     writer.close();
@@ -619,7 +615,7 @@
   private void addDoc(IndexWriter writer) throws IOException {
     Document doc = new Document();
-    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED));
     writer.addDocument(doc);
   }
@@ -944,7 +940,7 @@
       IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random));
       IndexWriter writer = new IndexWriter(dirs[i], conf);
       Document doc = new Document();
-      doc.add(new Field("id", "myid", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+      doc.add(new StringField("id", "myid"));
       writer.addDocument(doc);
       writer.close();
     }
@@ -973,8 +969,8 @@
   private void addDocs3(IndexWriter writer, int numDocs) throws IOException {
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
-      doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
-      doc.add(newField("id", "" + i, Field.Store.YES, Field.Index.ANALYZED));
+      doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED));
+      doc.add(newField("id", "" + i, TextField.TYPE_STORED));
       writer.addDocument(doc);
     }
   }
@@ -1061,7 +1057,9 @@
       dirs[i] = new RAMDirectory();
       IndexWriter w = new IndexWriter(dirs[i], new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
       Document d = new Document();
-      d.add(new Field("c", "v", Store.YES, Index.ANALYZED, TermVector.YES));
+      FieldType customType = new FieldType(TextField.TYPE_STORED);
+      customType.setStoreTermVectors(true);
+      d.add(new Field("c", customType, "v"));
       w.addDocument(d);
       w.close();
     }
@@ -1099,10 +1097,10 @@
        new MockAnalyzer(random)).setMergePolicy(lmp2);
    IndexWriter w2 = new IndexWriter(src, conf2);
    Document doc = new Document();
-    doc.add(new Field("c", "some text", Store.YES, Index.ANALYZED));
+    doc.add(new Field("c", TextField.TYPE_STORED, "some text"));
    w2.addDocument(doc);
    doc = new Document();
-    doc.add(new Field("d", "delete", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
+    doc.add(new StringField("d", "delete"));
    w2.addDocument(doc);
    w2.commit();
    w2.deleteDocuments(new Term("d", "delete"));
@@ -1152,7 +1150,9 @@ public class TestAddIndexes extends LuceneTestCase {
     conf.setCodecProvider(provider);
     IndexWriter w = new IndexWriter(toAdd, conf);
     Document doc = new Document();
-    doc.add(newField("foo", "bar", Index.NOT_ANALYZED));
+    FieldType customType = new FieldType();
+    customType.setIndexed(true);
+    doc.add(newField("foo", "bar", customType));
     w.addDocument(doc);
     w.close();
   }
diff --git a/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java b/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
index d1264cb46d5..8d4bf4330f9 100644
--- a/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
+++ b/lucene/src/test/org/apache/lucene/index/TestAtomicUpdate.java
@@ -95,8 +95,8 @@
       // Update all 100 docs...
       for(int i=0; i<100; i++) {
         Document d = new Document();
-        d.add(new Field("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
-        d.add(new Field("contents", English.intToEnglish(i+10*count), Field.Store.NO, Field.Index.ANALYZED));
+        d.add(new Field("id", StringField.TYPE_STORED, Integer.toString(i)));
+        d.add(new TextField("contents", English.intToEnglish(i+10*count)));
         writer.updateDocument(new Term("id", Integer.toString(i)), d);
       }
     }
@@ -136,8 +136,8 @@
     // Establish a base index of 100 docs:
     for(int i=0;i<100;i++) {
       Document d = new Document();
-      d.add(newField("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED));
-      d.add(newField("contents", English.intToEnglish(i), Field.Store.NO, Field.Index.ANALYZED));
+      d.add(newField("id", Integer.toString(i), StringField.TYPE_STORED));
+      d.add(newField("contents", English.intToEnglish(i), TextField.TYPE_UNSTORED));
       if ((i-1)%7 == 0) {
         writer.commit();
       }
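Several hunks above replace the old per-Field setters (setIndexOptions, setOmitNorms) with a FieldType configured once and handed to the constructor. A sketch of that idiom under the same assumed trunk API; the constant and method names below are made up for illustration:

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.FieldInfo.IndexOptions;

    public class DocsOnlySketch {
      // Unstored text that records only docIDs (no freqs/positions) and no norms,
      // mirroring the Test2BPostings/Test2BTerms hunks earlier:
      static final FieldType DOCS_ONLY_NO_NORMS = new FieldType(TextField.TYPE_UNSTORED);
      static {
        DOCS_ONLY_NO_NORMS.setIndexOptions(IndexOptions.DOCS_ONLY);
        DOCS_ONLY_NO_NORMS.setOmitNorms(true);
      }

      static Field make(String name, String value) {
        return new Field(name, DOCS_ONLY_NO_NORMS, value);
      }
    }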
diff --git a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
index 9aa5eba80fa..e294397233f 100644
--- a/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
+++ b/lucene/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java
@@ -21,16 +21,18 @@ import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
 import java.io.PrintStream;
-import java.util.Arrays;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.List;
 import java.util.Random;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.NumericField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.DefaultSimilarity;
@@ -45,9 +47,9 @@ import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.Constants;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.util.Constants;
 /*
   Verify we can read the pre-4.0 file format, do searches
@@ -288,11 +290,11 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
     for(int i=0;i<35;i++) {
       if (liveDocs.get(i)) {
         Document d = reader.document(i);
-        List<Fieldable> fields = d.getFields();
+        List<IndexableField> fields = d.getFields();
         if (d.getField("content3") == null) {
           final int numFields = 5;
           assertEquals(numFields, fields.size());
-          Field f = d.getField("id");
+          IndexableField f = d.getField("id");
           assertEquals(""+i, f.stringValue());
           f = d.getField("utf8");
@@ -594,12 +596,16 @@
   private void addDoc(IndexWriter writer, int id) throws IOException {
     Document doc = new Document();
-    doc.add(new Field("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
-    doc.add(new Field("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED));
-    doc.add(new Field("autf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-    doc.add(new Field("utf8", "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-    doc.add(new Field("content2", "here is more content with aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
-    doc.add(new Field("fie\u2C77ld", "field with non-ascii name", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(new TextField("content", "aaa"));
+    doc.add(new Field("id", StringField.TYPE_STORED, Integer.toString(id)));
+    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
+    customType2.setStoreTermVectors(true);
+    customType2.setStoreTermVectorPositions(true);
+    customType2.setStoreTermVectorOffsets(true);
+    doc.add(new Field("autf8", customType2, "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd"));
+    doc.add(new Field("utf8", customType2, "Lu\uD834\uDD1Ece\uD834\uDD60ne \u0000 \u2620 ab\ud917\udc17cd"));
+    doc.add(new Field("content2", customType2, "here is more content with aaa aaa aaa"));
+    doc.add(new Field("fie\u2C77ld", customType2, "field with non-ascii name"));
     // add numeric fields, to test if flex preserves encoding
     doc.add(new NumericField("trieInt", 4).setIntValue(id));
     doc.add(new NumericField("trieLong", 4).setLongValue(id));
@@ -608,11 +614,14 @@
   private void addNoProxDoc(IndexWriter writer) throws IOException {
     Document doc = new Document();
-    Field f = new Field("content3", "aaa", Field.Store.YES, Field.Index.ANALYZED);
-    f.setIndexOptions(IndexOptions.DOCS_ONLY);
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+    customType.setIndexOptions(IndexOptions.DOCS_ONLY);
+    Field f = new Field("content3", customType, "aaa");
    doc.add(f);
-    f = new Field("content4", "aaa", Field.Store.YES, Field.Index.NO);
-    f.setIndexOptions(IndexOptions.DOCS_ONLY);
+    FieldType customType2 = new FieldType();
+    customType2.setStored(true);
+    customType2.setIndexOptions(IndexOptions.DOCS_ONLY);
+    f = new Field("content4", customType2, "aaa");
     doc.add(f);
     writer.addDocument(doc);
   }
diff --git a/lucene/src/test/org/apache/lucene/index/TestBinaryTerms.java b/lucene/src/test/org/apache/lucene/index/TestBinaryTerms.java
index 78b910876e7..e2d48a33312 100644
--- a/lucene/src/test/org/apache/lucene/index/TestBinaryTerms.java
+++ b/lucene/src/test/org/apache/lucene/index/TestBinaryTerms.java
@@ -21,6 +21,8 @@ import java.io.IOException;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.TermQuery;
@@ -47,8 +49,10 @@
     bytes.bytes[1] = (byte) (255 - i);
     bytes.length = 2;
     Document doc = new Document();
-    doc.add(new Field("id", "" + i, Field.Store.YES, Field.Index.NO));
-    doc.add(new Field("bytes", tokenStream));
+    FieldType customType = new FieldType();
+    customType.setStored(true);
+    doc.add(new Field("id", customType, "" + i));
+    doc.add(new TextField("bytes", tokenStream));
     iw.addDocument(doc);
   }
diff --git a/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java b/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java
index 04810a3a7dc..996d889b24c 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCheckIndex.java
@@ -27,7 +27,8 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.util.Constants;
 public class TestCheckIndex extends LuceneTestCase {
@@ -36,7 +37,11 @@
     Directory dir = newDirectory();
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
     Document doc = new Document();
-    doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+    customType.setStoreTermVectors(true);
+    customType.setStoreTermVectorPositions(true);
+    customType.setStoreTermVectorOffsets(true);
+    doc.add(newField("field", "aaa", customType));
     for(int i=0;i<19;i++) {
       writer.addDocument(doc);
     }
diff --git a/lucene/src/test/org/apache/lucene/index/TestCodecs.java b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
index 142d3e8fdc7..8f2f84447e8 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCodecs.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCodecs.java
@@ -23,9 +23,9 @@ import java.util.HashSet;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Store;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.index.codecs.FieldsConsumer;
 import org.apache.lucene.index.codecs.FieldsProducer;
@@ -340,7 +340,9 @@
     pq.add(new Term("content", "ccc"));
     final Document doc = new Document();
-    doc.add(newField("content", "aaa bbb ccc ddd", Store.NO, Field.Index.ANALYZED_NO_NORMS));
+    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+    customType.setOmitNorms(true);
+    doc.add(newField("content", "aaa bbb ccc ddd", customType));
     // add document and force commit for creating a first segment
     writer.addDocument(doc);
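The term-vector enum is gone too: the TestCheckIndex and TestAddIndexes hunks above translate TermVector.WITH_POSITIONS_OFFSETS into three independent FieldType switches. A sketch under the same assumed API; the helper name is illustrative:

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.TextField;

    public class TermVectorSketch {
      static Field withFullVectors(String name, String value) {
        FieldType t = new FieldType(TextField.TYPE_STORED);
        t.setStoreTermVectors(true);          // old TermVector.YES
        t.setStoreTermVectorPositions(true);  // old ...WITH_POSITIONS
        t.setStoreTermVectorOffsets(true);    // old ...WITH_OFFSETS
        return new Field(name, t, value);
      }
    }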
diff --git a/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java b/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
index c7e5927ecd9..2f41d4e2779 100644
--- a/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
+++ b/lucene/src/test/org/apache/lucene/index/TestConcurrentMergeScheduler.java
@@ -17,14 +17,16 @@ package org.apache.lucene.index;
  * limitations under the License.
  */
-import org.apache.lucene.store.MockDirectoryWrapper;
+import java.io.IOException;
+
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
-
+import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.util.LuceneTestCase;
-import java.io.IOException;
 public class TestConcurrentMergeScheduler extends LuceneTestCase {
@@ -75,7 +77,7 @@
     IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2));
     writer.setInfoStream(VERBOSE ? System.out : null);
     Document doc = new Document();
-    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
+    Field idField = newField("id", "", StringField.TYPE_STORED);
     doc.add(idField);
     int extraCount = 0;
@@ -135,7 +137,7 @@
     writer.setInfoStream(VERBOSE ? System.out : null);
     Document doc = new Document();
-    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
+    Field idField = newField("id", "", StringField.TYPE_STORED);
     doc.add(idField);
     for(int i=0;i<10;i++) {
       if (VERBOSE) {
@@ -180,7 +182,7 @@
       for(int j=0;j<21;j++) {
         Document doc = new Document();
-        doc.add(newField("content", "a b c", Field.Store.NO, Field.Index.ANALYZED));
+        doc.add(newField("content", "a b c", TextField.TYPE_UNSTORED));
         writer.addDocument(doc);
       }
@@ -202,7 +204,7 @@
   public void testNoWaitClose() throws IOException {
     MockDirectoryWrapper directory = newDirectory();
     Document doc = new Document();
-    Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED);
+    Field idField = newField("id", "", StringField.TYPE_STORED);
     doc.add(idField);
     IndexWriter writer = new IndexWriter(
diff --git a/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java b/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java
index 65f57769918..875ea50a7cb 100644
--- a/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java
+++ b/lucene/src/test/org/apache/lucene/index/TestConsistentFieldNumbers.java
@@ -20,11 +20,12 @@ package org.apache.lucene.index;
 import java.io.IOException;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.BinaryField;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
 import org.junit.Test;
@@ -38,8 +39,8 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
       IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
          TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
       Document d1 = new Document();
-      d1.add(new Field("f1", "first field", Store.YES, Index.ANALYZED, TermVector.NO));
-      d1.add(new Field("f2", "second field", Store.YES, Index.ANALYZED, TermVector.NO));
+      d1.add(new Field("f1", StringField.TYPE_STORED, "first field"));
+      d1.add(new Field("f2", StringField.TYPE_STORED, "second field"));
       writer.addDocument(d1);
       if (i == 1) {
@@ -50,10 +51,12 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
       }
       Document d2 = new Document();
-      d2.add(new Field("f2", "second field", Store.YES, Index.ANALYZED, TermVector.NO));
-      d2.add(new Field("f1", "first field", Store.YES, Index.ANALYZED, TermVector.YES));
-      d2.add(new Field("f3", "third field", Store.YES, Index.ANALYZED, TermVector.NO));
-      d2.add(new Field("f4", "fourth field", Store.YES, Index.ANALYZED, TermVector.NO));
+      FieldType customType2 = new FieldType(TextField.TYPE_STORED);
+      customType2.setStoreTermVectors(true);
+      d2.add(new TextField("f2", "second field"));
+      d2.add(new Field("f1", customType2, "first field"));
+      d2.add(new TextField("f3", "third field"));
+      d2.add(new TextField("f4", "fourth field"));
       writer.addDocument(d2);
       writer.close();
@@ -99,18 +102,20 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
     IndexWriter writer = new IndexWriter(dir1, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     Document d1 = new Document();
-    d1.add(new Field("f1", "first field", Store.YES, Index.ANALYZED, TermVector.NO));
-    d1.add(new Field("f2", "second field", Store.YES, Index.ANALYZED, TermVector.NO));
+    d1.add(new Field("f1", TextField.TYPE_STORED, "first field"));
+    d1.add(new Field("f2", TextField.TYPE_STORED, "second field"));
     writer.addDocument(d1);
     writer.close();
     writer = new IndexWriter(dir2, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(NoMergePolicy.COMPOUND_FILES));
     Document d2 = new Document();
-    d2.add(new Field("f2", "second field", Store.YES, Index.ANALYZED, TermVector.NO));
-    d2.add(new Field("f1", "first field", Store.YES, Index.ANALYZED, TermVector.YES));
-    d2.add(new Field("f3", "third field", Store.YES, Index.ANALYZED, TermVector.NO));
-    d2.add(new Field("f4", "fourth field", Store.YES, Index.ANALYZED, TermVector.NO));
+    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
+    customType2.setStoreTermVectors(true);
+    d2.add(new Field("f2", TextField.TYPE_STORED, "second field"));
+    d2.add(new Field("f1", customType2, "first field"));
+    d2.add(new Field("f3", TextField.TYPE_STORED, "third field"));
+    d2.add(new Field("f4", TextField.TYPE_STORED, "fourth field"));
     writer.addDocument(d2);
     writer.close();
@@ -163,10 +168,8 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
           TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(
           NoMergePolicy.NO_COMPOUND_FILES));
       Document d = new Document();
-      d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED,
-          TermVector.NO));
-      d.add(new Field("f2", "d1 second field", Store.YES, Index.ANALYZED,
-          TermVector.NO));
+      d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field"));
+      d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field"));
       writer.addDocument(d);
       writer.close();
       SegmentInfos sis = new SegmentInfos();
@@ -185,9 +188,8 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
          random.nextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES
              : NoMergePolicy.COMPOUND_FILES));
      Document d = new Document();
-      d.add(new Field("f1", "d2 first field", Store.YES, Index.ANALYZED,
-          TermVector.NO));
-      d.add(new Field("f3", new byte[] { 1, 2, 3 }));
+      d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field"));
+      d.add(new BinaryField("f3", new byte[] { 1, 2, 3 }));
       writer.addDocument(d);
       writer.close();
       SegmentInfos sis = new SegmentInfos();
@@ -210,11 +212,9 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
           random.nextBoolean() ? NoMergePolicy.NO_COMPOUND_FILES
               : NoMergePolicy.COMPOUND_FILES));
       Document d = new Document();
-      d.add(new Field("f1", "d3 first field", Store.YES, Index.ANALYZED,
-          TermVector.NO));
-      d.add(new Field("f2", "d3 second field", Store.YES, Index.ANALYZED,
-          TermVector.NO));
-      d.add(new Field("f3", new byte[] { 1, 2, 3, 4, 5 }));
+      d.add(new Field("f1", TextField.TYPE_STORED, "d3 first field"));
+      d.add(new Field("f2", TextField.TYPE_STORED, "d3 second field"));
+      d.add(new BinaryField("f3", new byte[] { 1, 2, 3, 4, 5 }));
       writer.addDocument(d);
       writer.close();
       SegmentInfos sis = new SegmentInfos();
@@ -303,10 +303,10 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
       for (FieldInfo fi : fis) {
         Field expected = getField(Integer.parseInt(fi.name));
-        assertEquals(expected.isIndexed(), fi.isIndexed);
-        assertEquals(expected.isTermVectorStored(), fi.storeTermVector);
-        assertEquals(expected.isStorePositionWithTermVector(), fi.storePositionWithTermVector);
-        assertEquals(expected.isStoreOffsetWithTermVector(), fi.storeOffsetWithTermVector);
+        assertEquals(expected.indexed(), fi.isIndexed);
+        assertEquals(expected.storeTermVectors(), fi.storeTermVector);
+        assertEquals(expected.storeTermVectorPositions(), fi.storePositionWithTermVector);
+        assertEquals(expected.storeTermVectorOffsets(), fi.storeOffsetWithTermVector);
       }
     }
@@ -316,23 +316,91 @@ public class TestConsistentFieldNumbers extends LuceneTestCase {
   private Field getField(int number) {
     int mode = number % 16;
     String fieldName = "" + number;
+    FieldType customType = new FieldType(TextField.TYPE_STORED);
+
+    FieldType customType2 = new FieldType(TextField.TYPE_STORED);
+    customType2.setTokenized(false);
+
+    FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED);
+    customType3.setTokenized(false);
+
+    FieldType customType4 = new FieldType(TextField.TYPE_UNSTORED);
+    customType4.setTokenized(false);
+    customType4.setStoreTermVectors(true);
+    customType4.setStoreTermVectorOffsets(true);
+
+    FieldType customType5 = new FieldType(TextField.TYPE_UNSTORED);
+    customType5.setStoreTermVectors(true);
+    customType5.setStoreTermVectorOffsets(true);
+
+    FieldType customType6 = new FieldType(TextField.TYPE_STORED);
+    customType6.setTokenized(false);
+    customType6.setStoreTermVectors(true);
+    customType6.setStoreTermVectorOffsets(true);
+
+    FieldType customType7 = new FieldType(TextField.TYPE_UNSTORED);
+    customType7.setTokenized(false);
+    customType7.setStoreTermVectors(true);
+    customType7.setStoreTermVectorOffsets(true);
+
+    FieldType customType8 = new FieldType(TextField.TYPE_STORED);
+    customType8.setTokenized(false);
+    customType8.setStoreTermVectors(true);
+    customType8.setStoreTermVectorPositions(true);
+
+    FieldType customType9 = new FieldType(TextField.TYPE_UNSTORED);
+    customType9.setStoreTermVectors(true);
+    customType9.setStoreTermVectorPositions(true);
+
+    FieldType customType10 = new FieldType(TextField.TYPE_STORED);
+    customType10.setTokenized(false);
+    customType10.setStoreTermVectors(true);
+    customType10.setStoreTermVectorPositions(true);
+
+    FieldType customType11 = new FieldType(TextField.TYPE_UNSTORED);
+    customType11.setTokenized(false);
+    customType11.setStoreTermVectors(true);
+    customType11.setStoreTermVectorPositions(true);
+
+    FieldType customType12 = new FieldType(TextField.TYPE_STORED);
+    customType12.setStoreTermVectors(true);
+    customType12.setStoreTermVectorOffsets(true);
+    customType12.setStoreTermVectorPositions(true);
+
+    FieldType customType13 = new FieldType(TextField.TYPE_UNSTORED);
+    customType13.setStoreTermVectors(true);
+    customType13.setStoreTermVectorOffsets(true);
+    customType13.setStoreTermVectorPositions(true);
+
+    FieldType customType14 = new FieldType(TextField.TYPE_STORED);
+    customType14.setTokenized(false);
+    customType14.setStoreTermVectors(true);
+    customType14.setStoreTermVectorOffsets(true);
+    customType14.setStoreTermVectorPositions(true);
+
+    FieldType customType15 = new FieldType(TextField.TYPE_UNSTORED);
+    customType15.setTokenized(false);
+    customType15.setStoreTermVectors(true);
+    customType15.setStoreTermVectorOffsets(true);
+    customType15.setStoreTermVectorPositions(true);
+
     switch (mode) {
-      case 0: return new Field(fieldName, "some text", Store.YES, Index.ANALYZED, TermVector.NO);
-      case 1: return new Field(fieldName, "some text", Store.NO, Index.ANALYZED, TermVector.NO);
-      case 2: return new Field(fieldName, "some text", Store.YES, Index.NOT_ANALYZED, TermVector.NO);
-      case 3: return new Field(fieldName, "some text", Store.NO, Index.NOT_ANALYZED, TermVector.NO);
-      case 4: return new Field(fieldName, "some text", Store.YES, Index.ANALYZED, TermVector.WITH_OFFSETS);
-      case 5: return new Field(fieldName, "some text", Store.NO, Index.ANALYZED, TermVector.WITH_OFFSETS);
-      case 6: return new Field(fieldName, "some text", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_OFFSETS);
-      case 7: return new Field(fieldName, "some text", Store.NO, Index.NOT_ANALYZED, TermVector.WITH_OFFSETS);
-      case 8: return new Field(fieldName, "some text", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS);
-      case 9: return new Field(fieldName, "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS);
-      case 10: return new Field(fieldName, "some text", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS);
-      case 11: return new Field(fieldName, "some text", Store.NO, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS);
-      case 12: return new Field(fieldName, "some text", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS);
-      case 13: return new Field(fieldName, "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS);
-      case 14: return new Field(fieldName, "some text", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS);
-      case 15: return new Field(fieldName, "some text", Store.NO, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS);
+      case 0: return new Field(fieldName, customType, "some text");
+      case 1: return new TextField(fieldName, "some text");
+      case 2: return new Field(fieldName, customType2, "some text");
+      case 3: return new Field(fieldName, customType3, "some text");
+      case 4: return new Field(fieldName, customType4, "some text");
+      case 5: return new Field(fieldName, customType5, "some text");
+      case 6: return new Field(fieldName, customType6, "some text");
+      case 7: return new Field(fieldName, customType7, "some text");
+      case 8: return new Field(fieldName, customType8, "some text");
+      case 9: return new Field(fieldName, customType9, "some text");
+      case 10: return new Field(fieldName, customType10, "some text");
+      case 11: return new Field(fieldName, customType11, "some text");
+      case 12: return new Field(fieldName, customType12, "some text");
+      case 13: return new Field(fieldName, customType13, "some text");
+      case 14: return new Field(fieldName, customType14, "some text");
+      case 15: return new Field(fieldName, customType15, "some text");
       default: return null;
     }
   }
 }
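The 16-way switch just above is the full cross-product of the old enums (Store.YES/NO, Index.ANALYZED/NOT_ANALYZED, and the TermVector variants) restated as FieldType settings. One representative mapping, old case 14 (Store.YES, NOT_ANALYZED, WITH_POSITIONS_OFFSETS), as a sketch under the same assumed API:

    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.TextField;

    public class Case14Sketch {
      static FieldType case14() {
        FieldType t = new FieldType(TextField.TYPE_STORED); // Store.YES
        t.setTokenized(false);                              // Index.NOT_ANALYZED
        t.setStoreTermVectors(true);
        t.setStoreTermVectorOffsets(true);
        t.setStoreTermVectorPositions(true);                // TermVector.WITH_POSITIONS_OFFSETS
        return t;
      }
    }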
diff --git a/lucene/src/test/org/apache/lucene/index/TestCrash.java b/lucene/src/test/org/apache/lucene/index/TestCrash.java
index 13e86437a9a..a121a6cb657 100644
--- a/lucene/src/test/org/apache/lucene/index/TestCrash.java
+++ b/lucene/src/test/org/apache/lucene/index/TestCrash.java
@@ -25,7 +25,7 @@ import org.apache.lucene.store.MockDirectoryWrapper;
 import org.apache.lucene.store.NoLockFactory;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 public class TestCrash extends LuceneTestCase {
@@ -44,8 +44,8 @@ public class TestCrash extends LuceneTestCase {
     }
     Document doc = new Document();
-    doc.add(newField("content", "aaa", Field.Store.YES, Field.Index.ANALYZED));
-    doc.add(newField("id", "0", Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED));
+    doc.add(newField("id", "0", TextField.TYPE_UNSTORED));
     for(int i=0;i<157;i++)
       writer.addDocument(doc);
diff --git a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
index 9c4994ebb43..ada1e4abb82 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDeletionPolicy.java
@@ -25,7 +25,7 @@ import java.util.Collection;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.search.IndexSearcher;
@@ -853,7 +853,7 @@ public class TestDeletionPolicy extends LuceneTestCase {
   private void addDoc(IndexWriter writer) throws IOException {
     Document doc = new Document();
-    doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED));
+    doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED));
     writer.addDocument(doc);
   }
 }
diff --git a/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java b/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java
index e8bd6d69234..e756e508a19 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDirectoryReader.java
@@ -21,7 +21,7 @@ import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.BytesRef;
@@ -202,7 +202,7 @@ public class TestDirectoryReader extends LuceneTestCase {
         new MockAnalyzer(random)).setOpenMode(
         create ? OpenMode.CREATE : OpenMode.APPEND));
     Document doc = new Document();
-    doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(newField("body", s, TextField.TYPE_UNSTORED));
     iw.addDocument(doc);
     iw.close();
   }
diff --git a/lucene/src/test/org/apache/lucene/index/TestDoc.java b/lucene/src/test/org/apache/lucene/index/TestDoc.java
index b4d23b57ce7..51e3695676e 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDoc.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDoc.java
@@ -31,14 +31,13 @@ import junit.textui.TestRunner;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util._TestUtil;
-import org.apache.lucene.index.codecs.CodecProvider;
 /** JUnit adaptation of an older test case DocTest. */
@@ -185,7 +184,7 @@ public class TestDoc extends LuceneTestCase {
   {
      File file = new File(workDir, fileName);
      Document doc = new Document();
-     doc.add(new Field("contents", new FileReader(file)));
+     doc.add(new TextField("contents", new FileReader(file)));
      writer.addDocument(doc);
      writer.commit();
      return writer.newestSegment();
diff --git a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java
index 0bdb2ff86ea..5d798aa0fef 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDocTermOrds.java
@@ -28,6 +28,8 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.DocTermOrds.TermOrdsIterator;
 import org.apache.lucene.index.codecs.BlockTermsReader;
 import org.apache.lucene.index.codecs.BlockTermsWriter;
@@ -66,7 +68,7 @@ public class TestDocTermOrds extends LuceneTestCase {
     Directory dir = newDirectory();
     final RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy()));
     Document doc = new Document();
-    Field field = newField("field", "", Field.Index.ANALYZED);
+    Field field = newField("field", "", TextField.TYPE_UNSTORED);
     doc.add(field);
     field.setValue("a b c");
     w.addDocument(doc);
@@ -281,7 +283,7 @@ public class TestDocTermOrds extends LuceneTestCase {
       }
       for(int ord : ordsForDocSet) {
         ordsForDoc[upto++] = ord;
-        Field field = newField("field", termsArray[ord].utf8ToString(), Field.Index.NOT_ANALYZED_NO_NORMS);
+        Field field = newField("field", termsArray[ord].utf8ToString(), StringField.TYPE_UNSTORED);
         if (VERBOSE) {
           System.out.println("  f=" + termsArray[ord].utf8ToString());
         }
@@ -384,7 +386,7 @@ public class TestDocTermOrds extends LuceneTestCase {
       }
       for(int ord : ordsForDocSet) {
         ordsForDoc[upto++] = ord;
-        Field field = newField("field", termsArray[ord].utf8ToString(), Field.Index.NOT_ANALYZED_NO_NORMS);
+        Field field = newField("field", termsArray[ord].utf8ToString(), StringField.TYPE_UNSTORED);
         if (VERBOSE) {
           System.out.println("  f=" + termsArray[ord].utf8ToString());
         }
diff --git a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
index 11f383461d2..a4eac5c7042 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDocsAndPositions.java
@@ -22,7 +22,8 @@ import java.util.Arrays;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.store.Directory;
@@ -49,9 +50,11 @@ public class TestDocsAndPositions extends LuceneTestCase {
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
     for (int i = 0; i < 39; i++) {
       Document doc = new Document();
+      FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+      customType.setOmitNorms(true);
       doc.add(newField(fieldName, "1 2 3 4 5 6 7 8 9 10 "
           + "1 2 3 4 5 6 7 8 9 10 " + "1 2 3 4 5 6 7 8 9 10 "
-          + "1 2 3 4 5 6 7 8 9 10", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS));
+          + "1 2 3 4 5 6 7 8 9 10", customType));
       writer.addDocument(doc);
     }
     IndexReader reader = writer.getReader();
@@ -117,6 +120,8 @@ public class TestDocsAndPositions extends LuceneTestCase {
     int max = 1051;
     int term = random.nextInt(max);
     Integer[][] positionsInDoc = new Integer[numDocs][];
+    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+    customType.setOmitNorms(true);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       ArrayList<Integer> positions = new ArrayList<Integer>();
@@ -133,8 +138,7 @@ public class TestDocsAndPositions extends LuceneTestCase {
         builder.append(term);
         positions.add(num);
       }
-      doc.add(newField(fieldName, builder.toString(), Field.Store.NO,
-          Field.Index.ANALYZED_NO_NORMS));
+      doc.add(newField(fieldName, builder.toString(), customType));
       positionsInDoc[i] = positions.toArray(new Integer[0]);
       writer.addDocument(doc);
     }
@@ -200,6 +204,8 @@ public class TestDocsAndPositions extends LuceneTestCase {
     int max = 15678;
     int term = random.nextInt(max);
     int[] freqInDoc = new int[numDocs];
+    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+    customType.setOmitNorms(true);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
       StringBuilder builder = new StringBuilder();
@@ -210,8 +216,7 @@ public class TestDocsAndPositions extends LuceneTestCase {
           freqInDoc[i]++;
         }
       }
-      doc.add(newField(fieldName, builder.toString(), Field.Store.NO,
-          Field.Index.ANALYZED_NO_NORMS));
+      doc.add(newField(fieldName, builder.toString(), customType));
       writer.addDocument(doc);
     }
@@ -276,6 +281,8 @@ public class TestDocsAndPositions extends LuceneTestCase {
     RandomIndexWriter writer = new RandomIndexWriter(random, dir,
         newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)));
     int howMany = 1000;
+    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+    customType.setOmitNorms(true);
     for (int i = 0; i < 39; i++) {
       Document doc = new Document();
       StringBuilder builder = new StringBuilder();
@@ -286,8 +293,7 @@ public class TestDocsAndPositions extends LuceneTestCase {
           builder.append("odd ");
         }
       }
-      doc.add(newField(fieldName, builder.toString(), Field.Store.NO,
-          Field.Index.ANALYZED_NO_NORMS));
+      doc.add(newField(fieldName, builder.toString(), customType));
       writer.addDocument(doc);
     }
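A recurring detail in the TestDocsAndPositions hunks above: the FieldType is built once, outside the document loop, and every Field in the loop shares it, exactly as these tests do. Sketch under the same assumed API; writer setup is elided and the names are illustrative:

    import java.io.IOException;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;

    public class SharedTypeSketch {
      static void addDocs(IndexWriter writer, int numDocs) throws IOException {
        FieldType noNorms = new FieldType(TextField.TYPE_UNSTORED);
        noNorms.setOmitNorms(true);
        for (int i = 0; i < numDocs; i++) {
          Document doc = new Document();
          doc.add(new Field("content", noNorms, "value " + i)); // one type, many fields
          writer.addDocument(doc);
        }
      }
    }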
diff --git a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
index 376dd280256..1634e0581b3 100644
--- a/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
+++ b/lucene/src/test/org/apache/lucene/index/TestDocumentWriter.java
@@ -30,14 +30,11 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
-import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.IOContext;
-import org.apache.lucene.store.IOContext.Context;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.LuceneTestCase;
@@ -77,15 +74,15 @@ public class TestDocumentWriter extends LuceneTestCase {
     assertTrue(doc != null);
     //System.out.println("Document: " + doc);
-    Fieldable [] fields = doc.getFields("textField2");
+    IndexableField [] fields = doc.getFields("textField2");
     assertTrue(fields != null && fields.length == 1);
     assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
-    assertTrue(fields[0].isTermVectorStored());
+    assertTrue(fields[0].storeTermVectors());
     fields = doc.getFields("textField1");
     assertTrue(fields != null && fields.length == 1);
     assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
-    assertFalse(fields[0].isTermVectorStored());
+    assertFalse(fields[0].storeTermVectors());
     fields = doc.getFields("keyField");
     assertTrue(fields != null && fields.length == 1);
@@ -125,8 +122,8 @@ public class TestDocumentWriter extends LuceneTestCase {
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
     Document doc = new Document();
-    doc.add(newField("repeated", "repeated one", Field.Store.YES, Field.Index.ANALYZED));
-    doc.add(newField("repeated", "repeated two", Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(newField("repeated", "repeated one", TextField.TYPE_STORED));
+    doc.add(newField("repeated", "repeated two", TextField.TYPE_STORED));
     writer.addDocument(doc);
     writer.commit();
@@ -197,7 +194,7 @@ public class TestDocumentWriter extends LuceneTestCase {
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
     Document doc = new Document();
-    doc.add(newField("f1", "a 5 a a", Field.Store.YES, Field.Index.ANALYZED));
+    doc.add(newField("f1", "a 5 a a", TextField.TYPE_STORED));
     writer.addDocument(doc);
     writer.commit();
@@ -223,8 +220,8 @@ public class TestDocumentWriter extends LuceneTestCase {
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random)));
     Document doc = new Document();
-
-    doc.add(new Field("preanalyzed", new TokenStream() {
+
+    doc.add(new TextField("preanalyzed", new TokenStream() {
       private String[] tokens = new String[] {"term1", "term2", "term3", "term2"};
       private int index = 0;
@@ -241,7 +238,7 @@ public class TestDocumentWriter extends LuceneTestCase {
       }
     }
-    }, TermVector.NO));
+    }));
     writer.addDocument(doc);
     writer.commit();
@@ -274,11 +271,15 @@ public class TestDocumentWriter extends LuceneTestCase {
   public void testMixedTermVectorSettingsSameField() throws Exception {
     Document doc = new Document();
     // f1 first without tv then with tv
-    doc.add(newField("f1", "v1", Store.YES, Index.NOT_ANALYZED, TermVector.NO));
-    doc.add(newField("f1", "v2", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
+    doc.add(newField("f1", "v1", StringField.TYPE_STORED));
+    FieldType customType2 = new FieldType(StringField.TYPE_STORED);
+    customType2.setStoreTermVectors(true);
+    customType2.setStoreTermVectorOffsets(true);
+    customType2.setStoreTermVectorPositions(true);
+    doc.add(newField("f1", "v2", customType2));
     // f2 first with tv then without tv
-    doc.add(newField("f2", "v1", Store.YES, Index.NOT_ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
-    doc.add(newField("f2", "v2", Store.YES, Index.NOT_ANALYZED, TermVector.NO));
+    doc.add(newField("f2", "v1", customType2));
+    doc.add(newField("f2", "v2", StringField.TYPE_STORED));
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random)));
@@ -307,13 +308,18 @@ public class TestDocumentWriter extends LuceneTestCase {
   public void testLUCENE_1590() throws Exception {
     Document doc = new Document();
     // f1 has no norms
-    doc.add(newField("f1", "v1", Store.NO, Index.ANALYZED_NO_NORMS));
-    doc.add(newField("f1", "v2", Store.YES, Index.NO));
+    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
+    customType.setOmitNorms(true);
+    FieldType customType2 = new FieldType();
+    customType2.setStored(true);
+    doc.add(newField("f1", "v1", customType));
+    doc.add(newField("f1", "v2", customType2));
     // f2 has no TF
-    Field f = newField("f2", "v1", Store.NO, Index.ANALYZED);
-    f.setIndexOptions(IndexOptions.DOCS_ONLY);
+    FieldType customType3 = new FieldType(TextField.TYPE_UNSTORED);
+    customType3.setIndexOptions(IndexOptions.DOCS_ONLY);
+    Field f = newField("f2", "v1", customType3);
     doc.add(f);
-    doc.add(newField("f2", "v2", Store.YES, Index.NO));
+    doc.add(newField("f2", "v2", customType2));
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(
         TEST_VERSION_CURRENT, new MockAnalyzer(random)));
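TestDocumentWriter's pre-analyzed field now goes through the TextField constructor that accepts a TokenStream directly (a Reader works the same way, as in the TestDoc hunk earlier). A minimal sketch under the same assumed API:

    import java.io.StringReader;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.TextField;

    public class ReaderFieldSketch {
      static Document docFromText(String text) {
        Document doc = new Document();
        // Tokenized and indexed but never stored: the Reader is consumed at index time.
        doc.add(new TextField("contents", new StringReader(text)));
        return doc;
      }
    }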
diff --git a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
index 101dbdbb967..07a4b642ba9 100644
--- a/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
+++ b/lucene/src/test/org/apache/lucene/index/TestFieldsReader.java
@@ -25,15 +25,9 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.NumericField;
-import org.apache.lucene.document.FieldSelector;
-import org.apache.lucene.document.FieldSelectorResult;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.document.LoadFirstFieldSelector;
-import org.apache.lucene.document.SetBasedFieldSelector;
 import org.apache.lucene.index.FieldInfo.IndexOptions;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.search.FieldCache;
-import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.BufferedIndexInput;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
@@ -48,7 +42,6 @@ public class TestFieldsReader extends LuceneTestCase {
   private static Directory dir;
   private static Document testDoc = new Document();
   private static FieldInfos fieldInfos = null;
-  private final static String TEST_SEGMENT_NAME = "_0";
   @BeforeClass
   public static void beforeClass() throws Exception {
@@ -61,14 +54,9 @@ public class TestFieldsReader extends LuceneTestCase {
     IndexWriter writer = new IndexWriter(dir, conf);
     writer.addDocument(testDoc);
     writer.close();
-  }
-
-  @Override
-  public void setUp() throws Exception {
-    super.setUp();
     FaultyIndexInput.doFail = false;
   }
-
+
   @AfterClass
   public static void afterClass() throws Exception {
     dir.close();
@@ -76,326 +64,49 @@ public class TestFieldsReader extends LuceneTestCase {
     fieldInfos = null;
     testDoc = null;
   }
+
   public void test() throws IOException {
     assertTrue(dir != null);
     assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
-    assertTrue(reader.size() == 1);
-    Document doc = reader.doc(0, null);
+    IndexReader reader = IndexReader.open(dir);
+    Document doc = reader.document(0);
     assertTrue(doc != null);
     assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);
-    Fieldable field = doc.getField(DocHelper.TEXT_FIELD_2_KEY);
+    Field field = (Field) doc.getField(DocHelper.TEXT_FIELD_2_KEY);
     assertTrue(field != null);
-    assertTrue(field.isTermVectorStored() == true);
+    assertTrue(field.storeTermVectors() == true);
-    assertTrue(field.isStoreOffsetWithTermVector() == true);
-    assertTrue(field.isStorePositionWithTermVector() == true);
-    assertTrue(field.getOmitNorms() == false);
-    assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+    assertTrue(field.storeTermVectorOffsets() == true);
+    assertTrue(field.storeTermVectorPositions() == true);
+    assertTrue(field.omitNorms() == false);
+    assertTrue(field.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
-    field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
+    field = (Field) doc.getField(DocHelper.TEXT_FIELD_3_KEY);
     assertTrue(field != null);
-    assertTrue(field.isTermVectorStored() == false);
-    assertTrue(field.isStoreOffsetWithTermVector() == false);
-    assertTrue(field.isStorePositionWithTermVector() == false);
-    assertTrue(field.getOmitNorms() == true);
-    assertTrue(field.getIndexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
+    assertTrue(field.storeTermVectors() == false);
+    assertTrue(field.storeTermVectorOffsets() == false);
+    assertTrue(field.storeTermVectorPositions() == false);
+    assertTrue(field.omitNorms() == true);
+    assertTrue(field.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
-    field = doc.getField(DocHelper.NO_TF_KEY);
+    field = (Field) doc.getField(DocHelper.NO_TF_KEY);
     assertTrue(field != null);
-    assertTrue(field.isTermVectorStored() == false);
-    assertTrue(field.isStoreOffsetWithTermVector() == false);
-    assertTrue(field.isStorePositionWithTermVector() == false);
-    assertTrue(field.getOmitNorms() == false);
-    assertTrue(field.getIndexOptions() == IndexOptions.DOCS_ONLY);
+    assertTrue(field.storeTermVectors() == false);
+    assertTrue(field.storeTermVectorOffsets() == false);
+    assertTrue(field.storeTermVectorPositions() == false);
+    assertTrue(field.omitNorms() == false);
+    assertTrue(field.indexOptions() == IndexOptions.DOCS_ONLY);
+
+    DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(DocHelper.TEXT_FIELD_3_KEY);
+    reader.document(0, visitor);
+    final List<IndexableField> fields = visitor.getDocument().getFields();
+    assertEquals(1, fields.size());
+    assertEquals(DocHelper.TEXT_FIELD_3_KEY, fields.get(0).name());
     reader.close();
   }
-  public void testLazyFields() throws Exception {
-    assertTrue(dir != null);
-    assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
-    assertTrue(reader.size() == 1);
-    Set<String> loadFieldNames = new HashSet<String>();
-    loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
-    loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
-    Set<String> lazyFieldNames = new HashSet<String>();
-    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
-    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
-    lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
-    lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
-    lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
-    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
-    Document doc = reader.doc(0, fieldSelector);
-    assertTrue("doc is null and it shouldn't be", doc != null);
-    Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("field is not lazy and it should be", field.isLazy());
-    String value = field.stringValue();
-    assertTrue("value is null and it shouldn't be", value != null);
-    assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
-    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
-
-    field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("Field is lazy and it should not be", field.isLazy() == false);
-    field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("Field is lazy and it should not be", field.isLazy() == false);
-    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
-
-    field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("Field is lazy and it should not be", field.isLazy() == true);
-    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
-
-    field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
-
-    byte [] bytes = field.getBinaryValue();
-    assertTrue("bytes is null and it shouldn't be", bytes != null);
-    assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
-    assertTrue("calling binaryValue() twice should give same reference", field.getBinaryValue() == field.getBinaryValue());
-    for (int i = 0; i < bytes.length; i++) {
-      assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
-
-    }
-    reader.close();
-  }
-
-  public void testLatentFields() throws Exception {
-    assertTrue(dir != null);
-    assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
-    assertTrue(reader.size() == 1);
-    Set<String> loadFieldNames = new HashSet<String>();
-    loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
-    loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
-    Set<String> lazyFieldNames = new HashSet<String>();
-    //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
-    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
-    lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
-    lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
-    lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
-
-    // Use LATENT instead of LAZY
-    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames) {
-        @Override
-        public FieldSelectorResult accept(String fieldName) {
-          final FieldSelectorResult result = super.accept(fieldName);
-          if (result == FieldSelectorResult.LAZY_LOAD) {
-            return FieldSelectorResult.LATENT;
-          } else {
-            return result;
-          }
-        }
-      };
-
-    Document doc = reader.doc(0, fieldSelector);
-    assertTrue("doc is null and it shouldn't be", doc != null);
-    Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("field is not lazy and it should be", field.isLazy());
-    String value = field.stringValue();
-    assertTrue("value is null and it shouldn't be", value != null);
-    assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
-    assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
-
-    field = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("Field is lazy and it should not be", field.isLazy() == false);
-    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
-
-    field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF1_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("Field is lazy and it should not be", field.isLazy() == false);
-    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
-    assertTrue("calling stringValue() twice should give same reference", field.stringValue() == field.stringValue());
-
-    field = doc.getFieldable(DocHelper.TEXT_FIELD_UTF2_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("Field is lazy and it should not be", field.isLazy() == true);
-    assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
-    assertTrue("calling stringValue() twice should give different references", field.stringValue() != field.stringValue());
-
-    field = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("stringValue isn't null for lazy binary field", field.stringValue() == null);
-    assertTrue("calling binaryValue() twice should give different references", field.getBinaryValue() != field.getBinaryValue());
-
-    byte [] bytes = field.getBinaryValue();
-    assertTrue("bytes is null and it shouldn't be", bytes != null);
-    assertTrue("", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
-    for (int i = 0; i < bytes.length; i++) {
-      assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
-
-    }
-    reader.close();
-  }
-
-
-
-
-  public void testLazyFieldsAfterClose() throws Exception {
-    assertTrue(dir != null);
-    assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
-    assertTrue(reader.size() == 1);
-    Set<String> loadFieldNames = new HashSet<String>();
-    loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
-    loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
-    Set<String> lazyFieldNames = new HashSet<String>();
-    lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
-    lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
-    lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
-    lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
-    SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
-    Document doc = reader.doc(0, fieldSelector);
-    assertTrue("doc is null and it shouldn't be", doc != null);
-    Fieldable field = doc.getFieldable(DocHelper.LAZY_FIELD_KEY);
-    assertTrue("field is null and it shouldn't be", field != null);
-    assertTrue("field is not lazy and it should be", field.isLazy());
-    reader.close();
-    try {
-      field.stringValue();
-      fail("did not hit AlreadyClosedException as expected");
-    } catch (AlreadyClosedException e) {
-      // expected
-    }
-  }
-
-  public void testLoadFirst() throws Exception {
-    assertTrue(dir != null);
-    assertTrue(fieldInfos != null);
-    FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos);
-    assertTrue(reader.size() == 1);
-    LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
-    Document doc = reader.doc(0, fieldSelector);
-    assertTrue("doc is null and it shouldn't be", doc != null);
-    int count = 0;
-    List<Fieldable> l = doc.getFields();
-    for (final Fieldable fieldable : l ) {
-      Field field = (Field) fieldable;
-
-      assertTrue("field is null and it shouldn't be", field != null);
-      String sv = field.stringValue();
-      assertTrue("sv is null and it shouldn't be", sv != null);
-      count++;
-    }
-    assertTrue(count + " does not equal: " + 1, count == 1);
-    reader.close();
-  }
-
-  /**
-   * Not really a test per se, but we should have some way of assessing whether this is worthwhile.
-   *
- * Must test using a File based directory - * - * @throws Exception - */ - public void testLazyPerformance() throws Exception { - String userName = System.getProperty("user.name"); - File file = _TestUtil.getTempDir("lazyDir" + userName); - Directory tmpDir = newFSDirectory(file); - assertTrue(tmpDir != null); - - IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE).setMergePolicy(newLogMergePolicy()); - ((LogMergePolicy) conf.getMergePolicy()).setUseCompoundFile(false); - IndexWriter writer = new IndexWriter(tmpDir, conf); - writer.addDocument(testDoc); - writer.close(); - - assertTrue(fieldInfos != null); - FieldsReader reader; - long lazyTime = 0; - long regularTime = 0; - int length = 10; - Set<String> lazyFieldNames = new HashSet<String>(); - lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY); - SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.<String> emptySet(), lazyFieldNames); - - for (int i = 0; i < length; i++) { - reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos); - assertTrue(reader.size() == 1); - - Document doc; - doc = reader.doc(0, null);//Load all of them - assertTrue("doc is null and it shouldn't be", doc != null); - Fieldable field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY); - assertTrue("field is null and it shouldn't be", field != null); - assertTrue("field is lazy", field.isLazy() == false); - String value; - long start; - long finish; - start = System.currentTimeMillis(); - //On my machine this was always 0ms. - value = field.stringValue(); - finish = System.currentTimeMillis(); - assertTrue("value is null and it shouldn't be", value != null); - regularTime += (finish - start); - reader.close(); - reader = null; - doc = null; - //Hmmm, are we still in cache???
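One aside on the timing being deleted here: System.currentTimeMillis() only has millisecond granularity, which is why the eager path above was "always 0ms" on the author's machine. A hypothetical variant of the same measurement with nanosecond resolution (not part of this commit) would look like:

    // Hypothetical nanoTime-based variant of the deleted measurement:
    long startNs = System.nanoTime();
    String value = field.stringValue();              // eager field: value is already in memory
    long elapsedMs = (System.nanoTime() - startNs) / 1000000;
    regularTime += elapsedMs;                        // accumulate in ms, as the test did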
- System.gc(); - reader = new FieldsReader(tmpDir, TEST_SEGMENT_NAME, fieldInfos); - doc = reader.doc(0, fieldSelector); - field = doc.getFieldable(DocHelper.LARGE_LAZY_FIELD_KEY); - assertTrue("field is not lazy", field.isLazy() == true); - start = System.currentTimeMillis(); - //On my machine this took around 50 - 70ms - value = field.stringValue(); - finish = System.currentTimeMillis(); - assertTrue("value is null and it shouldn't be", value != null); - lazyTime += (finish - start); - reader.close(); - - } - tmpDir.close(); - if (VERBOSE) { - System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads"); - System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads"); - } - } - - public void testLoadSize() throws IOException { - FieldsReader reader = new FieldsReader(dir, TEST_SEGMENT_NAME, fieldInfos); - Document doc; - - doc = reader.doc(0, new FieldSelector(){ - public FieldSelectorResult accept(String fieldName) { - if (fieldName.equals(DocHelper.TEXT_FIELD_1_KEY) || - fieldName.equals(DocHelper.LAZY_FIELD_BINARY_KEY)) - return FieldSelectorResult.SIZE; - else if (fieldName.equals(DocHelper.TEXT_FIELD_3_KEY)) - return FieldSelectorResult.LOAD; - else - return FieldSelectorResult.NO_LOAD; - } - }); - Fieldable f1 = doc.getFieldable(DocHelper.TEXT_FIELD_1_KEY); - Fieldable f3 = doc.getFieldable(DocHelper.TEXT_FIELD_3_KEY); - Fieldable fb = doc.getFieldable(DocHelper.LAZY_FIELD_BINARY_KEY); - assertTrue(f1.isBinary()); - assertTrue(!f3.isBinary()); - assertTrue(fb.isBinary()); - assertSizeEquals(2*DocHelper.FIELD_1_TEXT.length(), f1.getBinaryValue()); - assertEquals(DocHelper.FIELD_3_TEXT, f3.stringValue()); - assertSizeEquals(DocHelper.LAZY_FIELD_BINARY_BYTES.length, fb.getBinaryValue()); - - reader.close(); - } - - private void assertSizeEquals(int size, byte[] sizebytes) { - assertEquals((byte) (size>>>24), sizebytes[0]); - assertEquals((byte) (size>>>16), sizebytes[1]); - assertEquals((byte) (size>>> 8), sizebytes[2]); - assertEquals((byte) size , sizebytes[3]); - } - public static class FaultyFSDirectory extends Directory { Directory fsDir; @@ -528,7 +239,7 @@ public class TestFieldsReader extends LuceneTestCase { final NumericField.DataType[] typeAnswers = new NumericField.DataType[numDocs]; for(int id=0;id files = writer.getIndexFileNames(); @@ -122,17 +117,14 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { new MockAnalyzer(random)); IndexWriter writer = new IndexWriter(dir, config); Document d = new Document(); - d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f2", "d1 second field", Store.YES, Index.ANALYZED, - TermVector.NO)); + d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field")); + d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field")); writer.addDocument(d); writer.commit(); assertFNXFiles(dir, "1.fnx"); d = new Document(); - d.add(new Field("f1", "d2 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f3", new byte[] { 1, 2, 3 })); + d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field")); + d.add(new BinaryField("f3", new byte[] { 1, 2, 3 })); writer.addDocument(d); writer.commit(); assertFNXFiles(dir, "2.fnx"); @@ -166,17 +158,14 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy( new KeepAllDeletionPolicy())); Document d = new 
Document(); - d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f2", "d1 second field", Store.YES, Index.ANALYZED, - TermVector.NO)); + d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field")); + d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field")); writer.addDocument(d); writer.commit(); assertFNXFiles(dir, "1.fnx"); d = new Document(); - d.add(new Field("f1", "d2 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f3", new byte[] { 1, 2, 3 })); + d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field")); + d.add(new BinaryField("f3", new byte[] { 1, 2, 3 })); writer.addDocument(d); writer.commit(); writer.commit(); @@ -190,11 +179,9 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d = new Document(); - d.add(new Field("f1", "d3 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f2", "d3 second field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f3", new byte[] { 1, 2, 3, 4, 5 })); + d.add(new Field("f1", TextField.TYPE_STORED, "d3 first field")); + d.add(new Field("f2", TextField.TYPE_STORED, "d3 second field")); + d.add(new BinaryField("f3", new byte[] { 1, 2, 3, 4, 5 })); writer.addDocument(d); writer.close(); assertFNXFiles(dir, "2.fnx"); @@ -217,17 +204,14 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer(random)).setIndexDeletionPolicy( new KeepAllDeletionPolicy())); Document d = new Document(); - d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f2", "d1 second field", Store.YES, Index.ANALYZED, - TermVector.NO)); + d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field")); + d.add(new Field("f2", TextField.TYPE_STORED, "d1 second field")); writer.addDocument(d); writer.commit(); assertFNXFiles(dir, "1.fnx"); d = new Document(); - d.add(new Field("f1", "d2 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f3", new byte[] { 1, 2, 3 })); + d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field")); + d.add(new BinaryField("f3", new byte[] { 1, 2, 3 })); writer.addDocument(d); assertFNXFiles(dir, "1.fnx"); writer.close(); @@ -240,9 +224,8 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { new KeepAllDeletionPolicy()).setIndexCommit(listCommits.get(0))); d = new Document(); - d.add(new Field("f1", "d2 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); - d.add(new Field("f3", new byte[] { 1, 2, 3 })); + d.add(new Field("f1", TextField.TYPE_STORED, "d2 first field")); + d.add(new BinaryField("f3", new byte[] { 1, 2, 3 })); writer.addDocument(d); writer.commit(); // now we have 3 files since f3 is not present in the first commit @@ -271,9 +254,13 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { Document doc = new Document(); final int numFields = 1 + random.nextInt(fieldNames.length); for (int j = 0; j < numFields; j++) { + FieldType customType = new FieldType(); + customType.setIndexed(true); + customType.setTokenized(random.nextBoolean()); + customType.setOmitNorms(random.nextBoolean()); doc.add(newField(fieldNames[random.nextInt(fieldNames.length)], _TestUtil.randomRealisticUnicodeString(random), - Index.toIndex(true, random.nextBoolean(), random.nextBoolean()))); + customType)); } writer.addDocument(doc); @@ -322,9 
+309,13 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); for (String string : fieldNames) { + FieldType customType = new FieldType(); + customType.setIndexed(true); + customType.setTokenized(random.nextBoolean()); + customType.setOmitNorms(random.nextBoolean()); doc.add(newField(string, _TestUtil.randomRealisticUnicodeString(random), - Index.toIndex(true, random.nextBoolean(), random.nextBoolean()))); + customType)); } writer.addDocument(doc); @@ -419,8 +410,12 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { String name = copySortedMap.get(nextField); assertNotNull(name); + FieldType customType = new FieldType(); + customType.setIndexed(true); + customType.setTokenized(random.nextBoolean()); + customType.setOmitNorms(random.nextBoolean()); doc.add(newField(name, _TestUtil.randomRealisticUnicodeString(random), - Index.toIndex(true, random.nextBoolean(), random.nextBoolean()))); + customType)); writer.addDocument(doc); if (random.nextInt(10) == 0) { writer.commit(); @@ -480,8 +475,7 @@ public class TestGlobalFieldNumbers extends LuceneTestCase { } Document d = new Document(); - d.add(new Field("f1", "d1 first field", Store.YES, Index.ANALYZED, - TermVector.NO)); + d.add(new Field("f1", TextField.TYPE_STORED, "d1 first field")); writer.addDocument(d); writer.prepareCommit(); // the fnx file should still be under control of the SIS diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java index edd9d7f1049..76005c2ad23 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexFileDeleter.java @@ -17,20 +17,21 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.util.LuceneTestCase; +import java.io.*; +import java.util.*; + +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.MockDirectoryWrapper; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexWriterConfig.OpenMode; - -import java.io.*; -import java.util.*; +import org.apache.lucene.util.LuceneTestCase; /* Verify we can read the pre-2.1 file format, do searches @@ -229,8 +230,8 @@ public class TestIndexFileDeleter extends LuceneTestCase { private void addDoc(IndexWriter writer, int id) throws IOException { Document doc = new Document(); - doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); - doc.add(newField("id", Integer.toString(id), Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED)); + doc.add(newField("id", Integer.toString(id), StringField.TYPE_UNSTORED)); writer.addDocument(doc); } } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java index 7965406ff98..2384681093a 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReader.java @@ -31,27 +31,22 @@ import java.util.Set; import java.util.SortedSet; import org.junit.Assume; import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.document.BinaryField; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.SetBasedFieldSelector; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader.FieldOption; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.Similarity; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; -import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.NoSuchDirectoryException; -import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.LockReleaseFailedException; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -154,10 +149,14 @@ public class TestIndexReader extends LuceneTestCase ); Document doc = new Document(); - doc.add(new Field("keyword","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field("text","test1", 
Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("unindexed","test1", Field.Store.YES, Field.Index.NO)); - doc.add(new Field("unstored","test1", Field.Store.NO, Field.Index.ANALYZED)); + + FieldType customType3 = new FieldType(); + customType3.setStored(true); + + doc.add(new Field("keyword",StringField.TYPE_STORED,"test1")); + doc.add(new Field("text",TextField.TYPE_STORED,"test1")); + doc.add(new Field("unindexed",customType3,"test1")); + doc.add(new TextField("unstored","test1")); writer.addDocument(doc); writer.close(); @@ -180,29 +179,43 @@ public class TestIndexReader extends LuceneTestCase int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor(); for (int i = 0; i < 5*mergeFactor; i++) { doc = new Document(); - doc.add(new Field("keyword","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field("text","test1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("unindexed","test1", Field.Store.YES, Field.Index.NO)); - doc.add(new Field("unstored","test1", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new Field("keyword",StringField.TYPE_STORED,"test1")); + doc.add(new Field("text",TextField.TYPE_STORED, "test1")); + doc.add(new Field("unindexed",customType3,"test1")); + doc.add(new TextField("unstored","test1")); writer.addDocument(doc); } // new fields are in some different segments (we hope) for (int i = 0; i < 5*mergeFactor; i++) { doc = new Document(); - doc.add(new Field("keyword2","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(new Field("text2","test1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("unindexed2","test1", Field.Store.YES, Field.Index.NO)); - doc.add(new Field("unstored2","test1", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new Field("keyword2",StringField.TYPE_STORED,"test1")); + doc.add(new Field("text2",TextField.TYPE_STORED, "test1")); + doc.add(new Field("unindexed2",customType3,"test1")); + doc.add(new TextField("unstored2","test1")); writer.addDocument(doc); } // new termvector fields + + FieldType customType5 = new FieldType(TextField.TYPE_STORED); + customType5.setStoreTermVectors(true); + FieldType customType6 = new FieldType(TextField.TYPE_STORED); + customType6.setStoreTermVectors(true); + customType6.setStoreTermVectorOffsets(true); + FieldType customType7 = new FieldType(TextField.TYPE_STORED); + customType7.setStoreTermVectors(true); + customType7.setStoreTermVectorPositions(true); + FieldType customType8 = new FieldType(TextField.TYPE_STORED); + customType8.setStoreTermVectors(true); + customType8.setStoreTermVectorOffsets(true); + customType8.setStoreTermVectorPositions(true); + for (int i = 0; i < 5*mergeFactor; i++) { doc = new Document(); - doc.add(new Field("tvnot","tvnot", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); - doc.add(new Field("termvector","termvector", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); - doc.add(new Field("tvoffset","tvoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); - doc.add(new Field("tvposition","tvposition", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); - doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(new Field("tvnot",TextField.TYPE_STORED,"tvnot")); + doc.add(new Field("termvector",customType5,"termvector")); + doc.add(new Field("tvoffset",customType6,"tvoffset")); + doc.add(new 
Field("tvposition",customType7,"tvposition")); + doc.add(new Field("tvpositionoffset",customType8, "tvpositionoffset")); writer.addDocument(doc); } @@ -277,14 +290,26 @@ public class TestIndexReader extends LuceneTestCase // want to get some more segments here // new termvector fields int mergeFactor = ((LogMergePolicy) writer.getConfig().getMergePolicy()).getMergeFactor(); + FieldType customType5 = new FieldType(TextField.TYPE_STORED); + customType5.setStoreTermVectors(true); + FieldType customType6 = new FieldType(TextField.TYPE_STORED); + customType6.setStoreTermVectors(true); + customType6.setStoreTermVectorOffsets(true); + FieldType customType7 = new FieldType(TextField.TYPE_STORED); + customType7.setStoreTermVectors(true); + customType7.setStoreTermVectorPositions(true); + FieldType customType8 = new FieldType(TextField.TYPE_STORED); + customType8.setStoreTermVectors(true); + customType8.setStoreTermVectorOffsets(true); + customType8.setStoreTermVectorPositions(true); for (int i = 0; i < 5 * mergeFactor; i++) { Document doc = new Document(); - doc.add(new Field("tvnot","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); - doc.add(new Field("termvector","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); - doc.add(new Field("tvoffset","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); - doc.add(new Field("tvposition","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); - doc.add(new Field("tvpositionoffset","one two two three three three", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - + doc.add(new Field("tvnot",TextField.TYPE_STORED,"one two two three three three")); + doc.add(new Field("termvector",customType5,"one two two three three three")); + doc.add(new Field("tvoffset",customType6,"one two two three three three")); + doc.add(new Field("tvposition",customType7,"one two two three three three")); + doc.add(new Field("tvpositionoffset",customType8, "one two two three three three")); + writer.addDocument(doc); } writer.close(); @@ -338,36 +363,21 @@ public class TestIndexReader extends LuceneTestCase writer.close(); writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.APPEND).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); - doc.add(new Field("bin1", bin)); - doc.add(new Field("junk", "junk text", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new BinaryField("bin1", bin)); + doc.add(new TextField("junk", "junk text")); writer.addDocument(doc); writer.close(); IndexReader reader = IndexReader.open(dir, false); - doc = reader.document(reader.maxDoc() - 1); - Field[] fields = doc.getFields("bin1"); + Document doc2 = reader.document(reader.maxDoc() - 1); + IndexableField[] fields = doc2.getFields("bin1"); assertNotNull(fields); assertEquals(1, fields.length); - Field b1 = fields[0]; - assertTrue(b1.isBinary()); - byte[] data1 = b1.getBinaryValue(); - assertEquals(bin.length, b1.getBinaryLength()); + IndexableField b1 = fields[0]; + assertTrue(b1.binaryValue() != null); + BytesRef bytesRef = b1.binaryValue(); + assertEquals(bin.length, bytesRef.length); for (int i = 0; i < bin.length; i++) { - assertEquals(bin[i], data1[i + b1.getBinaryOffset()]); - } - Set lazyFields = new HashSet(); - lazyFields.add("bin1"); - FieldSelector sel = new SetBasedFieldSelector(new 
HashSet(), lazyFields); - doc = reader.document(reader.maxDoc() - 1, sel); - Fieldable[] fieldables = doc.getFieldables("bin1"); - assertNotNull(fieldables); - assertEquals(1, fieldables.length); - Fieldable fb1 = fieldables[0]; - assertTrue(fb1.isBinary()); - assertEquals(bin.length, fb1.getBinaryLength()); - data1 = fb1.getBinaryValue(); - assertEquals(bin.length, fb1.getBinaryLength()); - for (int i = 0; i < bin.length; i++) { - assertEquals(bin[i], data1[i + fb1.getBinaryOffset()]); + assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); } reader.close(); // force optimize @@ -377,16 +387,16 @@ public class TestIndexReader extends LuceneTestCase writer.optimize(); writer.close(); reader = IndexReader.open(dir, false); - doc = reader.document(reader.maxDoc() - 1); - fields = doc.getFields("bin1"); + doc2 = reader.document(reader.maxDoc() - 1); + fields = doc2.getFields("bin1"); assertNotNull(fields); assertEquals(1, fields.length); b1 = fields[0]; - assertTrue(b1.isBinary()); - data1 = b1.getBinaryValue(); - assertEquals(bin.length, b1.getBinaryLength()); + assertTrue(b1.binaryValue() != null); + bytesRef = b1.binaryValue(); + assertEquals(bin.length, bytesRef.length); for (int i = 0; i < bin.length; i++) { - assertEquals(bin[i], data1[i + b1.getBinaryOffset()]); + assertEquals(bin[i], bytesRef.bytes[i + bytesRef.offset]); } reader.close(); dir.close(); @@ -778,38 +788,56 @@ public class TestIndexReader extends LuceneTestCase static void addDocumentWithFields(IndexWriter writer) throws IOException { Document doc = new Document(); - doc.add(newField("keyword","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(newField("text","test1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("unindexed","test1", Field.Store.YES, Field.Index.NO)); - doc.add(newField("unstored","test1", Field.Store.NO, Field.Index.ANALYZED)); + + FieldType customType3 = new FieldType(); + customType3.setStored(true); + doc.add(newField("keyword", "test1", StringField.TYPE_STORED)); + doc.add(newField("text", "test1", TextField.TYPE_STORED)); + doc.add(newField("unindexed", "test1", customType3)); + doc.add(new TextField("unstored","test1")); writer.addDocument(doc); } static void addDocumentWithDifferentFields(IndexWriter writer) throws IOException { - Document doc = new Document(); - doc.add(newField("keyword2","test1", Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(newField("text2","test1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("unindexed2","test1", Field.Store.YES, Field.Index.NO)); - doc.add(newField("unstored2","test1", Field.Store.NO, Field.Index.ANALYZED)); - writer.addDocument(doc); + Document doc = new Document(); + + FieldType customType3 = new FieldType(); + customType3.setStored(true); + doc.add(newField("keyword2", "test1", StringField.TYPE_STORED)); + doc.add(newField("text2", "test1", TextField.TYPE_STORED)); + doc.add(newField("unindexed2", "test1", customType3)); + doc.add(new TextField("unstored2","test1")); + writer.addDocument(doc); } static void addDocumentWithTermVectorFields(IndexWriter writer) throws IOException { Document doc = new Document(); - doc.add(newField("tvnot","tvnot", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); - doc.add(newField("termvector","termvector", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.YES)); - doc.add(newField("tvoffset","tvoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_OFFSETS)); - doc.add(newField("tvposition","tvposition", Field.Store.YES, 
Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS)); - doc.add(newField("tvpositionoffset","tvpositionoffset", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType5 = new FieldType(TextField.TYPE_STORED); + customType5.setStoreTermVectors(true); + FieldType customType6 = new FieldType(TextField.TYPE_STORED); + customType6.setStoreTermVectors(true); + customType6.setStoreTermVectorOffsets(true); + FieldType customType7 = new FieldType(TextField.TYPE_STORED); + customType7.setStoreTermVectors(true); + customType7.setStoreTermVectorPositions(true); + FieldType customType8 = new FieldType(TextField.TYPE_STORED); + customType8.setStoreTermVectors(true); + customType8.setStoreTermVectorOffsets(true); + customType8.setStoreTermVectorPositions(true); + doc.add(newField("tvnot","tvnot",TextField.TYPE_STORED)); + doc.add(newField("termvector","termvector",customType5)); + doc.add(newField("tvoffset","tvoffset", customType6)); + doc.add(newField("tvposition","tvposition", customType7)); + doc.add(newField("tvpositionoffset","tvpositionoffset", customType8)); writer.addDocument(doc); } static void addDoc(IndexWriter writer, String value) throws IOException { Document doc = new Document(); - doc.add(newField("content", value, Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("content", value, TextField.TYPE_UNSTORED)); writer.addDocument(doc); } @@ -862,11 +890,11 @@ public class TestIndexReader extends LuceneTestCase if (liveDocs1 == null || liveDocs1.get(i)) { Document doc1 = index1.document(i); Document doc2 = index2.document(i); - List fieldable1 = doc1.getFields(); - List fieldable2 = doc2.getFields(); - assertEquals("Different numbers of fields for doc " + i + ".", fieldable1.size(), fieldable2.size()); - Iterator itField1 = fieldable1.iterator(); - Iterator itField2 = fieldable2.iterator(); + List field1 = doc1.getFields(); + List field2 = doc2.getFields(); + assertEquals("Different numbers of fields for doc " + i + ".", field1.size(), field2.size()); + Iterator itField1 = field1.iterator(); + Iterator itField2 = field2.iterator(); while (itField1.hasNext()) { Field curField1 = (Field) itField1.next(); Field curField2 = (Field) itField2.next(); @@ -1047,7 +1075,11 @@ public class TestIndexReader extends LuceneTestCase static Document createDocument(String id) { Document doc = new Document(); - doc.add(newField("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setTokenized(false); + customType.setOmitNorms(true); + + doc.add(newField("id", id, customType)); return doc; } @@ -1097,7 +1129,7 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); - doc.add(newField("number", "17", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("number", "17", StringField.TYPE_UNSTORED)); writer.addDocument(doc); writer.close(); @@ -1132,7 +1164,7 @@ public class TestIndexReader extends LuceneTestCase setMergePolicy(newLogMergePolicy(10)) ); Document doc = new Document(); - doc.add(newField("number", "17", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("number", "17", StringField.TYPE_UNSTORED)); writer.addDocument(doc); writer.commit(); @@ -1164,8 +1196,8 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); IndexWriter writer 
= new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); Document doc = new Document(); - doc.add(newField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED)); - doc.add(newField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", TextField.TYPE_UNSTORED)); + doc.add(newField("number", "0 1 2 3 4 5 6 7 8 9", TextField.TYPE_UNSTORED)); writer.addDocument(doc); writer.addDocument(doc); writer.commit(); @@ -1197,8 +1229,8 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setCodecProvider(_TestUtil.alwaysCodec("Standard"))); Document doc = new Document(); - doc.add(newField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", Field.Store.NO, Field.Index.ANALYZED)); - doc.add(newField("number", "0 1 2 3 4 5 6 7 8 9", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("field", "a b c d e f g h i j k l m n o p q r s t u v w x y z", TextField.TYPE_UNSTORED)); + doc.add(newField("number", "0 1 2 3 4 5 6 7 8 9", TextField.TYPE_UNSTORED)); writer.addDocument(doc); writer.addDocument(doc); writer.close(); @@ -1302,7 +1334,7 @@ public class TestIndexReader extends LuceneTestCase Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d = new Document(); - d.add(newField("f", "a a b", Field.Index.ANALYZED)); + d.add(newField("f", "a a b", TextField.TYPE_UNSTORED)); writer.addDocument(d); IndexReader r = writer.getReader(); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java index 9b6c4d24fd2..42cf8c07cc4 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderClone.java @@ -17,12 +17,10 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import org.apache.lucene.index.SegmentNorms; import org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.search.Similarity; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.store.LockObtainFailedException; import org.apache.lucene.util.LuceneTestCase; @@ -500,7 +498,7 @@ public class TestIndexReaderClone extends LuceneTestCase { setMergePolicy(newLogMergePolicy(false)) ); Document doc = new Document(); - doc.add(newField("field", "yes it's stored", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", "yes it's stored", TextField.TYPE_STORED)); w.addDocument(doc); w.close(); IndexReader r1 = IndexReader.open(dir, false); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java index 228d03331da..c9cd59728e2 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderCloneNorms.java @@ -26,10 +26,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; -import org.apache.lucene.index.SegmentNorms; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DefaultSimilarityProvider; import org.apache.lucene.search.Similarity; @@ -329,8 +328,11 @@ public class TestIndexReaderCloneNorms extends LuceneTestCase { private Document newDoc() { Document d = new Document(); float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed + + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setTokenized(false); for (int i = 0; i < 10; i++) { - Field f = newField("f" + i, "v" + i, Store.NO, Index.NOT_ANALYZED); + Field f = newField("f" + i, "v" + i, customType); f.setBoost(boost); d.add(f); } diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderDelete.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderDelete.java index 9af326f3feb..ce15d32ee9f 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderDelete.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderDelete.java @@ -21,7 +21,7 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; @@ -276,11 +276,11 @@ public class TestIndexReaderDelete extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter w= new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); - doc.add(newField("f", "doctor", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("f", "doctor", 
StringField.TYPE_UNSTORED)); w.addDocument(doc); doc = new Document(); w.commit(); - doc.add(newField("f", "who", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("f", "who", StringField.TYPE_UNSTORED)); w.addDocument(doc); IndexReader r = new SlowMultiReaderWrapper(w.getReader()); w.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java index 058939eee01..bc0f114160a 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderOnDiskFull.java @@ -21,11 +21,11 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Similarity; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.MockDirectoryWrapper; import org.apache.lucene.store.RAMDirectory; @@ -52,8 +52,8 @@ public class TestIndexReaderOnDiskFull extends LuceneTestCase { } for(int i=0;i<157;i++) { Document d = new Document(); - d.add(newField("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); - d.add(newField("content", "aaa " + i, Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("id", Integer.toString(i), StringField.TYPE_STORED)); + d.add(newField("content", "aaa " + i, TextField.TYPE_UNSTORED)); writer.addDocument(d); if (0==i%10) writer.commit(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java index 3200204df5f..f981a1505e0 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexReaderReopen.java @@ -17,7 +17,6 @@ package org.apache.lucene.index; * limitations under the License. 
*/ -import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; @@ -31,16 +30,15 @@ import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.FieldCache; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Similarity; -import org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; @@ -168,12 +166,19 @@ public class TestIndexReaderReopen extends LuceneTestCase { IndexReader reader = IndexReader.open(dir, false); try { int M = 3; + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setTokenized(false); + FieldType customType2 = new FieldType(TextField.TYPE_STORED); + customType2.setTokenized(false); + customType2.setOmitNorms(true); + FieldType customType3 = new FieldType(); + customType3.setStored(true); for (int i=0; i<4; i++) { for (int j=0; j0) { int k = i-1; @@ -957,13 +962,18 @@ public class TestIndexReaderReopen extends LuceneTestCase { Document doc = new Document(); sb.append("a"); sb.append(n); - doc.add(new Field("field1", sb.toString(), Store.YES, Index.ANALYZED)); - doc.add(new Field("fielda", sb.toString(), Store.YES, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field("fieldb", sb.toString(), Store.YES, Index.NO)); + FieldType customType2 = new FieldType(TextField.TYPE_STORED); + customType2.setTokenized(false); + customType2.setOmitNorms(true); + FieldType customType3 = new FieldType(); + customType3.setStored(true); + doc.add(new Field("field1", TextField.TYPE_STORED, sb.toString())); + doc.add(new Field("fielda", customType2, sb.toString())); + doc.add(new Field("fieldb", customType3, sb.toString())); sb.append(" b"); sb.append(n); for (int i = 1; i < numFields; i++) { - doc.add(new Field("field" + (i+1), sb.toString(), Store.YES, Index.ANALYZED)); + doc.add(new Field("field" + (i+1), TextField.TYPE_STORED, sb.toString())); } return doc; } @@ -1178,7 +1188,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { ); for(int i=0;i<4;i++) { Document doc = new Document(); - doc.add(newField("id", ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("id", ""+i, StringField.TYPE_UNSTORED)); writer.addDocument(doc); Map data = new HashMap(); data.put("index", i+""); @@ -1239,7 +1249,7 @@ public class TestIndexReaderReopen extends LuceneTestCase { setMergePolicy(newLogMergePolicy(10)) ); Document doc = new Document(); - doc.add(newField("number", "17", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("number", "17", StringField.TYPE_UNSTORED)); writer.addDocument(doc); writer.commit(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java index e6513a6718d..8006985e456 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriter.java @@ -26,28 +26,23 @@ import 
java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Random; -import java.util.Set; -import java.util.concurrent.atomic.AtomicBoolean; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockFixedLengthPayloadFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.BinaryField; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.FieldCache; @@ -60,7 +55,6 @@ import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockFactory; @@ -69,15 +63,13 @@ import org.apache.lucene.store.NoLockFactory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.store.SingleInstanceLockFactory; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.CharsRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ThreadInterruptedException; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; -import org.apache.lucene.index.codecs.preflexrw.PreFlexRWCodec; public class TestIndexWriter extends LuceneTestCase { + private static final FieldType storedTextType = new FieldType(TextField.TYPE_UNSTORED); public void testDocCount() throws IOException { Directory dir = newDirectory(); @@ -138,15 +130,15 @@ public class TestIndexWriter extends LuceneTestCase { static void addDoc(IndexWriter writer) throws IOException { Document doc = new Document(); - doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED)); writer.addDocument(doc); } static void addDocWithIndex(IndexWriter writer, int index) throws IOException { Document doc = new Document(); - doc.add(newField("content", "aaa " + index, Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("id", "" + index, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("content", "aaa " + index, storedTextType)); + doc.add(newField("id", "" + index, storedTextType)); writer.addDocument(doc); } @@ -256,12 +248,12 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); for(int j=0;j<100;j++) { Document doc = new Document(); - doc.add(newField("a"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("b"+j, 
"aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("c"+j, "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("d"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("e"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("f"+j, "aaa", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("a"+j, "aaa" + j, storedTextType)); + doc.add(newField("b"+j, "aaa" + j, storedTextType)); + doc.add(newField("c"+j, "aaa" + j, storedTextType)); + doc.add(newField("d"+j, "aaa", storedTextType)); + doc.add(newField("e"+j, "aaa", storedTextType)); + doc.add(newField("f"+j, "aaa", storedTextType)); writer.addDocument(doc); } writer.close(); @@ -292,7 +284,7 @@ public class TestIndexWriter extends LuceneTestCase { int lastNumFile = dir.listAll().length; for(int j=0;j<9;j++) { Document doc = new Document(); - doc.add(newField("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", "aaa" + j, storedTextType)); writer.addDocument(doc); int numFile = dir.listAll().length; // Verify that with a tiny RAM buffer we see new @@ -315,7 +307,7 @@ public class TestIndexWriter extends LuceneTestCase { int lastFlushCount = -1; for(int j=1;j<52;j++) { Document doc = new Document(); - doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("field", storedTextType, "aaa" + j)); writer.addDocument(doc); _TestUtil.syncConcurrentMerges(writer); int flushCount = writer.getFlushCount(); @@ -369,7 +361,7 @@ public class TestIndexWriter extends LuceneTestCase { for(int j=1;j<52;j++) { Document doc = new Document(); - doc.add(new Field("field", "aaa" + j, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("field", storedTextType, "aaa" + j)); writer.addDocument(doc); } @@ -430,7 +422,7 @@ public class TestIndexWriter extends LuceneTestCase { for(int j=0;j<100;j++) { Document doc = new Document(); for(int k=0;k<100;k++) { - doc.add(newField("field", Integer.toString(random.nextInt()), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", Integer.toString(random.nextInt()), storedTextType)); } writer.addDocument(doc); } @@ -439,7 +431,7 @@ public class TestIndexWriter extends LuceneTestCase { // occurs (heavy on byte blocks) for(int j=0;j<100;j++) { Document doc = new Document(); - doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", "aaa aaa aaa aaa aaa aaa aaa aaa aaa aaa", storedTextType)); writer.addDocument(doc); } @@ -454,7 +446,7 @@ public class TestIndexWriter extends LuceneTestCase { String longTerm = b.toString(); Document doc = new Document(); - doc.add(newField("field", longTerm, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", longTerm, storedTextType)); writer.addDocument(doc); } } @@ -472,11 +464,16 @@ public class TestIndexWriter extends LuceneTestCase { MockDirectoryWrapper dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(10)); // Enable norms for only 1 doc, pre flush + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setOmitNorms(true); for(int j=0;j<10;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = null; if (j != 8) { - f.setOmitNorms(true); + f = newField("field", "aaa", customType); + } + else { + f = newField("field", "aaa", 
storedTextType); } doc.add(f); writer.addDocument(doc); @@ -495,9 +492,12 @@ public class TestIndexWriter extends LuceneTestCase { // Enable norms for only 1 doc, post flush for(int j=0;j<27;j++) { Document doc = new Document(); - Field f = newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED); + Field f = null; if (j != 26) { - f.setOmitNorms(true); + f = newField("field", "aaa", customType); + } + else { + f = newField("field", "aaa", storedTextType); } doc.add(f); writer.addDocument(doc); @@ -527,7 +527,11 @@ public class TestIndexWriter extends LuceneTestCase { b.append(" a a a a a a a a"); } Document doc = new Document(); - doc.add(newField("field", b.toString(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + doc.add(newField("field", b.toString(), customType)); writer.addDocument(doc); writer.close(); @@ -595,7 +599,11 @@ public class TestIndexWriter extends LuceneTestCase { setMergePolicy(newLogMergePolicy(10)) ); Document doc = new Document(); - doc.add(newField("field", "aaa", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + doc.add(newField("field", "aaa", customType)); for(int i=0;i<19;i++) writer.addDocument(doc); writer.flush(false, true); @@ -615,7 +623,11 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); writer.setInfoStream(VERBOSE ? 
System.out : null); Document doc = new Document(); - doc.add(newField("field", "aaa", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + doc.add(newField("field", "aaa", customType)); writer.addDocument(doc); writer.commit(); if (VERBOSE) { @@ -644,7 +656,9 @@ public class TestIndexWriter extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document document = new Document(); - document.add(newField("tvtest", "", Store.NO, Index.ANALYZED, TermVector.YES)); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setStoreTermVectors(true); + document.add(newField("tvtest", "", customType)); iw.addDocument(document); iw.close(); dir.close(); @@ -661,8 +675,9 @@ public class TestIndexWriter extends LuceneTestCase { ((LogMergePolicy) conf.getMergePolicy()).setMergeFactor(2); IndexWriter iw = new IndexWriter(dir, conf); Document document = new Document(); - document.add(newField("tvtest", "a b c", Field.Store.NO, Field.Index.ANALYZED, - Field.TermVector.YES)); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setStoreTermVectors(true); + document.add(newField("tvtest", "a b c", customType)); Thread.currentThread().setPriority(Thread.MAX_PRIORITY); for(int i=0;i<4;i++) iw.addDocument(document); @@ -688,24 +703,20 @@ public class TestIndexWriter extends LuceneTestCase { Document doc = new Document(); String contents = "aa bb cc dd ee ff gg hh ii jj kk"; + FieldType customType = new FieldType(TextField.TYPE_STORED); + FieldType type = null; if (i == 7) { // Add empty docs here - doc.add(newField("content3", "", Field.Store.NO, - Field.Index.ANALYZED)); + doc.add(newField("content3", "", TextField.TYPE_UNSTORED)); } else { - Field.Store storeVal; if (i%2 == 0) { - doc.add(newField("content4", contents, Field.Store.YES, - Field.Index.ANALYZED)); - storeVal = Field.Store.YES; + doc.add(newField("content4", contents, customType)); + type = customType; } else - storeVal = Field.Store.NO; - doc.add(newField("content1", contents, storeVal, - Field.Index.ANALYZED)); - doc.add(newField("content3", "", Field.Store.YES, - Field.Index.ANALYZED)); - doc.add(newField("content5", "", storeVal, - Field.Index.ANALYZED)); + type = TextField.TYPE_UNSTORED; + doc.add(newField("content1", contents, TextField.TYPE_UNSTORED)); + doc.add(newField("content3", "", customType)); + doc.add(newField("content5", "", type)); } for(int j=0;j<4;j++) @@ -731,7 +742,10 @@ public class TestIndexWriter extends LuceneTestCase { Directory directory = newDirectory(); final Document doc = new Document(); - Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setTokenized(false); + + Field idField = newField("id", "", customType); doc.add(idField); for(int pass=0;pass<2;pass++) { @@ -835,7 +849,7 @@ public class TestIndexWriter extends LuceneTestCase { for(int i=0;i<10000;i++) b.append(" a"); b.append(" x"); - doc.add(newField("field", b.toString(), Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("field", b.toString(), TextField.TYPE_UNSTORED)); writer.addDocument(doc); writer.close(); @@ -853,7 +867,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, 
newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); - doc.add(newField("", "a b c", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("", "a b c", TextField.TYPE_UNSTORED)); writer.addDocument(doc); writer.close(); dir.close(); @@ -887,8 +901,8 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); - doc.add(newField("field", "a field", Field.Store.YES, - Field.Index.ANALYZED)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + doc.add(newField("field", "a field", customType)); w.addDocument(doc); w.commit(); assertTrue(w.beforeWasCalled); @@ -931,7 +945,7 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); - doc.add(new Field("field", tokens)); + doc.add(new TextField("field", tokens)); w.addDocument(doc); w.commit(); @@ -972,20 +986,20 @@ public class TestIndexWriter extends LuceneTestCase { b[i] = (byte) (i+77); Document doc = new Document(); - Field f = new Field("binary", b, 10, 17); - byte[] bx = f.getBinaryValue(); + Field f = new BinaryField("binary", b, 10, 17); + byte[] bx = f.binaryValue().bytes; assertTrue(bx != null); assertEquals(50, bx.length); - assertEquals(10, f.getBinaryOffset()); - assertEquals(17, f.getBinaryLength()); + assertEquals(10, f.binaryValue().offset); + assertEquals(17, f.binaryValue().length); doc.add(f); w.addDocument(doc); w.close(); IndexReader ir = IndexReader.open(dir, true); - doc = ir.document(0); - f = doc.getField("binary"); - b = f.getBinaryValue(); + Document doc2 = ir.document(0); + IndexableField f2 = doc2.getField("binary"); + b = f2.binaryValue().bytes; assertTrue(b != null); assertEquals(17, b.length, 17); assertEquals(87, b[0]); @@ -1001,10 +1015,11 @@ public class TestIndexWriter extends LuceneTestCase { IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document doc = new Document(); - Field f = newField("field", "", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); - Field f2 = newField("field", "crunch man", Field.Store.NO, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + Field f = newField("field", "", customType); + Field f2 = newField("field", "crunch man", customType); doc.add(f); doc.add(f2); w.addDocument(doc); @@ -1046,8 +1061,13 @@ public class TestIndexWriter extends LuceneTestCase { Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); Document doc = new Document(); - doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + + doc.add(newField("content", "aaa bbb ccc ddd eee fff ggg hhh iii", customType)); writer.addDocument(doc); writer.addDocument(doc); 
writer.addDocument(doc); @@ -1099,7 +1119,7 @@ public class TestIndexWriter extends LuceneTestCase { w = new IndexWriter(dir, conf); Document doc = new Document(); - doc.add(newField("field", "some text contents", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", "some text contents", storedTextType)); for(int i=0;i<100;i++) { w.addDocument(doc); if (i%10 == 0) { @@ -1213,9 +1233,17 @@ public class TestIndexWriter extends LuceneTestCase { b[i] = (byte) (i+77); Document doc = new Document(); - Field f = new Field("binary", b, 10, 17); + + FieldType customType = new FieldType(BinaryField.TYPE_STORED); + customType.setTokenized(true); + customType.setIndexed(true); + + Field f = new Field("binary", customType, b, 10, 17); f.setTokenStream(new MockTokenizer(new StringReader("doc1field1"), MockTokenizer.WHITESPACE, false)); - Field f2 = newField("string", "value", Field.Store.YES,Field.Index.ANALYZED); + + FieldType customType2 = new FieldType(TextField.TYPE_STORED); + + Field f2 = newField("string", "value", customType2); f2.setTokenStream(new MockTokenizer(new StringReader("doc1field2"), MockTokenizer.WHITESPACE, false)); doc.add(f); doc.add(f2); @@ -1238,16 +1266,16 @@ public class TestIndexWriter extends LuceneTestCase { w.close(); IndexReader ir = IndexReader.open(dir, true); - doc = ir.document(0); - f = doc.getField("binary"); - b = f.getBinaryValue(); + Document doc2 = ir.document(0); + IndexableField f3 = doc2.getField("binary"); + b = f3.binaryValue().bytes; assertTrue(b != null); assertEquals(17, b.length, 17); assertEquals(87, b[0]); - assertTrue(ir.document(0).getFieldable("binary").isBinary()); - assertTrue(ir.document(1).getFieldable("binary").isBinary()); - assertTrue(ir.document(2).getFieldable("binary").isBinary()); + assertTrue(ir.document(0).getField("binary").binaryValue()!=null); + assertTrue(ir.document(1).getField("binary").binaryValue()!=null); + assertTrue(ir.document(2).getField("binary").binaryValue()!=null); assertEquals("value", ir.document(0).get("string")); assertEquals("value", ir.document(1).get("string")); @@ -1272,13 +1300,16 @@ public class TestIndexWriter extends LuceneTestCase { Directory d = newDirectory(); IndexWriter w = new IndexWriter(d, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document doc = new Document(); - doc.add(newField("zzz", "a b c", Field.Store.YES, Field.Index.NO)); - doc.add(newField("aaa", "a b c", Field.Store.YES, Field.Index.NO)); - doc.add(newField("zzz", "1 2 3", Field.Store.YES, Field.Index.NO)); + + FieldType customType = new FieldType(); + customType.setStored(true); + doc.add(newField("zzz", "a b c", customType)); + doc.add(newField("aaa", "a b c", customType)); + doc.add(newField("zzz", "1 2 3", customType)); w.addDocument(doc); IndexReader r = w.getReader(); - doc = r.document(0); - Iterator it = doc.getFields().iterator(); + Document doc2 = r.document(0); + Iterator it = doc2.getFields().iterator(); assertTrue(it.hasNext()); Field f = (Field) it.next(); assertEquals(f.name(), "zzz"); @@ -1322,7 +1353,7 @@ public class TestIndexWriter extends LuceneTestCase { s.append(' ').append(i); } Document d = new Document(); - Field f = newField("field", s.toString(), Field.Store.NO, Field.Index.ANALYZED); + Field f = newField("field", s.toString(), TextField.TYPE_UNSTORED); d.add(f); w.addDocument(d); @@ -1354,7 +1385,7 @@ public class TestIndexWriter extends LuceneTestCase { setMergePolicy(mergePolicy) ); Document doc = new Document(); - doc.add(newField("field", "go", Field.Store.NO, 
Field.Index.ANALYZED)); + doc.add(newField("field", "go", TextField.TYPE_UNSTORED)); w.addDocument(doc); IndexReader r; if (iter == 0) { @@ -1422,7 +1453,13 @@ public class TestIndexWriter extends LuceneTestCase { // First commit Document doc = new Document(); - doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + + doc.add(newField("c", "val", customType)); writer.addDocument(doc); writer.commit(); assertEquals(1, IndexReader.listCommits(dir).size()); @@ -1432,7 +1469,7 @@ public class TestIndexWriter extends LuceneTestCase { // Second commit - now KeepOnlyLastCommit cannot delete the prev commit. doc = new Document(); - doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("c", "val", customType)); writer.addDocument(doc); writer.commit(); assertEquals(2, IndexReader.listCommits(dir).size()); @@ -1479,14 +1516,18 @@ public class TestIndexWriter extends LuceneTestCase { } Document doc = new Document(); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); // create as many files as possible - doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("c", "val", customType)); writer.addDocument(doc); // Adding just one document does not call flush yet. assertEquals("only the stored and term vector files should exist in the directory", 5 + extraFileCount, dir.listAll().length); doc = new Document(); - doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("c", "val", customType)); writer.addDocument(doc); // The second document should cause a flush. 
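[Note: every hunk in this patch performs the same mechanical rewrite: the deprecated Field.Store / Field.Index / Field.TermVector constructor flags are replaced by an explicit FieldType, usually seeded from TextField.TYPE_STORED or TextField.TYPE_UNSTORED. The minimal sketch below is not part of the patch; the class name FieldTypeMigrationSketch and the migrated() helper are illustrative only, and it assumes nothing beyond the FieldType/TextField API the surrounding hunks already use. It shows how the old five-argument constructor maps onto the new API:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.TextField;

    class FieldTypeMigrationSketch {
      Document migrated() {
        // Old idiom:
        //   new Field("c", "val", Field.Store.YES, Field.Index.ANALYZED,
        //             Field.TermVector.WITH_POSITIONS_OFFSETS)
        // New idiom: seed a FieldType from a predefined base type,
        // then set the remaining index-time flags explicitly.
        FieldType customType = new FieldType(TextField.TYPE_STORED); // stored + analyzed
        customType.setStoreTermVectors(true);          // Field.TermVector.YES
        customType.setStoreTermVectorPositions(true);  // ...WITH_POSITIONS
        customType.setStoreTermVectorOffsets(true);    // ...WITH_POSITIONS_OFFSETS

        Document doc = new Document();
        doc.add(new Field("c", customType, "val"));
        return doc;
      }
    }

Because the FieldType now carries all index-time flags, a single instance can be shared across many fields and documents, which is why several hunks below hoist these types into static finals rather than calling setters such as setOmitNorms on each Field instance.]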
@@ -1509,7 +1550,11 @@ public class TestIndexWriter extends LuceneTestCase { TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); Document doc = new Document(); - doc.add(newField("c", "val", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + doc.add(newField("c", "val", customType)); w.addDocument(doc); w.addDocument(doc); IndexWriter w2 = new IndexWriter(dir, newIndexWriterConfig( @@ -1536,7 +1581,9 @@ public class TestIndexWriter extends LuceneTestCase { final List<Integer> fieldIDs = new ArrayList<Integer>(); - Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setTokenized(false); + Field idField = newField("id", "", customType); for(int i=0;i { private final Document doc; private final int count; + + /* private field types */ + + private static final FieldType custom1 = new FieldType(TextField.TYPE_UNSTORED); + private static final FieldType custom2 = new FieldType(); + private static final FieldType custom3 = new FieldType(); + private static final FieldType custom4 = new FieldType(StringField.TYPE_UNSTORED); + private static final FieldType custom5 = new FieldType(TextField.TYPE_STORED); + + static { + + custom1.setStoreTermVectors(true); + custom1.setStoreTermVectorPositions(true); + custom1.setStoreTermVectorOffsets(true); + + custom2.setStored(true); + custom2.setIndexed(true); + + custom3.setStored(true); + + custom4.setStoreTermVectors(true); + custom4.setStoreTermVectorPositions(true); + custom4.setStoreTermVectorOffsets(true); + + custom5.setStoreTermVectors(true); + custom5.setStoreTermVectorPositions(true); + custom5.setStoreTermVectorOffsets(true); + } public DocCopyIterator(Document doc, int count) { this.count = count; @@ -101,17 +132,17 @@ public class TestIndexWriterExceptions extends LuceneTestCase { final Document doc = new Document(); - doc.add(newField(r, "content1", "aaa bbb ccc ddd", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField(r, "content6", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - doc.add(newField(r, "content2", "aaa bbb ccc ddd", Field.Store.YES, Field.Index.NOT_ANALYZED)); - doc.add(newField(r, "content3", "aaa bbb ccc ddd", Field.Store.YES, Field.Index.NO)); + doc.add(newField(r, "content1", "aaa bbb ccc ddd", TextField.TYPE_STORED)); + doc.add(newField(r, "content6", "aaa bbb ccc ddd", DocCopyIterator.custom1)); + doc.add(newField(r, "content2", "aaa bbb ccc ddd", DocCopyIterator.custom2)); + doc.add(newField(r, "content3", "aaa bbb ccc ddd", DocCopyIterator.custom3)); - doc.add(newField(r, "content4", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.ANALYZED)); - doc.add(newField(r, "content5", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField(r, "content4", "aaa bbb ccc ddd", TextField.TYPE_UNSTORED)); + doc.add(newField(r, "content5", "aaa bbb ccc ddd", StringField.TYPE_UNSTORED)); - doc.add(newField(r, "content7", "aaa bbb ccc ddd", Field.Store.NO, Field.Index.NOT_ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField(r, "content7", "aaa bbb ccc ddd", DocCopyIterator.custom4)); - final Field idField = newField(r, "id", "", Field.Store.YES, Field.Index.NOT_ANALYZED); + final Field idField =
newField(r, "id", "", DocCopyIterator.custom2); doc.add(idField); final long stopTime = System.currentTimeMillis() + 500; @@ -337,8 +368,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { MockIndexWriter2 w = new MockIndexWriter2(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); w.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); - doc.add(newField("field", "a field", Field.Store.YES, - Field.Index.ANALYZED)); + doc.add(newField("field", "a field", TextField.TYPE_STORED)); w.addDocument(doc); w.doFail = true; try { @@ -357,8 +387,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { MockIndexWriter w = new MockIndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); w.setInfoStream(VERBOSE ? System.out : null); Document doc = new Document(); - doc.add(newField("field", "a field", Field.Store.YES, - Field.Index.ANALYZED)); + doc.add(newField("field", "a field", TextField.TYPE_STORED)); w.addDocument(doc); Analyzer analyzer = new Analyzer() { @@ -371,8 +400,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { }; Document crashDoc = new Document(); - crashDoc.add(newField("crash", "do it on token 4", Field.Store.YES, - Field.Index.ANALYZED)); + crashDoc.add(newField("crash", "do it on token 4", TextField.TYPE_STORED)); try { w.addDocument(crashDoc, analyzer); fail("did not hit expected exception"); @@ -413,8 +441,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { MockIndexWriter3 w = new MockIndexWriter3(dir, conf); w.doFail = true; Document doc = new Document(); - doc.add(newField("field", "a field", Field.Store.YES, - Field.Index.ANALYZED)); + doc.add(newField("field", "a field", TextField.TYPE_STORED)); for(int i=0;i<10;i++) try { w.addDocument(doc); @@ -463,8 +490,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { Document doc = new Document(); String contents = "aa bb cc dd ee ff gg hh ii jj kk"; - doc.add(newField("content", contents, Field.Store.NO, - Field.Index.ANALYZED)); + doc.add(newField("content", contents, TextField.TYPE_UNSTORED)); try { writer.addDocument(doc); fail("did not hit expected exception"); @@ -473,14 +499,12 @@ public class TestIndexWriterExceptions extends LuceneTestCase { // Make sure we can add another normal document doc = new Document(); - doc.add(newField("content", "aa bb cc dd", Field.Store.NO, - Field.Index.ANALYZED)); + doc.add(newField("content", "aa bb cc dd", TextField.TYPE_UNSTORED)); writer.addDocument(doc); // Make sure we can add another normal document doc = new Document(); - doc.add(newField("content", "aa bb cc dd", Field.Store.NO, - Field.Index.ANALYZED)); + doc.add(newField("content", "aa bb cc dd", TextField.TYPE_UNSTORED)); writer.addDocument(doc); writer.close(); @@ -551,8 +575,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMaxBufferedDocs(2)); Document doc = new Document(); String contents = "aa bb cc dd ee ff gg hh ii jj kk"; - doc.add(newField("content", contents, Field.Store.NO, - Field.Index.ANALYZED)); + doc.add(newField("content", contents, TextField.TYPE_UNSTORED)); boolean hitError = false; for(int i=0;i<200;i++) { try { @@ -595,14 +618,11 @@ public class TestIndexWriterExceptions extends LuceneTestCase { lmp.setMergeFactor(Math.max(lmp.getMergeFactor(), 5)); Document doc = new Document(); - 
doc.add(newField("contents", "here are some contents", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("contents", "here are some contents", DocCopyIterator.custom5)); writer.addDocument(doc); writer.addDocument(doc); - doc.add(newField("crash", "this should crash after 4 terms", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); - doc.add(newField("other", "this will not get indexed", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("crash", "this should crash after 4 terms", DocCopyIterator.custom5)); + doc.add(newField("other", "this will not get indexed", DocCopyIterator.custom5)); try { writer.addDocument(doc); fail("did not hit expected exception"); @@ -615,8 +635,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { if (0 == i) { doc = new Document(); - doc.add(newField("contents", "here are some contents", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("contents", "here are some contents", DocCopyIterator.custom5)); writer.addDocument(doc); writer.addDocument(doc); } @@ -648,8 +667,7 @@ public class TestIndexWriterExceptions extends LuceneTestCase { writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setMaxBufferedDocs(10)); doc = new Document(); - doc.add(newField("contents", "here are some contents", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + doc.add(newField("contents", "here are some contents", DocCopyIterator.custom5)); for(int j=0;j<17;j++) writer.addDocument(doc); writer.optimize(); @@ -705,14 +723,11 @@ public class TestIndexWriterExceptions extends LuceneTestCase { try { for(int iter=0;iter allTerms = new HashSet(); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java b/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java index e1a29440021..a699f9538f7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java +++ b/lucene/src/test/org/apache/lucene/index/TestIndexWriterWithThreads.java @@ -23,6 +23,8 @@ import java.util.concurrent.CountDownLatch; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.MockDirectoryWrapper; @@ -55,7 +57,12 @@ public class TestIndexWriterWithThreads extends LuceneTestCase { public void run() { final Document doc = new Document(); - doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + + doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType)); int idUpto = 0; int fullCount = 0; @@ -291,7 +298,11 @@ public class TestIndexWriterWithThreads extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(2).setMergeScheduler(new ConcurrentMergeScheduler())); final Document doc = new 
Document(); - doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setStoreTermVectors(true); + customType.setStoreTermVectorPositions(true); + customType.setStoreTermVectorOffsets(true); + doc.add(newField("field", "aaa bbb ccc ddd eee fff ggg hhh iii jjj", customType)); for(int i=0;i<6;i++) writer.addDocument(doc); @@ -464,8 +475,7 @@ public class TestIndexWriterWithThreads extends LuceneTestCase { public void run() { try { Document doc = new Document(); - Field field = newField("field", "testData", Field.Store.YES, - Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS); + Field field = newField("field", "testData", TextField.TYPE_STORED); doc.add(field); IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); diff --git a/lucene/src/test/org/apache/lucene/index/TestIndexableField.java b/lucene/src/test/org/apache/lucene/index/TestIndexableField.java new file mode 100644 index 00000000000..ea6c23c6848 --- /dev/null +++ b/lucene/src/test/org/apache/lucene/index/TestIndexableField.java @@ -0,0 +1,342 @@ +package org.apache.lucene.index; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.Reader; +import java.io.StringReader; +import java.util.Iterator; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumericField.DataType; +import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.index.values.PerDocFieldValues; +import org.apache.lucene.index.values.ValueType; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util._TestUtil; + +public class TestIndexableField extends LuceneTestCase { + + private class MyField implements IndexableField { + + private final int counter; + + public MyField(int counter) { + this.counter = counter; + } + + @Override + public String name() { + return "f" + counter; + } + + @Override + public float boost() { + return 1.0f + random.nextFloat(); + } + + @Override + public boolean stored() { + return (counter & 1) == 0 || (counter % 10) == 3; + } + + @Override + public BytesRef binaryValue() { + if ((counter%10) == 3) { + final byte[] bytes = new byte[10]; + for(int idx=0;idx() { + @Override + public Iterator iterator() { + return new Iterator() { + int fieldUpto; + + @Override + public boolean hasNext() { + return fieldUpto < fieldCount; + } + + @Override + public IndexableField next() { + assert fieldUpto < fieldCount; + if (fieldUpto == 0) { + fieldUpto = 1; + return newField("id", ""+finalDocCount, StringField.TYPE_STORED); + } else { + return new MyField(finalBaseCount + (fieldUpto++-1)); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }); + } + + final IndexReader r = w.getReader(); + w.close(); + + final IndexSearcher s = new IndexSearcher(r); + int counter = 0; + for(int id=0;id docIDs = new ArrayList(); final SubDocs subDocs = new SubDocs(packID, docIDs); final List docsList = new ArrayList(); diff --git a/lucene/src/test/org/apache/lucene/index/TestNoDeletionPolicy.java b/lucene/src/test/org/apache/lucene/index/TestNoDeletionPolicy.java index 14e7b39a359..2aeb242ee9e 100644 --- a/lucene/src/test/org/apache/lucene/index/TestNoDeletionPolicy.java +++ b/lucene/src/test/org/apache/lucene/index/TestNoDeletionPolicy.java @@ -23,8 +23,7 @@ import java.lang.reflect.Modifier; import java.util.Arrays; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; @@ -74,7 +73,7 @@ public class TestNoDeletionPolicy extends LuceneTestCase { .setIndexDeletionPolicy(NoDeletionPolicy.INSTANCE)); for (int i = 0; i < 10; i++) { Document doc = new Document(); - doc.add(newField("c", "a" + i, Store.YES, Index.ANALYZED)); + doc.add(newField("c", "a" + i, TextField.TYPE_STORED)); writer.addDocument(doc); writer.commit(); assertEquals("wrong number of commits !", i + 1, IndexReader.listCommits(dir).size()); diff --git a/lucene/src/test/org/apache/lucene/index/TestNorms.java 
b/lucene/src/test/org/apache/lucene/index/TestNorms.java index 372ae2ef964..ba686ea81a6 100755 --- a/lucene/src/test/org/apache/lucene/index/TestNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestNorms.java @@ -25,8 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DefaultSimilarity; import org.apache.lucene.search.DefaultSimilarityProvider; @@ -220,7 +219,7 @@ public class TestNorms extends LuceneTestCase { Document d = new Document(); float boost = nextNorm("anyfield"); // in this test the same similarity is used for all fields so it does not matter what field is passed for (int i = 0; i < 10; i++) { - Field f = newField("f"+i,"v"+i,Store.NO,Index.NOT_ANALYZED); + Field f = newField("f"+i,"v"+i,TextField.TYPE_UNSTORED); f.setBoost(boost); d.add(f); } @@ -277,8 +276,8 @@ public class TestNorms extends LuceneTestCase { }); RandomIndexWriter writer = new RandomIndexWriter(random, dir, config); Document doc = new Document(); - Field foo = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); - Field bar = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED); + Field foo = newField("foo", "", TextField.TYPE_UNSTORED); + Field bar = newField("bar", "", TextField.TYPE_UNSTORED); doc.add(foo); doc.add(bar); diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java b/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java index 8ffb290c0bb..865945f0b36 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitNorms.java @@ -25,6 +25,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; public class TestOmitNorms extends LuceneTestCase { @@ -37,12 +39,13 @@ public class TestOmitNorms extends LuceneTestCase { Document d = new Document(); // this field will have norms - Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + Field f1 = newField("f1", "This field has norms", TextField.TYPE_UNSTORED); d.add(f1); // this field will NOT have norms - Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); - f2.setOmitNorms(true); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setOmitNorms(true); + Field f2 = newField("f2", "This field has NO norms in all docs", customType); d.add(f2); writer.addDocument(d); @@ -52,11 +55,9 @@ public class TestOmitNorms extends LuceneTestCase { d = new Document(); // Reverse - f1.setOmitNorms(true); - d.add(f1); + d.add(newField("f1", "This field has norms", customType)); - f2.setOmitNorms(false); - d.add(f2); + d.add(newField("f2", "This field has NO norms in all docs", TextField.TYPE_UNSTORED)); writer.addDocument(d); @@ -88,12 +89,13 @@ public class TestOmitNorms extends LuceneTestCase { Document d = new Document(); // this field will have norms - Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); 
+ Field f1 = newField("f1", "This field has norms", TextField.TYPE_UNSTORED); d.add(f1); // this field will NOT have norms - Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); - f2.setOmitNorms(true); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setOmitNorms(true); + Field f2 = newField("f2", "This field has NO norms in all docs", customType); d.add(f2); for (int i = 0; i < 30; i++) { @@ -105,11 +107,9 @@ d = new Document(); // Reverse - f1.setOmitNorms(true); - d.add(f1); + d.add(newField("f1", "This field has norms", customType)); - f2.setOmitNorms(false); - d.add(f2); + d.add(newField("f2", "This field has NO norms in all docs", TextField.TYPE_UNSTORED)); for (int i = 0; i < 30; i++) { writer.addDocument(d); @@ -144,18 +144,19 @@ Document d = new Document(); // this field will have norms - Field f1 = newField("f1", "This field has norms", Field.Store.NO, Field.Index.ANALYZED); + Field f1 = newField("f1", "This field has norms", TextField.TYPE_UNSTORED); d.add(f1); // this field will NOT have norms - Field f2 = newField("f2", "This field has NO norms in all docs", Field.Store.NO, Field.Index.ANALYZED); + + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setOmitNorms(true); + Field f2 = newField("f2", "This field has NO norms in all docs", customType); d.add(f2); for (int i = 0; i < 5; i++) { writer.addDocument(d); } - - f2.setOmitNorms(true); for (int i = 0; i < 20; i++) { writer.addDocument(d); @@ -194,9 +195,10 @@ lmp.setMergeFactor(2); lmp.setUseCompoundFile(false); Document d = new Document(); - - Field f1 = newField("f1", "This field has no norms", Field.Store.NO, Field.Index.ANALYZED); - f1.setOmitNorms(true); + + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setOmitNorms(true); + Field f1 = newField("f1", "This field has no norms", customType); d.add(f1); for (int i = 0; i < 30; i++) { @@ -224,16 +226,23 @@ */ public void testOmitNormsCombos() throws IOException { // indexed with norms - Field norms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED); + FieldType customType = new FieldType(TextField.TYPE_STORED); + Field norms = new Field("foo", customType, "a"); // indexed without norms - Field noNorms = new Field("foo", "a", Field.Store.YES, Field.Index.ANALYZED_NO_NORMS); + FieldType customType1 = new FieldType(TextField.TYPE_STORED); + customType1.setOmitNorms(true); + Field noNorms = new Field("foo", customType1, "a"); // not indexed, but stored - Field noIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO); + FieldType customType2 = new FieldType(); + customType2.setStored(true); + Field noIndex = new Field("foo", customType2, "a"); // not indexed but stored, omitNorms is set - Field noNormsNoIndex = new Field("foo", "a", Field.Store.YES, Field.Index.NO); - noNormsNoIndex.setOmitNorms(true); + FieldType customType3 = new FieldType(); + customType3.setStored(true); + customType3.setOmitNorms(true); + Field noNormsNoIndex = new Field("foo", customType3, "a"); // not indexed nor stored (doesn't exist at all, we index a different field instead) - Field emptyNorms = new Field("bar", "a", Field.Store.YES, Field.Index.ANALYZED); + Field emptyNorms = new Field("bar", customType, "a"); assertNotNull(getNorms("foo",
norms, norms)); assertNull(getNorms("foo", norms, noNorms)); diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java b/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java index 0e52cdb2444..3c501776720 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitPositions.java @@ -21,6 +21,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; @@ -37,8 +39,9 @@ public class TestOmitPositions extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random, dir); Document doc = new Document(); - Field f = newField("foo", "this is a test test", Field.Index.ANALYZED); - f.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + FieldType ft = new FieldType(TextField.TYPE_UNSTORED); + ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + Field f = newField("foo", "this is a test test", ft); doc.add(f); for (int i = 0; i < 100; i++) { w.addDocument(doc); @@ -67,42 +70,42 @@ public class TestOmitPositions extends LuceneTestCase { Document d = new Document(); // f1,f2,f3: docs only - Field f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED); - f1.setIndexOptions(IndexOptions.DOCS_ONLY); + FieldType ft = new FieldType(TextField.TYPE_UNSTORED); + ft.setIndexOptions(IndexOptions.DOCS_ONLY); + + Field f1 = newField("f1", "This field has docs only", ft); d.add(f1); - Field f2 = newField("f2", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED); - f2.setIndexOptions(IndexOptions.DOCS_ONLY); + Field f2 = newField("f2", "This field has docs only", ft); d.add(f2); - Field f3 = newField("f3", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED); - f3.setIndexOptions(IndexOptions.DOCS_ONLY); + Field f3 = newField("f3", "This field has docs only", ft); d.add(f3); + + FieldType ft2 = new FieldType(TextField.TYPE_UNSTORED); + ft2.setIndexOptions(IndexOptions.DOCS_AND_FREQS); // f4,f5,f6 docs and freqs - Field f4 = newField("f4", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED); - f4.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + Field f4 = newField("f4", "This field has docs and freqs", ft2); d.add(f4); - Field f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED); - f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + Field f5 = newField("f5", "This field has docs and freqs", ft2); d.add(f5); - Field f6 = newField("f6", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED); - f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + Field f6 = newField("f6", "This field has docs and freqs", ft2); d.add(f6); + FieldType ft3 = new FieldType(TextField.TYPE_UNSTORED); + ft3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + // f7,f8,f9 docs/freqs/positions - Field f7 = newField("f7", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED); - f7.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + Field f7 = newField("f7", "This field has docs and freqs and positions", ft3); d.add(f7); - Field f8 = newField("f8", "This field has docs and freqs and 
positions", Field.Store.NO, Field.Index.ANALYZED); - f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + Field f8 = newField("f8", "This field has docs and freqs and positions", ft3); d.add(f8); - Field f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED); - f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + Field f9 = newField("f9", "This field has docs and freqs and positions", ft3); d.add(f9); writer.addDocument(d); @@ -113,42 +116,33 @@ public class TestOmitPositions extends LuceneTestCase { d = new Document(); // f1,f4,f7: docs only - f1 = newField("f1", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED); - f1.setIndexOptions(IndexOptions.DOCS_ONLY); + f1 = newField("f1", "This field has docs only", ft); d.add(f1); - f4 = newField("f4", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED); - f4.setIndexOptions(IndexOptions.DOCS_ONLY); + f4 = newField("f4", "This field has docs only", ft); d.add(f4); - f7 = newField("f7", "This field has docs only", Field.Store.NO, Field.Index.ANALYZED); - f7.setIndexOptions(IndexOptions.DOCS_ONLY); + f7 = newField("f7", "This field has docs only", ft); d.add(f7); // f2, f5, f8: docs and freqs - f2 = newField("f2", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED); - f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + f2 = newField("f2", "This field has docs and freqs", ft2); d.add(f2); - f5 = newField("f5", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED); - f5.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + f5 = newField("f5", "This field has docs and freqs", ft2); d.add(f5); - f8 = newField("f8", "This field has docs and freqs", Field.Store.NO, Field.Index.ANALYZED); - f8.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + f8 = newField("f8", "This field has docs and freqs", ft2); d.add(f8); // f3, f6, f9: docs and freqs and positions - f3 = newField("f3", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED); - f3.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + f3 = newField("f3", "This field has docs and freqs and positions", ft3); d.add(f3); - f6 = newField("f6", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED); - f6.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + f6 = newField("f6", "This field has docs and freqs and positions", ft3); d.add(f6); - f9 = newField("f9", "This field has docs and freqs and positions", Field.Store.NO, Field.Index.ANALYZED); - f9.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + f9 = newField("f9", "This field has docs and freqs and positions", ft3); d.add(f9); writer.addDocument(d); @@ -201,9 +195,10 @@ public class TestOmitPositions extends LuceneTestCase { lmp.setMergeFactor(2); lmp.setUseCompoundFile(false); Document d = new Document(); - - Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); - f1.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + + FieldType ft = new FieldType(TextField.TYPE_UNSTORED); + ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS); + Field f1 = newField("f1", "This field has term freqs", ft); d.add(f1); for(int i=0;i<30;i++) @@ -215,7 +210,7 @@ public class TestOmitPositions extends LuceneTestCase { // now add some documents with positions, and check there is no prox after optimization d = new Document(); - f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED); + f1 = newField("f1", "This 
field has positions", TextField.TYPE_UNSTORED); d.add(f1); for(int i=0;i<30;i++) diff --git a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java index d9bae536231..1356fd8ddf7 100644 --- a/lucene/src/test/org/apache/lucene/index/TestOmitTf.java +++ b/lucene/src/test/org/apache/lucene/index/TestOmitTf.java @@ -22,12 +22,13 @@ import java.io.IOException; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.TermContext; -import org.apache.lucene.util._TestUtil; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.*; import org.apache.lucene.search.BooleanClause.Occur; @@ -54,6 +55,13 @@ public class TestOmitTf extends LuceneTestCase { } } + private static final FieldType omitType = new FieldType(TextField.TYPE_UNSTORED); + private static final FieldType normalType = new FieldType(TextField.TYPE_UNSTORED); + + static { + omitType.setIndexOptions(IndexOptions.DOCS_ONLY); + } + // Tests whether the DocumentWriter correctly enables the // omitTermFreqAndPositions bit in the FieldInfo public void testOmitTermFreqAndPositions() throws Exception { @@ -63,12 +71,11 @@ public class TestOmitTf extends LuceneTestCase { Document d = new Document(); // this field will have Tf - Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); + Field f1 = newField("f1", "This field has term freqs", normalType); d.add(f1); // this field will NOT have Tf - Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); - f2.setIndexOptions(IndexOptions.DOCS_ONLY); + Field f2 = newField("f2", "This field has NO Tf in all docs", omitType); d.add(f2); writer.addDocument(d); @@ -78,10 +85,10 @@ d = new Document(); // Reverse - f1.setIndexOptions(IndexOptions.DOCS_ONLY); + f1 = newField("f1", "This field has term freqs", omitType); d.add(f1); - f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + f2 = newField("f2", "This field has NO Tf in all docs", normalType); d.add(f2); writer.addDocument(d); @@ -115,12 +122,11 @@ Document d = new Document(); // this field will have Tf - Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); + Field f1 = newField("f1", "This field has term freqs", normalType); d.add(f1); // this field will NOT have Tf - Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); - f2.setIndexOptions(IndexOptions.DOCS_ONLY); + Field f2 = newField("f2", "This field has NO Tf in all docs", omitType); d.add(f2); for(int i=0;i<30;i++) @@ -131,10 +137,10 @@ d = new Document(); // Reverse - f1.setIndexOptions(IndexOptions.DOCS_ONLY); + f1 = newField("f1", "This field has term freqs", omitType); d.add(f1); - f2.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + f2 = newField("f2", "This field has NO Tf in all docs", normalType); d.add(f2); for(int i=0;i<30;i++) @@ -169,18 +175,16 @@ public class TestOmitTf
extends LuceneTestCase { Document d = new Document(); // this field will have Tf - Field f1 = newField("f1", "This field has term freqs", Field.Store.NO, Field.Index.ANALYZED); + Field f1 = newField("f1", "This field has term freqs", normalType); d.add(f1); // this field will NOT have Tf - Field f2 = newField("f2", "This field has NO Tf in all docs", Field.Store.NO, Field.Index.ANALYZED); + Field f2 = newField("f2", "This field has NO Tf in all docs", omitType); d.add(f2); for(int i=0;i<5;i++) writer.addDocument(d); - f2.setIndexOptions(IndexOptions.DOCS_ONLY); - for(int i=0;i<20;i++) writer.addDocument(d); @@ -218,8 +222,7 @@ public class TestOmitTf extends LuceneTestCase { lmp.setUseCompoundFile(false); Document d = new Document(); - Field f1 = newField("f1", "This field has no term freqs", Field.Store.NO, Field.Index.ANALYZED); - f1.setIndexOptions(IndexOptions.DOCS_ONLY); + Field f1 = newField("f1", "This field has term freqs", omitType); d.add(f1); for(int i=0;i<30;i++) @@ -231,7 +234,7 @@ public class TestOmitTf extends LuceneTestCase { // now add some documents with positions, and check there is no prox after optimization d = new Document(); - f1 = newField("f1", "This field has positions", Field.Store.NO, Field.Index.ANALYZED); + f1 = newField("f1", "This field has positions", TextField.TYPE_UNSTORED); d.add(f1); for(int i=0;i<30;i++) @@ -265,11 +268,10 @@ public class TestOmitTf extends LuceneTestCase { Document d = new Document(); sb.append(term).append(" "); String content = sb.toString(); - Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), Field.Store.NO, Field.Index.ANALYZED); - noTf.setIndexOptions(IndexOptions.DOCS_ONLY); + Field noTf = newField("noTf", content + (i%2==0 ? "" : " notf"), omitType); d.add(noTf); - Field tf = newField("tf", content + (i%2==0 ? " tf" : ""), Field.Store.NO, Field.Index.ANALYZED); + Field tf = newField("tf", content + (i%2==0 ? 
" tf" : ""), normalType); d.add(tf); writer.addDocument(d); diff --git a/lucene/src/test/org/apache/lucene/index/TestParallelReader.java b/lucene/src/test/org/apache/lucene/index/TestParallelReader.java index b2d0b3cd26a..d584ca4953a 100644 --- a/lucene/src/test/org/apache/lucene/index/TestParallelReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestParallelReader.java @@ -18,14 +18,12 @@ package org.apache.lucene.index; */ import java.io.IOException; -import java.util.Arrays; import java.util.Collection; import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.MapFieldSelector; +import org.apache.lucene.document.TextField; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; @@ -89,30 +87,6 @@ public class TestParallelReader extends LuceneTestCase { dir2.close(); } - public void testDocument() throws IOException { - Directory dir1 = getDir1(random); - Directory dir2 = getDir2(random); - ParallelReader pr = new ParallelReader(); - pr.add(IndexReader.open(dir1, false)); - pr.add(IndexReader.open(dir2, false)); - - Document doc11 = pr.document(0, new MapFieldSelector("f1")); - Document doc24 = pr.document(1, new MapFieldSelector(Arrays.asList("f4"))); - Document doc223 = pr.document(1, new MapFieldSelector("f2", "f3")); - - assertEquals(1, doc11.getFields().size()); - assertEquals(1, doc24.getFields().size()); - assertEquals(2, doc223.getFields().size()); - - assertEquals("v1", doc11.get("f1")); - assertEquals("v2", doc24.get("f4")); - assertEquals("v2", doc223.get("f2")); - assertEquals("v2", doc223.get("f3")); - pr.close(); - dir1.close(); - dir2.close(); - } - public void testIncompatibleIndexes() throws IOException { // two documents: Directory dir1 = getDir1(random); @@ -121,7 +95,8 @@ public class TestParallelReader extends LuceneTestCase { Directory dir2 = newDirectory(); IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d3 = new Document(); - d3.add(newField("f3", "v1", Field.Store.YES, Field.Index.ANALYZED)); + + d3.add(newField("f3", "v1", TextField.TYPE_STORED)); w2.addDocument(d3); w2.close(); @@ -179,7 +154,7 @@ public class TestParallelReader extends LuceneTestCase { setMergePolicy(newLogMergePolicy(10)) ); Document d = new Document(); - d.add(newField("f1", "v1", Field.Store.YES, Field.Index.ANALYZED)); + d.add(newField("f1", "v1", TextField.TYPE_STORED)); modifier.addDocument(d); modifier.close(); @@ -189,7 +164,7 @@ public class TestParallelReader extends LuceneTestCase { setMergePolicy(newLogMergePolicy(10)) ); d = new Document(); - d.add(newField("f2", "v2", Field.Store.YES, Field.Index.ANALYZED)); + d.add(newField("f2", "v2", TextField.TYPE_STORED)); modifier.addDocument(d); modifier.close(); @@ -246,16 +221,16 @@ public class TestParallelReader extends LuceneTestCase { dir = newDirectory(); IndexWriter w = new IndexWriter(dir, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d1 = new Document(); - d1.add(newField("f1", "v1", Field.Store.YES, Field.Index.ANALYZED)); - d1.add(newField("f2", "v1", Field.Store.YES, Field.Index.ANALYZED)); - d1.add(newField("f3", "v1", Field.Store.YES, Field.Index.ANALYZED)); - d1.add(newField("f4", "v1", Field.Store.YES, Field.Index.ANALYZED)); + d1.add(newField("f1", "v1", TextField.TYPE_STORED)); + d1.add(newField("f2", 
"v1", TextField.TYPE_STORED)); + d1.add(newField("f3", "v1", TextField.TYPE_STORED)); + d1.add(newField("f4", "v1", TextField.TYPE_STORED)); w.addDocument(d1); Document d2 = new Document(); - d2.add(newField("f1", "v2", Field.Store.YES, Field.Index.ANALYZED)); - d2.add(newField("f2", "v2", Field.Store.YES, Field.Index.ANALYZED)); - d2.add(newField("f3", "v2", Field.Store.YES, Field.Index.ANALYZED)); - d2.add(newField("f4", "v2", Field.Store.YES, Field.Index.ANALYZED)); + d2.add(newField("f1", "v2", TextField.TYPE_STORED)); + d2.add(newField("f2", "v2", TextField.TYPE_STORED)); + d2.add(newField("f3", "v2", TextField.TYPE_STORED)); + d2.add(newField("f4", "v2", TextField.TYPE_STORED)); w.addDocument(d2); w.close(); @@ -276,12 +251,12 @@ public class TestParallelReader extends LuceneTestCase { Directory dir1 = newDirectory(); IndexWriter w1 = new IndexWriter(dir1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d1 = new Document(); - d1.add(newField("f1", "v1", Field.Store.YES, Field.Index.ANALYZED)); - d1.add(newField("f2", "v1", Field.Store.YES, Field.Index.ANALYZED)); + d1.add(newField("f1", "v1", TextField.TYPE_STORED)); + d1.add(newField("f2", "v1", TextField.TYPE_STORED)); w1.addDocument(d1); Document d2 = new Document(); - d2.add(newField("f1", "v2", Field.Store.YES, Field.Index.ANALYZED)); - d2.add(newField("f2", "v2", Field.Store.YES, Field.Index.ANALYZED)); + d2.add(newField("f1", "v2", TextField.TYPE_STORED)); + d2.add(newField("f2", "v2", TextField.TYPE_STORED)); w1.addDocument(d2); w1.close(); return dir1; @@ -291,12 +266,12 @@ public class TestParallelReader extends LuceneTestCase { Directory dir2 = newDirectory(); IndexWriter w2 = new IndexWriter(dir2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); Document d3 = new Document(); - d3.add(newField("f3", "v1", Field.Store.YES, Field.Index.ANALYZED)); - d3.add(newField("f4", "v1", Field.Store.YES, Field.Index.ANALYZED)); + d3.add(newField("f3", "v1", TextField.TYPE_STORED)); + d3.add(newField("f4", "v1", TextField.TYPE_STORED)); w2.addDocument(d3); Document d4 = new Document(); - d4.add(newField("f3", "v2", Field.Store.YES, Field.Index.ANALYZED)); - d4.add(newField("f4", "v2", Field.Store.YES, Field.Index.ANALYZED)); + d4.add(newField("f3", "v2", TextField.TYPE_STORED)); + d4.add(newField("f4", "v2", TextField.TYPE_STORED)); w2.addDocument(d4); w2.close(); return dir2; diff --git a/lucene/src/test/org/apache/lucene/index/TestParallelReaderEmptyIndex.java b/lucene/src/test/org/apache/lucene/index/TestParallelReaderEmptyIndex.java index ea03bd7f245..f34a2b2ea14 100644 --- a/lucene/src/test/org/apache/lucene/index/TestParallelReaderEmptyIndex.java +++ b/lucene/src/test/org/apache/lucene/index/TestParallelReaderEmptyIndex.java @@ -21,13 +21,11 @@ import java.io.IOException; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util._TestUtil; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; /** @@ -77,11 +75,11 @@ public class TestParallelReaderEmptyIndex extends LuceneTestCase { { IndexWriter iw = new IndexWriter(rd1, newIndexWriterConfig( TEST_VERSION_CURRENT, new 
MockAnalyzer(random))); Document doc = new Document(); - doc.add(newField("test", "", Store.NO, Index.ANALYZED, - TermVector.YES)); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setStoreTermVectors(true); + doc.add(newField("test", "", customType)); iw.addDocument(doc); - doc.add(newField("test", "", Store.NO, Index.ANALYZED, - TermVector.NO)); + doc.add(newField("test", "", TextField.TYPE_UNSTORED)); iw.addDocument(doc); iw.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java b/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java index 04d50021493..9b63b7cfa5b 100755 --- a/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java +++ b/lucene/src/test/org/apache/lucene/index/TestParallelTermEnum.java @@ -22,8 +22,7 @@ import java.io.IOException; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; @@ -41,11 +40,9 @@ public class TestParallelTermEnum extends LuceneTestCase { IndexWriter iw1 = new IndexWriter(rd1, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); doc = new Document(); - doc.add(newField("field1", "the quick brown fox jumps", Store.YES, - Index.ANALYZED)); - doc.add(newField("field2", "the quick brown fox jumps", Store.YES, - Index.ANALYZED)); - doc.add(newField("field4", "", Store.NO, Index.ANALYZED)); + doc.add(newField("field1", "the quick brown fox jumps", TextField.TYPE_STORED)); + doc.add(newField("field2", "the quick brown fox jumps", TextField.TYPE_STORED)); + doc.add(newField("field4", "", TextField.TYPE_UNSTORED)); iw1.addDocument(doc); iw1.close(); @@ -53,11 +50,9 @@ public class TestParallelTermEnum extends LuceneTestCase { IndexWriter iw2 = new IndexWriter(rd2, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random))); doc = new Document(); - doc.add(newField("field0", "", Store.NO, Index.ANALYZED)); - doc.add(newField("field1", "the fox jumps over the lazy dog", - Store.YES, Index.ANALYZED)); - doc.add(newField("field3", "the fox jumps over the lazy dog", - Store.YES, Index.ANALYZED)); + doc.add(newField("field0", "", TextField.TYPE_UNSTORED)); + doc.add(newField("field1", "the fox jumps over the lazy dog", TextField.TYPE_STORED)); + doc.add(newField("field3", "the fox jumps over the lazy dog", TextField.TYPE_STORED)); iw2.addDocument(doc); iw2.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java b/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java index 59786366df3..768de2592dc 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java +++ b/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java @@ -28,9 +28,8 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import 
org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor; import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor; import org.apache.lucene.search.DocIdSetIterator; @@ -135,12 +134,14 @@ public class TestPayloadProcessorProvider extends LuceneTestCase { ); TokenStream payloadTS1 = new PayloadTokenStream("p1"); TokenStream payloadTS2 = new PayloadTokenStream("p2"); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setOmitNorms(true); for (int i = 0; i < NUM_DOCS; i++) { Document doc = new Document(); - doc.add(newField("id", "doc" + i, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(newField("content", "doc content " + i, Store.NO, Index.ANALYZED)); - doc.add(new Field("p", payloadTS1)); - doc.add(new Field("p", payloadTS2)); + doc.add(newField("id", "doc" + i, customType)); + doc.add(newField("content", "doc content " + i, TextField.TYPE_UNSTORED)); + doc.add(new TextField("p", payloadTS1)); + doc.add(new TextField("p", payloadTS2)); writer.addDocument(doc); if (multipleCommits && (i % 4 == 0)) { writer.commit(); diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloads.java b/lucene/src/test/org/apache/lucene/index/TestPayloads.java index f726987e34a..6ece3e5d9e8 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPayloads.java +++ b/lucene/src/test/org/apache/lucene/index/TestPayloads.java @@ -19,7 +19,6 @@ package org.apache.lucene.index; import java.io.IOException; import java.io.Reader; -import java.io.UnsupportedEncodingException; import java.nio.charset.Charset; import java.util.ArrayList; import java.util.HashMap; @@ -35,13 +34,13 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Bits; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.util.UnicodeUtil; import org.apache.lucene.util._TestUtil; public class TestPayloads extends LuceneTestCase { @@ -101,15 +100,15 @@ public class TestPayloads extends LuceneTestCase { IndexWriter writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer)); Document d = new Document(); // this field won't have any payloads - d.add(newField("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("f1", "This field has no payloads", TextField.TYPE_UNSTORED)); // this field will have payloads in all docs, however not for all term positions, // so this field is used to check if the DocumentWriter correctly enables the payloads bit // even if only some term positions have payloads - d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); - d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED)); + d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED)); // this field is used to verify if the SegmentMerger enables payloads for a field if it has payloads // enabled in only some documents - d.add(newField("f3", "This field has payloads in some docs", Field.Store.NO, 
Field.Index.ANALYZED)); + d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED)); // only add payload data for field f2 analyzer.setPayloadData("f2", 1, "somedata".getBytes(), 0, 1); writer.addDocument(d); @@ -128,10 +127,10 @@ public class TestPayloads extends LuceneTestCase { writer = new IndexWriter(ram, newIndexWriterConfig( TEST_VERSION_CURRENT, analyzer).setOpenMode(OpenMode.CREATE)); d = new Document(); - d.add(newField("f1", "This field has no payloads", Field.Store.NO, Field.Index.ANALYZED)); - d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); - d.add(newField("f2", "This field has payloads in all docs", Field.Store.NO, Field.Index.ANALYZED)); - d.add(newField("f3", "This field has payloads in some docs", Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField("f1", "This field has no payloads", TextField.TYPE_UNSTORED)); + d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED)); + d.add(newField("f2", "This field has payloads in all docs", TextField.TYPE_UNSTORED)); + d.add(newField("f3", "This field has payloads in some docs", TextField.TYPE_UNSTORED)); // add payload data for field f2 and f3 analyzer.setPayloadData("f2", "somedata".getBytes(), 0, 1); analyzer.setPayloadData("f3", "somedata".getBytes(), 0, 3); @@ -188,7 +187,7 @@ public class TestPayloads extends LuceneTestCase { byte[] payloadData = generateRandomData(payloadDataLength); Document d = new Document(); - d.add(newField(fieldName, content, Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField(fieldName, content, TextField.TYPE_UNSTORED)); // add the same document multiple times to have the same payload lengths for all // occurrences within two consecutive skip intervals int offset = 0; @@ -319,7 +318,7 @@ public class TestPayloads extends LuceneTestCase { String singleTerm = "lucene"; d = new Document(); - d.add(newField(fieldName, singleTerm, Field.Store.NO, Field.Index.ANALYZED)); + d.add(newField(fieldName, singleTerm, TextField.TYPE_UNSTORED)); // add a payload whose length is greater than the buffer size of BufferedIndexOutput payloadData = generateRandomData(2000); analyzer.setPayloadData(fieldName, payloadData, 100, 1500); @@ -511,7 +510,7 @@ public class TestPayloads extends LuceneTestCase { try { for (int j = 0; j < numDocs; j++) { Document d = new Document(); - d.add(new Field(field, new PoolingPayloadTokenStream(pool))); + d.add(new TextField(field, new PoolingPayloadTokenStream(pool))); writer.addDocument(d); } } catch (Exception e) { @@ -612,14 +611,14 @@ public class TestPayloads extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, true)); Document doc = new Document(); - doc.add(new Field("hasMaybepayload", "here we go", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("hasMaybepayload", TextField.TYPE_STORED, "here we go")); writer.addDocument(doc); writer.close(); writer = new RandomIndexWriter(random, dir, new MockAnalyzer(random, MockTokenizer.WHITESPACE, true)); doc = new Document(); - doc.add(new Field("hasMaybepayload2", "here we go", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("hasMaybepayload2", TextField.TYPE_STORED, "here we go")); writer.addDocument(doc); writer.addDocument(doc); writer.optimize(); diff --git a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java index 
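
// Editor's aside: the last TestPayloads hunks above show the constructor
// reordering that recurs through the rest of this patch -- the FieldType moves in
// front of the value. A one-line sketch under the same assumptions:
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;

class CtorOrderSketch {
  static void add(Document doc) {
    // was: new Field("hasMaybepayload", "here we go", Field.Store.YES, Field.Index.ANALYZED)
    doc.add(new Field("hasMaybepayload", TextField.TYPE_STORED, "here we go"));
  }
}
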
a8e7d79838f..f55b50fd8ea 100644 --- a/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java +++ b/lucene/src/test/org/apache/lucene/index/TestPerFieldCodecSupport.java @@ -22,9 +22,11 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.index.CheckIndex.Status; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.CheckIndex.Status.SegmentInfoStatus; +import org.apache.lucene.index.CheckIndex.Status; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.codecs.Codec; import org.apache.lucene.index.codecs.CodecProvider; @@ -63,7 +65,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { private void addDocs(IndexWriter writer, int numDocs) throws IOException { for (int i = 0; i < numDocs; i++) { Document doc = new Document(); - doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED)); writer.addDocument(doc); } } @@ -71,7 +73,7 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { private void addDocs2(IndexWriter writer, int numDocs) throws IOException { for (int i = 0; i < numDocs; i++) { Document doc = new Document(); - doc.add(newField("content", "bbb", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("content", "bbb", TextField.TYPE_UNSTORED)); writer.addDocument(doc); } } @@ -79,8 +81,8 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { private void addDocs3(IndexWriter writer, int numDocs) throws IOException { for (int i = 0; i < numDocs; i++) { Document doc = new Document(); - doc.add(newField("content", "ccc", Field.Store.NO, Field.Index.ANALYZED)); - doc.add(newField("id", "" + i, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("content", "ccc", TextField.TYPE_UNSTORED)); + doc.add(newField("id", "" + i, StringField.TYPE_STORED)); writer.addDocument(doc); } } @@ -271,8 +273,6 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { @Test public void testStressPerFieldCodec() throws IOException { Directory dir = newDirectory(random); - Index[] indexValue = new Index[] { Index.ANALYZED, Index.ANALYZED_NO_NORMS, - Index.NOT_ANALYZED, Index.NOT_ANALYZED_NO_NORMS }; final int docsPerRound = 97; int numRounds = atLeast(1); for (int i = 0; i < numRounds; i++) { @@ -297,9 +297,11 @@ public class TestPerFieldCodecSupport extends LuceneTestCase { for (int j = 0; j < docsPerRound; j++) { final Document doc = new Document(); for (int k = 0; k < num; k++) { + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setTokenized(random.nextBoolean()); + customType.setOmitNorms(random.nextBoolean()); Field field = newField("" + k, _TestUtil - .randomRealisticUnicodeString(random, 128), indexValue[random - .nextInt(indexValue.length)]); + .randomRealisticUnicodeString(random, 128), customType); doc.add(field); } writer.addDocument(doc); diff --git a/lucene/src/test/org/apache/lucene/index/TestRollback.java b/lucene/src/test/org/apache/lucene/index/TestRollback.java index 51d8cc4b22d..47b4669b101 100644 --- a/lucene/src/test/org/apache/lucene/index/TestRollback.java +++ b/lucene/src/test/org/apache/lucene/index/TestRollback.java @@ -19,8 +19,7 @@ package org.apache.lucene.index; import 
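
// Editor's aside: in the testStressPerFieldCodec hunk above, the four old Index
// constants collapse into two independent FieldType switches; sketched standalone:
import java.util.Random;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

class RandomIndexTypeSketch {
  // covers ANALYZED, ANALYZED_NO_NORMS, NOT_ANALYZED and NOT_ANALYZED_NO_NORMS
  static FieldType next(Random random) {
    FieldType customType = new FieldType(TextField.TYPE_UNSTORED);
    customType.setTokenized(random.nextBoolean()); // ANALYZED vs NOT_ANALYZED
    customType.setOmitNorms(random.nextBoolean()); // with vs without _NO_NORMS
    return customType;
  }
}
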
org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -32,7 +31,7 @@ public class TestRollback extends LuceneTestCase { RandomIndexWriter rw = new RandomIndexWriter(random, dir); for (int i = 0; i < 5; i++) { Document doc = new Document(); - doc.add(newField("pk", Integer.toString(i), Store.YES, Index.ANALYZED_NO_NORMS)); + doc.add(newField("pk", Integer.toString(i), StringField.TYPE_STORED)); rw.addDocument(doc); } rw.close(); @@ -43,8 +42,8 @@ public class TestRollback extends LuceneTestCase { for (int i = 0; i < 3; i++) { Document doc = new Document(); String value = Integer.toString(i); - doc.add(newField("pk", value, Store.YES, Index.ANALYZED_NO_NORMS)); - doc.add(newField("text", "foo", Store.YES, Index.ANALYZED_NO_NORMS)); + doc.add(newField("pk", value, StringField.TYPE_STORED)); + doc.add(newField("text", "foo", StringField.TYPE_STORED)); w.updateDocument(new Term("pk", value), doc); } w.rollback(); diff --git a/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java index 1b0386bddec..62ca6ed3214 100644 --- a/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java +++ b/lucene/src/test/org/apache/lucene/index/TestRollingUpdates.java @@ -19,7 +19,6 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.*; -import org.apache.lucene.document.Field.Index; import org.apache.lucene.index.codecs.CodecProvider; import org.apache.lucene.store.*; import org.apache.lucene.util.*; @@ -55,7 +54,7 @@ public class TestRollingUpdates extends LuceneTestCase { } else { id++; } - doc.getField("docid").setValue(myID); + ((Field) doc.getField("docid")).setValue(myID); w.updateDocument(new Term("docid", myID), doc); if (docIter >= SIZE && random.nextInt(50) == 17) { @@ -130,7 +129,7 @@ public class TestRollingUpdates extends LuceneTestCase { IndexReader open = null; for (int i = 0; i < num; i++) { Document doc = new Document();// docs.nextDoc(); - doc.add(newField("id", "test", Index.NOT_ANALYZED)); + doc.add(newField("id", "test", StringField.TYPE_UNSTORED)); writer.updateDocument(new Term("id", "test"), doc); if (random.nextInt(3) == 0) { if (open == null) { diff --git a/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java b/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java index 8097c134865..8e9e58c92ec 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java +++ b/lucene/src/test/org/apache/lucene/index/TestSameTokenSamePosition.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.io.Reader; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.ReusableAnalyzerBase; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; @@ -29,6 +28,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import 
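
// Editor's aside: the TestRollback and TestRollingUpdates hunks above show the
// two keyword-field idioms after the cutover. A minimal sketch, assuming that
// Document.getField() now returns the IndexableField interface (hence the cast
// back to Field before mutating), as in this patch:
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;

class KeywordFieldSketch {
  static Document newPkDoc(String pk) {
    Document doc = new Document();
    doc.add(new Field("pk", StringField.TYPE_STORED, pk));
    return doc;
  }

  static void reusePkDoc(Document doc, String pk) {
    // getField() returns IndexableField, so cast to the concrete Field to set a new value
    ((Field) doc.getField("pk")).setValue(pk);
  }
}
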
org.apache.lucene.util.LuceneTestCase; @@ -42,8 +42,7 @@ public class TestSameTokenSamePosition extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer())); Document doc = new Document(); - doc.add(new Field("eng", "Six drunken" /*This shouldn't matter. */, - Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("eng", TextField.TYPE_STORED, "Six drunken" /*This shouldn't matter. */)); riw.addDocument(doc); riw.close(); dir.close(); @@ -56,8 +55,7 @@ public class TestSameTokenSamePosition extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter riw = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new BugReproAnalyzer())); Document doc = new Document(); - doc.add(new Field("eng", "Six drunken" /*This shouldn't matter. */, - Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("eng", TextField.TYPE_STORED, "Six drunken" /*This shouldn't matter. */)); for (int i = 0; i < 100; i++) { riw.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java b/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java index e23b0be1c91..78d837a89cc 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentInfo.java @@ -3,8 +3,7 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; @@ -33,7 +32,7 @@ public class TestSegmentInfo extends LuceneTestCase { IndexWriter writer = new IndexWriter(dir, conf); writer.setInfoStream(VERBOSE ? 
System.out : null); Document doc = new Document(); - doc.add(new Field("a", "value", Store.YES, Index.ANALYZED)); + doc.add(new Field("a", TextField.TYPE_STORED, "value")); writer.addDocument(doc); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java index ba73fc34794..a84a96d4cca 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentMerger.java @@ -18,14 +18,10 @@ package org.apache.lucene.index; */ import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.store.BufferedIndexInput; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.util.BytesRef; @@ -142,7 +138,7 @@ public class TestSegmentMerger extends LuceneTestCase { // Create an index w/ .del file w.addDocument(new Document()); Document doc = new Document(); - doc.add(new Field("c", "test", Store.NO, Index.ANALYZED)); + doc.add(new TextField("c", "test")); w.addDocument(doc); w.commit(); w.deleteDocuments(new Term("c", "test")); @@ -160,7 +156,7 @@ public class TestSegmentMerger extends LuceneTestCase { // Create an index w/ .s* w = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); doc = new Document(); - doc.add(new Field("c", "test", Store.NO, Index.ANALYZED)); + doc.add(new TextField("c", "test")); w.addDocument(doc); w.close(); IndexReader r = IndexReader.open(dir, false); diff --git a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java index 2c780f64f5d..cbd658476e2 100644 --- a/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java +++ b/lucene/src/test/org/apache/lucene/index/TestSegmentReader.java @@ -26,10 +26,8 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.BytesRef; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IOContext.Context; public class TestSegmentReader extends LuceneTestCase { private Directory dir; @@ -68,8 +66,8 @@ public class TestSegmentReader extends LuceneTestCase { //There are 2 unstored fields on the document that are not preserved across writing assertTrue(DocHelper.numFields(result) == DocHelper.numFields(testDoc) - DocHelper.unstored.size()); - List<Fieldable> fields = result.getFields(); - for (final Fieldable field : fields ) { + List<IndexableField> fields = result.getFields(); + for (final IndexableField field : fields ) { assertTrue(field != null); assertTrue(DocHelper.nameValues.containsKey(field.name())); } @@ -176,9 +174,9 @@ public class TestSegmentReader extends LuceneTestCase { public static void checkNorms(IndexReader reader) throws IOException { // test omit norms for (int i=0; i aDocs = new HashSet<Integer>(); final Document doc = new Document(); - final Field f = newField("field", "", Field.Index.NOT_ANALYZED_NO_NORMS); + final Field f = newField("field", "", StringField.TYPE_UNSTORED); doc.add(f);
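
// Editor's aside: Fieldable is gone; stored documents expose their fields as the
// IndexableField interface (cf. the TestSegmentReader hunk above). A minimal
// sketch, assuming getFields() returns List<IndexableField> as in that hunk:
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexableField;

class FieldIterationSketch {
  static void printNames(Document result) {
    List<IndexableField> fields = result.getFields();
    for (final IndexableField field : fields) {
      System.out.println(field.name());
    }
  }
}
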
- final Field idField = newField("id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + final Field idField = newField("id", "", StringField.TYPE_STORED); doc.add(idField); int num = atLeast(4097); for(int id=0;id fieldNameComparator = new Comparator<Fieldable>() { - public int compare(Fieldable o1, Fieldable o2) { - return o1.name().compareTo(o2.name()); - } + static Comparator<IndexableField> fieldNameComparator = new Comparator<IndexableField>() { + public int compare(IndexableField o1, IndexableField o2) { + return o1.name().compareTo(o2.name()); + } }; // This test avoids using any extra synchronization in the multiple @@ -249,13 +250,12 @@ public class TestStressIndexing2 extends LuceneTestCase { Iterator<Document> iter = docs.values().iterator(); while (iter.hasNext()) { Document d = iter.next(); - ArrayList<Fieldable> fields = new ArrayList<Fieldable>(); + ArrayList<IndexableField> fields = new ArrayList<IndexableField>(); fields.addAll(d.getFields()); // put fields in same order each time Collections.sort(fields, fieldNameComparator); Document d1 = new Document(); - d1.setBoost(d.getBoost()); for (int i=0; i ff1 = d1.getFields(); - List<Fieldable> ff2 = d2.getFields(); + List<IndexableField> ff1 = d1.getFields(); + List<IndexableField> ff2 = d2.getFields(); Collections.sort(ff1, fieldNameComparator); Collections.sort(ff2, fieldNameComparator); @@ -517,10 +517,10 @@ public class TestStressIndexing2 extends LuceneTestCase { assertEquals(ff1 + " : " + ff2, ff1.size(), ff2.size()); for (int i=0; i fields = new ArrayList(); String idString = getIdString(); - Field idField = newField("id", idString, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); + Field idField = newField("id", idString, customType1); fields.add(idField); int nFields = nextInt(maxFields); for (int i=0; i(); diff --git a/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java b/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java index 2f11957e8d0..3abf2338612 100644 --- a/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java +++ b/lucene/src/test/org/apache/lucene/index/TestThreadedOptimize.java @@ -22,7 +22,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.store.Directory; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.util.English; @@ -62,10 +63,13 @@ public class TestThreadedOptimize extends LuceneTestCase { ((LogMergePolicy) writer.getConfig().getMergePolicy()).setMergeFactor(1000); + final FieldType customType = new FieldType(StringField.TYPE_STORED); + customType.setOmitNorms(true); + for(int i=0;i<200;i++) { Document d = new Document(); - d.add(newField("id", Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - d.add(newField("contents", English.intToEnglish(i), Field.Store.NO, Field.Index.ANALYZED_NO_NORMS)); + d.add(newField("id", Integer.toString(i), customType)); + d.add(newField("contents", English.intToEnglish(i), customType)); writer.addDocument(d); } @@ -85,8 +89,8 @@ public class TestThreadedOptimize extends LuceneTestCase { writerFinal.optimize(false); for(int k=0;k<17*(1+iFinal);k++) { Document d = new Document(); - d.add(newField("id", iterFinal + "_" + iFinal + "_" + j + "_" + k, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - d.add(newField("contents", English.intToEnglish(iFinal+k), Field.Store.NO, Field.Index.ANALYZED_NO_NORMS)); + d.add(newField("id", iterFinal + "_" + iFinal +
"_" + j + "_" + k, customType)); + d.add(newField("contents", English.intToEnglish(iFinal+k), customType)); writerFinal.addDocument(d); } for(int k=0;k<9*(1+iFinal);k++) diff --git a/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java b/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java index a4ea7fcd91b..c30d330f903 100644 --- a/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java +++ b/lucene/src/test/org/apache/lucene/index/TestTieredMergePolicy.java @@ -19,7 +19,7 @@ package org.apache.lucene.index; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.store.Directory; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; @@ -39,7 +39,7 @@ public class TestTieredMergePolicy extends LuceneTestCase { w.setInfoStream(VERBOSE ? System.out : null); for(int i=0;i<80;i++) { Document doc = new Document(); - doc.add(newField("content", "aaa " + (i%4), Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("content", "aaa " + (i%4), TextField.TYPE_UNSTORED)); w.addDocument(doc); } assertEquals(80, w.maxDoc()); @@ -86,7 +86,7 @@ public class TestTieredMergePolicy extends LuceneTestCase { final int numDocs = _TestUtil.nextInt(random, 20, 100); for(int i=0;i valueVarList, boolean withDeletions, int bytesSize) throws CorruptIndexException, IOException { final boolean isNumeric = NUMERICS.contains(value); FixedBitSet deleted = new FixedBitSet(numValues); Document doc = new Document(); - Index idx = IDX_VALUES[random.nextInt(IDX_VALUES.length)]; - AbstractField field = random.nextBoolean() ? new IndexDocValuesField(value.name()) - : newField(value.name(), _TestUtil.randomRealisticUnicodeString(random, - 10), idx == Index.NO ? 
Store.YES : Store.NO, idx); - doc.add(field); - IndexDocValuesField valField = new IndexDocValuesField("prototype"); + IndexDocValuesField valField = new IndexDocValuesField(value.name()); + doc.add(valField); final BytesRef bytesRef = new BytesRef(); final String idBase = value.name() + "_"; @@ -544,9 +535,7 @@ public class TestDocValuesIndexing extends LuceneTestCase { } } doc.removeFields("id"); - doc.add(new Field("id", idBase + i, Store.YES, - Index.NOT_ANALYZED_NO_NORMS)); - valField.set(field); + doc.add(new Field("id", StringField.TYPE_STORED, idBase + i)); w.addDocument(doc); if (i % 7 == 0) { @@ -568,8 +557,9 @@ public class TestDocValuesIndexing extends LuceneTestCase { w.commit(); // TODO test unoptimized with deletions - if (withDeletions || random.nextBoolean()) + if (withDeletions || random.nextBoolean()) { w.optimize(true); + } return deleted; } } diff --git a/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java b/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java index 62521c2a7f5..b51ec02f415 100644 --- a/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java +++ b/lucene/src/test/org/apache/lucene/search/BaseTestRangeFilter.java @@ -23,6 +23,7 @@ import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.index.RandomIndexWriter; @@ -116,9 +117,9 @@ public class BaseTestRangeFilter extends LuceneTestCase { /* build an index */ Document doc = new Document(); - Field idField = newField(random, "id", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); - Field randField = newField(random, "rand", "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); - Field bodyField = newField(random, "body", "", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + Field idField = newField(random, "id", "", StringField.TYPE_STORED); + Field randField = newField(random, "rand", "", StringField.TYPE_STORED); + Field bodyField = newField(random, "body", "", StringField.TYPE_UNSTORED); doc.add(idField); doc.add(randField); doc.add(bodyField); diff --git a/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java b/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java index a148c237387..f38f6251573 100644 --- a/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestAutomatonQuery.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; @@ -46,12 +47,9 @@ public class TestAutomatonQuery extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory); Document doc = new Document(); - Field titleField = newField("title", "some title", Field.Store.NO, - Field.Index.ANALYZED); - Field field = newField(FN, "this is document one 2345", Field.Store.NO, - Field.Index.ANALYZED); - Field footerField = newField("footer", "a footer", Field.Store.NO, - Field.Index.ANALYZED); + Field titleField = newField("title", "some title", TextField.TYPE_UNSTORED); + Field field = newField(FN, "this is document one 
2345", TextField.TYPE_UNSTORED); + Field footerField = newField("footer", "a footer", TextField.TYPE_UNSTORED); doc.add(titleField); doc.add(field); doc.add(footerField); diff --git a/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java b/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java index 53af0ad3a62..149e89de66a 100644 --- a/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java +++ b/lucene/src/test/org/apache/lucene/search/TestAutomatonQueryUnicode.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -47,12 +48,9 @@ public class TestAutomatonQueryUnicode extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, directory); Document doc = new Document(); - Field titleField = newField("title", "some title", Field.Store.NO, - Field.Index.ANALYZED); - Field field = newField(FN, "", Field.Store.NO, - Field.Index.ANALYZED); - Field footerField = newField("footer", "a footer", Field.Store.NO, - Field.Index.ANALYZED); + Field titleField = newField("title", "some title", TextField.TYPE_UNSTORED); + Field field = newField(FN, "", TextField.TYPE_UNSTORED); + Field footerField = newField("footer", "a footer", TextField.TYPE_UNSTORED); doc.add(titleField); doc.add(field); doc.add(footerField); diff --git a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java index 2a460a57083..5afd53602bf 100644 --- a/lucene/src/test/org/apache/lucene/search/TestBoolean2.java +++ b/lucene/src/test/org/apache/lucene/search/TestBoolean2.java @@ -22,7 +22,7 @@ import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; @@ -56,7 +56,7 @@ public class TestBoolean2 extends LuceneTestCase { RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); - doc.add(newField(field, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField(field, docFields[i], TextField.TYPE_UNSTORED)); writer.addDocument(doc); } writer.close(); @@ -81,12 +81,12 @@ public class TestBoolean2 extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); - doc.add(newField("field2", "xxx", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("field2", "xxx", TextField.TYPE_UNSTORED)); for(int i=0;i lastScore); lastScore = scores[i]; } diff --git a/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java b/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java index f4b015e52e1..86dacf3eb4c 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocIdSet.java @@ -25,8 +25,7 @@ import java.util.Iterator; import junit.framework.Assert; import 
org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.RandomIndexWriter; @@ -103,7 +102,7 @@ public class TestDocIdSet extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); Document doc = new Document(); - doc.add(newField("c", "val", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField("c", "val", StringField.TYPE_UNSTORED)); writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java index fe642d1337f..bb4630f01dc 100644 --- a/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java +++ b/lucene/src/test/org/apache/lucene/search/TestDocValuesScoring.java @@ -22,6 +22,7 @@ import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IndexDocValuesField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; @@ -50,11 +51,11 @@ public class TestDocValuesScoring extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random, dir); Document doc = new Document(); - Field field = newField("foo", "", Field.Store.NO, Field.Index.ANALYZED); + Field field = newField("foo", "", TextField.TYPE_UNSTORED); doc.add(field); IndexDocValuesField dvField = new IndexDocValuesField("foo_boost"); doc.add(dvField); - Field field2 = newField("bar", "", Field.Store.NO, Field.Index.ANALYZED); + Field field2 = newField("bar", "", TextField.TYPE_UNSTORED); doc.add(field2); field.setValue("quick brown fox"); diff --git a/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java b/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java index 32d1407631b..cf7458152d5 100644 --- a/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java +++ b/lucene/src/test/org/apache/lucene/search/TestElevationComparator.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.*; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.search.FieldValueHitQueue.Entry; @@ -124,7 +124,7 @@ public class TestElevationComparator extends LuceneTestCase { private Document adoc(String[] vals) { Document doc = new Document(); for (int i = 0; i < vals.length - 2; i += 2) { - doc.add(newField(vals[i], vals[i + 1], Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField(vals[i], vals[i + 1], TextField.TYPE_STORED)); } return doc; } diff --git a/lucene/src/test/org/apache/lucene/search/TestExplanations.java b/lucene/src/test/org/apache/lucene/search/TestExplanations.java index c814109a295..49a629509ac 100644 --- a/lucene/src/test/org/apache/lucene/search/TestExplanations.java +++ b/lucene/src/test/org/apache/lucene/search/TestExplanations.java @@ -20,6 +20,8 @@ package org.apache.lucene.search; 
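
// Editor's aside: the TestDocValuesScoring hunk above pairs a normal indexed
// field with an IndexDocValuesField carrying a per-document value. The hunks only
// show construction; the setFloat call below is an assumption about this trunk
// snapshot's IndexDocValuesField API, not something the patch itself shows.
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IndexDocValuesField;
import org.apache.lucene.document.TextField;

class DocValuesFieldSketch {
  static Document doc(String text, float boost) {
    Document doc = new Document();
    doc.add(new Field("foo", TextField.TYPE_UNSTORED, text));
    IndexDocValuesField dvField = new IndexDocValuesField("foo_boost");
    dvField.setFloat(boost); // assumed setter, see note above
    doc.add(dvField);
    return doc;
  }
}
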
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -73,11 +75,11 @@ public class TestExplanations extends LuceneTestCase { RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); - doc.add(newField(KEY, ""+i, Field.Store.NO, Field.Index.NOT_ANALYZED)); - Field f = newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED); + doc.add(newField(KEY, ""+i, StringField.TYPE_UNSTORED)); + Field f = newField(FIELD, docFields[i], TextField.TYPE_UNSTORED); f.setBoost(i); doc.add(f); - doc.add(newField(ALTFIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField(ALTFIELD, docFields[i], TextField.TYPE_UNSTORED)); writer.addDocument(doc); } reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java index 1a36d3a77c5..8ef58d97c99 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCache.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCache.java @@ -18,7 +18,7 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; @@ -62,17 +62,17 @@ public class TestFieldCache extends LuceneTestCase { writer.w.setInfoStream(VERBOSE ? 
System.out : null); for (int i = 0; i < NUM_DOCS; i++){ Document doc = new Document(); - doc.add(newField("theLong", String.valueOf(theLong--), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField("theDouble", String.valueOf(theDouble--), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField("theByte", String.valueOf(theByte--), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField("theShort", String.valueOf(theShort--), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField("theInt", String.valueOf(theInt--), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField("theFloat", String.valueOf(theFloat--), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("theLong", String.valueOf(theLong--), StringField.TYPE_UNSTORED)); + doc.add(newField("theDouble", String.valueOf(theDouble--), StringField.TYPE_UNSTORED)); + doc.add(newField("theByte", String.valueOf(theByte--), StringField.TYPE_UNSTORED)); + doc.add(newField("theShort", String.valueOf(theShort--), StringField.TYPE_UNSTORED)); + doc.add(newField("theInt", String.valueOf(theInt--), StringField.TYPE_UNSTORED)); + doc.add(newField("theFloat", String.valueOf(theFloat--), StringField.TYPE_UNSTORED)); // sometimes skip the field: if (random.nextInt(40) != 17) { unicodeStrings[i] = generateString(i); - doc.add(newField("theRandomUnicodeString", unicodeStrings[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField("theRandomUnicodeString", unicodeStrings[i], StringField.TYPE_STORED)); } // sometimes skip the field: @@ -80,7 +80,7 @@ public class TestFieldCache extends LuceneTestCase { for (int j = 0; j < NUM_ORDS; j++) { String newValue = generateString(i); multiValued[i][j] = new BytesRef(newValue); - doc.add(newField("theRandomUnicodeMultiValuedField", newValue, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField("theRandomUnicodeMultiValuedField", newValue, StringField.TYPE_STORED)); } Arrays.sort(multiValued[i]); } diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java b/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java index d33f5a388e4..c2e71ea43de 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCacheRangeFilter.java @@ -25,7 +25,7 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.store.Directory; import org.junit.Test; @@ -535,8 +535,8 @@ public class TestFieldCacheRangeFilter extends BaseTestRangeFilter { for (int d = -20; d <= 20; d++) { Document doc = new Document(); - doc.add(newField("id",Integer.toString(d), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(newField("body","body", Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("id",Integer.toString(d), StringField.TYPE_UNSTORED)); + doc.add(newField("body","body", StringField.TYPE_UNSTORED)); writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java b/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java index f526f3ea3ea..2132521ea9e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestFieldCacheTermsFilter.java @@ -20,7 +20,7 @@ 
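
// Editor's aside: the TestFieldCache hunk above maps the old NOT_ANALYZED
// variants onto the two StringField presets; the choice is purely whether the
// value must be retrievable from the stored document. Sketched standalone:
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;

class CacheKeySketch {
  static Document doc(long theLong, String unicode) {
    Document doc = new Document();
    // match/sort only, never loaded back:
    doc.add(new Field("theLong", StringField.TYPE_UNSTORED, String.valueOf(theLong)));
    // also retrievable via the stored document:
    doc.add(new Field("theRandomUnicodeString", StringField.TYPE_STORED, unicode));
    return doc;
  }
}
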
package org.apache.lucene.search; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; @@ -41,7 +41,7 @@ public class TestFieldCacheTermsFilter extends LuceneTestCase { for (int i = 0; i < 100; i++) { Document doc = new Document(); int term = i * 10; //terms are units of 10; - doc.add(newField(fieldName, "" + term, Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField(fieldName, "" + term, StringField.TYPE_STORED)); w.addDocument(doc); } IndexReader reader = w.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java index b901d2f0784..cc19a027f62 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestFilteredQuery.java @@ -21,7 +21,7 @@ import java.util.BitSet; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; @@ -54,23 +54,23 @@ public class TestFilteredQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter (random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); - doc.add (newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); - doc.add (newField("sorter", "b", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("field", "one two three four five", TextField.TYPE_STORED)); + doc.add (newField("sorter", "b", TextField.TYPE_STORED)); writer.addDocument (doc); doc = new Document(); - doc.add (newField("field", "one two three four", Field.Store.YES, Field.Index.ANALYZED)); - doc.add (newField("sorter", "d", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("field", "one two three four", TextField.TYPE_STORED)); + doc.add (newField("sorter", "d", TextField.TYPE_STORED)); writer.addDocument (doc); doc = new Document(); - doc.add (newField("field", "one two three y", Field.Store.YES, Field.Index.ANALYZED)); - doc.add (newField("sorter", "a", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("field", "one two three y", TextField.TYPE_STORED)); + doc.add (newField("sorter", "a", TextField.TYPE_STORED)); writer.addDocument (doc); doc = new Document(); - doc.add (newField("field", "one two x", Field.Store.YES, Field.Index.ANALYZED)); - doc.add (newField("sorter", "c", Field.Store.YES, Field.Index.ANALYZED)); + doc.add (newField("field", "one two x", TextField.TYPE_STORED)); + doc.add (newField("sorter", "c", TextField.TYPE_STORED)); writer.addDocument (doc); // tests here require single segment (eg try seed diff --git a/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java b/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java index ba0222c573f..5552408d1d1 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java +++ b/lucene/src/test/org/apache/lucene/search/TestFilteredSearch.java @@ -22,7 +22,7 @@ import java.io.IOException; import 
org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; @@ -64,7 +64,7 @@ public class TestFilteredSearch extends LuceneTestCase { try { for (int i = 0; i < 60; i++) {//Simple docs Document doc = new Document(); - doc.add(newField(FIELD, Integer.toString(i), Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField(FIELD, Integer.toString(i), StringField.TYPE_STORED)); writer.addDocument(doc); } if(optimize) diff --git a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java index 99e97d50dde..3efcea057f9 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery.java @@ -23,7 +23,7 @@ import java.io.IOException; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.RandomIndexWriter; @@ -464,7 +464,7 @@ public class TestFuzzyQuery extends LuceneTestCase { private void addDoc(String text, RandomIndexWriter writer) throws IOException { Document doc = new Document(); - doc.add(newField("field", text, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", text, TextField.TYPE_STORED)); writer.addDocument(doc); } } diff --git a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java index 10ae2109154..f75f1914041 100644 --- a/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java +++ b/lucene/src/test/org/apache/lucene/search/TestFuzzyQuery2.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -90,7 +91,7 @@ public class TestFuzzyQuery2 extends LuceneTestCase { writer.w.setInfoStream(VERBOSE ? 
System.out : null); Document doc = new Document(); - Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED); + Field field = newField("field", "", TextField.TYPE_UNSTORED); doc.add(field); for (int i = 0; i < terms; i++) { diff --git a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java index c60a8becae8..dbb7f2a08d3 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMatchAllDocsQuery.java @@ -22,6 +22,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.IndexReader; @@ -89,7 +90,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase { private void addDoc(String text, IndexWriter iw, float boost) throws IOException { Document doc = new Document(); - Field f = newField("key", text, Field.Store.YES, Field.Index.ANALYZED); + Field f = newField("key", text, TextField.TYPE_STORED); f.setBoost(boost); doc.add(f); iw.addDocument(doc); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java index d919da05dea..a02629f0077 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiPhraseQuery.java @@ -33,6 +33,8 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.RAMDirectory; @@ -164,7 +166,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { private void add(String s, RandomIndexWriter writer) throws IOException { Document doc = new Document(); - doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("body", s, TextField.TYPE_STORED)); writer.addDocument(doc); } @@ -287,8 +289,8 @@ public class TestMultiPhraseQuery extends LuceneTestCase { private void add(String s, String type, RandomIndexWriter writer) throws IOException { Document doc = new Document(); - doc.add(newField("body", s, Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("type", type, Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("body", s, TextField.TYPE_STORED)); + doc.add(newField("type", type, StringField.TYPE_UNSTORED)); writer.addDocument(doc); } @@ -396,7 +398,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, dir, new CannedAnalyzer(tokens)); Document doc = new Document(); - doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new TextField("field", "")); writer.addDocument(doc); writer.addDocument(doc); IndexReader r = writer.getReader(); @@ -488,7 +490,7 @@ public class TestMultiPhraseQuery extends LuceneTestCase { IndexWriterConfig cfg = newIndexWriterConfig(TEST_VERSION_CURRENT, new CannedAnalyzer(INCR_0_DOC_TOKENS)); 
IndexWriter writer = new IndexWriter(dir, cfg); Document doc = new Document(); - doc.add(new Field("field", "", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(new TextField("field", "")); writer.addDocument(doc); IndexReader r = IndexReader.open(writer,false); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java index b6a1b6ea72b..8effb981f27 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermConstantScore.java @@ -20,10 +20,10 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.store.Directory; @@ -32,8 +32,6 @@ import org.junit.BeforeClass; import org.junit.Test; import java.io.IOException; -import java.text.Collator; -import java.util.Locale; import junit.framework.Assert; @@ -64,16 +62,14 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter { newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)).setMergePolicy(newLogMergePolicy())); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setTokenized(false); for (int i = 0; i < data.length; i++) { Document doc = new Document(); - doc.add(newField("id", String.valueOf(i), Field.Store.YES, - Field.Index.NOT_ANALYZED));// Field.Keyword("id",String.valueOf(i))); - doc - .add(newField("all", "all", Field.Store.YES, - Field.Index.NOT_ANALYZED));// Field.Keyword("all","all")); + doc.add(newField("id", String.valueOf(i), customType));// Field.Keyword("id",String.valueOf(i))); + doc.add(newField("all", "all", customType));// Field.Keyword("all","all")); if (null != data[i]) { - doc.add(newField("data", data[i], Field.Store.YES, - Field.Index.ANALYZED));// Field.Text("data",data[i])); + doc.add(newField("data", data[i], TextField.TYPE_STORED));// Field.Text("data",data[i])); } writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java index ee130899a05..619bab59de0 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiTermQueryRewrites.java @@ -19,8 +19,7 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; @@ -53,7 +52,7 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase { for (int i = 0; i < 10; i++) { Document doc = new Document(); - doc.add(newField("data", Integer.toString(i), Field.Store.NO, Field.Index.NOT_ANALYZED)); + doc.add(newField("data", 
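
// Editor's aside: the TestMultiTermConstantScore hunk above rebuilds the old
// Store.YES + Index.NOT_ANALYZED "keyword" combination from the TextField preset,
// keeping the preset's remaining settings intact; sketched standalone:
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.TextField;

class StoredKeywordSketch {
  static Field keyword(String name, String value) {
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setTokenized(false); // was Field.Index.NOT_ANALYZED
    return new Field(name, customType, value);
  }
}
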
Integer.toString(i), StringField.TYPE_UNSTORED)); writer.addDocument(doc); ((i % 2 == 0) ? swriter1 : swriter2).addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java b/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java index 6f2807de8be..4285c5a3a41 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java @@ -41,9 +41,12 @@ public class TestMultiThreadTermVectors extends LuceneTestCase { IndexWriter writer = new IndexWriter(directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); //writer.setUseCompoundFile(false); //writer.infoStream = System.out; + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setTokenized(false); + customType.setStoreTermVectors(true); for (int i = 0; i < numDocs; i++) { Document doc = new Document(); - Fieldable fld = newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.YES); + Field fld = newField("field", English.intToEnglish(i), customType); doc.add(fld); writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java index 70c0ce99289..e9f4d7add4f 100644 --- a/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestMultiValuedNumericRangeQuery.java @@ -23,8 +23,8 @@ import java.text.DecimalFormatSymbols; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; @@ -51,8 +51,8 @@ public class TestMultiValuedNumericRangeQuery extends LuceneTestCase { Document doc = new Document(); for (int m=0, c=random.nextInt(10); m<=c; m++) { int value = random.nextInt(Integer.MAX_VALUE); - doc.add(newField("asc", format.format(value), Field.Store.NO, Field.Index.NOT_ANALYZED)); - doc.add(new NumericField("trie", Field.Store.NO, true).setIntValue(value)); + doc.add(newField("asc", format.format(value), StringField.TYPE_UNSTORED)); + doc.add(new NumericField("trie").setIntValue(value)); } writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/TestNot.java b/lucene/src/test/org/apache/lucene/search/TestNot.java index b34227ce6ae..d13af0412ce 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNot.java +++ b/lucene/src/test/org/apache/lucene/search/TestNot.java @@ -23,9 +23,8 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.store.Directory; -import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; /** Similarity unit test. 
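
// Editor's aside: NumericField loses its Field.Store constructor argument in the
// TestMultiValuedNumericRangeQuery hunk above; the one-argument form is unstored
// and the value setters chain:
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericField;

class TrieFieldSketch {
  static Document doc(int value) {
    Document doc = new Document();
    doc.add(new NumericField("trie").setIntValue(value)); // unstored, trie-encoded int
    return doc;
  }
}
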
* @@ -38,7 +37,7 @@ public class TestNot extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, store); Document d1 = new Document(); - d1.add(newField("field", "a b", Field.Store.YES, Field.Index.ANALYZED)); + d1.add(newField("field", "a b", TextField.TYPE_STORED)); writer.addDocument(d1); IndexReader reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java index b1ab16d1641..d503dad0f53 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java +++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery32.java @@ -19,7 +19,6 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; @@ -58,15 +57,15 @@ public class TestNumericRangeQuery32 extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 100, 1000)) .setMergePolicy(newLogMergePolicy())); - + NumericField - field8 = new NumericField("field8", 8, Field.Store.YES, true), - field4 = new NumericField("field4", 4, Field.Store.YES, true), - field2 = new NumericField("field2", 2, Field.Store.YES, true), - fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, rarely() ? Field.Store.YES : Field.Store.NO, true), - ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true), - ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true), - ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true); + field8 = new NumericField("field8", 8, NumericField.TYPE_STORED), + field4 = new NumericField("field4", 4, NumericField.TYPE_STORED), + field2 = new NumericField("field2", 2, NumericField.TYPE_STORED), + fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, rarely() ? 
NumericField.TYPE_STORED : NumericField.TYPE_UNSTORED), + ascfield8 = new NumericField("ascfield8", 8, NumericField.TYPE_UNSTORED), + ascfield4 = new NumericField("ascfield4", 4, NumericField.TYPE_UNSTORED), + ascfield2 = new NumericField("ascfield2", 2, NumericField.TYPE_UNSTORED); Document doc = new Document(); // add fields, that have a distance to test general functionality diff --git a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java index 6171f2a542f..65ff32e76eb 100644 --- a/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java +++ b/lucene/src/test/org/apache/lucene/search/TestNumericRangeQuery64.java @@ -19,7 +19,6 @@ package org.apache.lucene.search; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -57,15 +56,15 @@ public class TestNumericRangeQuery64 extends LuceneTestCase { .setMergePolicy(newLogMergePolicy())); NumericField - field8 = new NumericField("field8", 8, Field.Store.YES, true), - field6 = new NumericField("field6", 6, Field.Store.YES, true), - field4 = new NumericField("field4", 4, Field.Store.YES, true), - field2 = new NumericField("field2", 2, Field.Store.YES, true), - fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, rarely() ? Field.Store.YES : Field.Store.NO, true), - ascfield8 = new NumericField("ascfield8", 8, Field.Store.NO, true), - ascfield6 = new NumericField("ascfield6", 6, Field.Store.NO, true), - ascfield4 = new NumericField("ascfield4", 4, Field.Store.NO, true), - ascfield2 = new NumericField("ascfield2", 2, Field.Store.NO, true); + field8 = new NumericField("field8", 8, NumericField.TYPE_STORED), + field6 = new NumericField("field6", 6, NumericField.TYPE_STORED), + field4 = new NumericField("field4", 4, NumericField.TYPE_STORED), + field2 = new NumericField("field2", 2, NumericField.TYPE_STORED), + fieldNoTrie = new NumericField("field"+Integer.MAX_VALUE, Integer.MAX_VALUE, rarely() ? 
NumericField.TYPE_STORED : NumericField.TYPE_UNSTORED), + ascfield8 = new NumericField("ascfield8", 8, NumericField.TYPE_UNSTORED), + ascfield6 = new NumericField("ascfield6", 6, NumericField.TYPE_UNSTORED), + ascfield4 = new NumericField("ascfield4", 4, NumericField.TYPE_UNSTORED), + ascfield2 = new NumericField("ascfield2", 2, NumericField.TYPE_UNSTORED); Document doc = new Document(); // add fields, that have a distance to test general functionality diff --git a/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java b/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java index 2b4e58f50a8..74e2c2173b5 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPhrasePrefixQuery.java @@ -19,7 +19,7 @@ package org.apache.lucene.search; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.IndexReader; @@ -47,16 +47,11 @@ public class TestPhrasePrefixQuery extends LuceneTestCase { Document doc3 = new Document(); Document doc4 = new Document(); Document doc5 = new Document(); - doc1.add(newField("body", "blueberry pie", Field.Store.YES, - Field.Index.ANALYZED)); - doc2.add(newField("body", "blueberry strudel", Field.Store.YES, - Field.Index.ANALYZED)); - doc3.add(newField("body", "blueberry pizza", Field.Store.YES, - Field.Index.ANALYZED)); - doc4.add(newField("body", "blueberry chewing gum", Field.Store.YES, - Field.Index.ANALYZED)); - doc5.add(newField("body", "piccadilly circus", Field.Store.YES, - Field.Index.ANALYZED)); + doc1.add(newField("body", "blueberry pie", TextField.TYPE_STORED)); + doc2.add(newField("body", "blueberry strudel", TextField.TYPE_STORED)); + doc3.add(newField("body", "blueberry pizza", TextField.TYPE_STORED)); + doc4.add(newField("body", "blueberry chewing gum", TextField.TYPE_STORED)); + doc5.add(newField("body", "piccadilly circus", TextField.TYPE_STORED)); writer.addDocument(doc1); writer.addDocument(doc2); writer.addDocument(doc3); diff --git a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java index 7b991719a80..4a296db2a59 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPhraseQuery.java @@ -68,19 +68,19 @@ public class TestPhraseQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, directory, analyzer); Document doc = new Document(); - doc.add(newField("field", "one two three four five", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.ANALYZED)); - Fieldable repeatedField = newField("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.ANALYZED); + doc.add(newField("field", "one two three four five", TextField.TYPE_STORED)); + doc.add(newField("repeated", "this is a repeated field - first part", TextField.TYPE_STORED)); + IndexableField repeatedField = newField("repeated", "second part of a repeated field", TextField.TYPE_STORED); doc.add(repeatedField); - doc.add(newField("palindrome", "one two three two one", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("palindrome", "one two three two one", 
TextField.TYPE_STORED)); writer.addDocument(doc); doc = new Document(); - doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("nonexist", "phrase exist notexist exist found", TextField.TYPE_STORED)); writer.addDocument(doc); doc = new Document(); - doc.add(newField("nonexist", "phrase exist notexist exist found", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("nonexist", "phrase exist notexist exist found", TextField.TYPE_STORED)); writer.addDocument(doc); reader = writer.getReader(); @@ -223,7 +223,7 @@ public class TestPhraseQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig( Version.LUCENE_40, stopAnalyzer)); Document doc = new Document(); - doc.add(newField("field", "the stop words are here", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", "the stop words are here", TextField.TYPE_STORED)); writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); @@ -258,12 +258,12 @@ public class TestPhraseQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, directory); Document doc = new Document(); - doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("source", "marketing info", TextField.TYPE_STORED)); writer.addDocument(doc); doc = new Document(); - doc.add(newField("contents", "foobar", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("source", "marketing info", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("contents", "foobar", TextField.TYPE_STORED)); + doc.add(newField("source", "marketing info", TextField.TYPE_STORED)); writer.addDocument(doc); IndexReader reader = writer.getReader(); @@ -294,15 +294,15 @@ public class TestPhraseQuery extends LuceneTestCase { writer = new RandomIndexWriter(random, directory, newIndexWriterConfig( TEST_VERSION_CURRENT, new MockAnalyzer(random)).setOpenMode(OpenMode.CREATE)); doc = new Document(); - doc.add(newField("contents", "map entry woo", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("contents", "map entry woo", TextField.TYPE_STORED)); writer.addDocument(doc); doc = new Document(); - doc.add(newField("contents", "woo map entry", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("contents", "woo map entry", TextField.TYPE_STORED)); writer.addDocument(doc); doc = new Document(); - doc.add(newField("contents", "map foobarword entry woo", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("contents", "map foobarword entry woo", TextField.TYPE_STORED)); writer.addDocument(doc); reader = writer.getReader(); @@ -345,15 +345,15 @@ public class TestPhraseQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); - doc.add(newField("field", "foo firstname lastname foo", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", "foo firstname lastname foo", TextField.TYPE_STORED)); writer.addDocument(doc); Document doc2 = new Document(); - doc2.add(newField("field", "foo firstname zzz lastname foo", Field.Store.YES, Field.Index.ANALYZED)); + doc2.add(newField("field", "foo firstname zzz lastname foo", TextField.TYPE_STORED)); writer.addDocument(doc2); Document doc3 = new Document(); - doc3.add(newField("field", "foo firstname zzz yyy 
lastname foo", Field.Store.YES, Field.Index.ANALYZED)); + doc3.add(newField("field", "foo firstname zzz yyy lastname foo", TextField.TYPE_STORED)); writer.addDocument(doc3); IndexReader reader = writer.getReader(); @@ -608,7 +608,7 @@ public class TestPhraseQuery extends LuceneTestCase { RandomIndexWriter w = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, analyzer).setMergePolicy(newLogMergePolicy())); List> docs = new ArrayList>(); Document d = new Document(); - Field f = newField("f", "", Field.Store.NO, Field.Index.ANALYZED); + Field f = newField("f", "", TextField.TYPE_UNSTORED); d.add(f); Random r = random; diff --git a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java index 108c1a1561b..3cbe6610f46 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java +++ b/lucene/src/test/org/apache/lucene/search/TestPositionIncrement.java @@ -27,7 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexReader; @@ -91,7 +91,7 @@ public class TestPositionIncrement extends LuceneTestCase { Directory store = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, store, analyzer); Document d = new Document(); - d.add(newField("field", "bogus", Field.Store.YES, Field.Index.ANALYZED)); + d.add(newField("field", "bogus", TextField.TYPE_STORED)); writer.addDocument(d); IndexReader reader = writer.getReader(); writer.close(); @@ -209,7 +209,7 @@ public class TestPositionIncrement extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir, new MockPayloadAnalyzer()); Document doc = new Document(); - doc.add(new Field("content", new StringReader( + doc.add(new TextField("content", new StringReader( "a a b c d e a f g h i j a b k k"))); writer.addDocument(doc); diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java b/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java index 890ffe180e8..d3168b477b1 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixFilter.java @@ -23,7 +23,7 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; /** * Tests {@link PrefixFilter} class. 
@@ -40,7 +40,7 @@ public class TestPrefixFilter extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, directory); for (int i = 0; i < categories.length; i++) { Document doc = new Document(); - doc.add(newField("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("category", categories[i], StringField.TYPE_STORED)); writer.addDocument(doc); } IndexReader reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java b/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java index 10a1a69a124..673c7e89eaf 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixInBooleanQuery.java @@ -20,6 +20,7 @@ package org.apache.lucene.search; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -50,8 +51,7 @@ public class TestPrefixInBooleanQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, directory); Document doc = new Document(); - Field field = newField(FIELD, "meaninglessnames", Field.Store.NO, - Field.Index.NOT_ANALYZED_NO_NORMS); + Field field = newField(FIELD, "meaninglessnames", StringField.TYPE_UNSTORED); doc.add(field); for (int i = 0; i < 5137; ++i) { diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java b/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java index 5ab57283ec2..5103c31cbb0 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixQuery.java @@ -25,7 +25,7 @@ import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; /** * Tests {@link PrefixQuery} class. 
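TestPrefixQuery below reuses a ready-made type, but where none fits the patch clones a base type and flips individual flags; the TestRegexpRandom hunk a little further down rebuilds the old ANALYZED_NO_NORMS mode exactly this way. A sketch of that pattern, assuming the mutable FieldType API introduced here (field name and value are placeholders):

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.document.TextField;

    // Copy the tokenized defaults, then disable norms: the behavior the
    // old Field.Index.ANALYZED_NO_NORMS enum value expressed.
    FieldType customType = new FieldType(TextField.TYPE_STORED);
    customType.setOmitNorms(true);
    Field field = new Field("field", customType, "some analyzed text");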
@@ -41,7 +41,7 @@ public class TestPrefixQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, directory); for (int i = 0; i < categories.length; i++) { Document doc = new Document(); - doc.add(newField("category", categories[i], Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("category", categories[i], StringField.TYPE_STORED)); writer.addDocument(doc); } IndexReader reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java index 7e7759e981c..35b1e151e30 100644 --- a/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestPrefixRandom.java @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; @@ -53,7 +54,7 @@ public class TestPrefixRandom extends LuceneTestCase { .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); - Field field = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED); + Field field = newField("field", "", StringField.TYPE_UNSTORED); doc.add(field); // we generate aweful prefixes: good for testing. diff --git a/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java b/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java index 37d522c3c05..cf61e9ffcea 100644 --- a/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java +++ b/lucene/src/test/org/apache/lucene/search/TestQueryWrapperFilter.java @@ -18,8 +18,7 @@ package org.apache.lucene.search; */ import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -33,7 +32,7 @@ public class TestQueryWrapperFilter extends LuceneTestCase { Directory dir = newDirectory(); RandomIndexWriter writer = new RandomIndexWriter(random, dir); Document doc = new Document(); - doc.add(newField("field", "value", Store.NO, Index.ANALYZED)); + doc.add(newField("field", "value", TextField.TYPE_UNSTORED)); writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java b/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java index 75036679595..fe4a66e902e 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpQuery.java @@ -21,7 +21,7 @@ import java.io.IOException; import java.util.Arrays; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -50,7 +50,7 @@ public class TestRegexpQuery extends LuceneTestCase { Document doc = new Document(); doc.add(newField(FN, "the quick brown fox jumps over the lazy ??? 
dog 493432 49344", - Field.Store.NO, Field.Index.ANALYZED)); + TextField.TYPE_UNSTORED)); writer.addDocument(doc); reader = writer.getReader(); writer.close(); diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java index 1d8162a437a..0e53a814dc1 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom.java @@ -25,6 +25,8 @@ import java.util.Locale; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -51,7 +53,9 @@ public class TestRegexpRandom extends LuceneTestCase { .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); - Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED_NO_NORMS); + FieldType customType = new FieldType(TextField.TYPE_STORED); + customType.setOmitNorms(true); + Field field = newField("field", "", customType); doc.add(field); NumberFormat df = new DecimalFormat("000", new DecimalFormatSymbols(Locale.ENGLISH)); diff --git a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java index 4e9acbe71c4..e5ac627ddde 100644 --- a/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java +++ b/lucene/src/test/org/apache/lucene/search/TestRegexpRandom2.java @@ -26,6 +26,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -62,7 +63,7 @@ public class TestRegexpRandom2 extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.KEYWORD, false)) .setMaxBufferedDocs(_TestUtil.nextInt(random, 50, 1000))); Document doc = new Document(); - Field field = newField("field", "", Field.Store.NO, Field.Index.NOT_ANALYZED); + Field field = newField("field", "", StringField.TYPE_UNSTORED); doc.add(field); List terms = new ArrayList(); int num = atLeast(200); diff --git a/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java b/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java index f51795b6afa..61fe43fb0d0 100755 --- a/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java +++ b/lucene/src/test/org/apache/lucene/search/TestScorerPerf.java @@ -13,7 +13,7 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.Directory; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -64,7 +64,7 @@ public class TestScorerPerf extends LuceneTestCase { Document d = new Document(); for (int j=0; j= 0)) { // ensure first field is in order fail = true; System.out.println("fail:" + v[j] + " < " + last); } if (cmp == 0) { // ensure second field is in reverse order - cmp = v2[j].compareTo(lastSub); + 
cmp = v2[j].stringValue().compareTo(lastSub); if (cmp > 0) { fail = true; System.out.println("rev field fail:" + v2[j] + " > " + lastSub); @@ -409,8 +427,8 @@ public class TestSort extends LuceneTestCase { } } } - last = v[j]; - lastSub = v2[j]; + last = v[j].stringValue(); + lastSub = v2[j].stringValue(); lastDocId = result[x].doc; buff.append(v[j] + "(" + v2[j] + ")(" + result[x].doc+") "); } @@ -1051,9 +1069,9 @@ public class TestSort extends LuceneTestCase { int n = result.length; for (int i=0; i 1) { tester.values[i] = 10 + random.nextInt( 20 ); // get some field overlap - doc.add(newField(tester.field, String.valueOf(tester.values[i]), - Field.Store.NO, Field.Index.NOT_ANALYZED )); + FieldType customType = new FieldType(TextField.TYPE_UNSTORED); + customType.setTokenized(false); + doc.add(newField(tester.field, String.valueOf(tester.values[i]), customType)); } } writer.addDocument(doc); diff --git a/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java b/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java index d9b63440831..1c33f7acc28 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/PayloadHelper.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.Payload; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.util.English; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.index.IndexReader; @@ -122,9 +123,9 @@ public class PayloadHelper { // writer.infoStream = System.out; for (int i = 0; i < numDocs; i++) { Document doc = new Document(); - doc.add(new Field(FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field(MULTI_FIELD, English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field(NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field(FIELD, TextField.TYPE_STORED, English.intToEnglish(i))); + doc.add(new Field(MULTI_FIELD, TextField.TYPE_STORED, English.intToEnglish(i) + " " + English.intToEnglish(i))); + doc.add(new Field(NO_PAYLOAD_FIELD, TextField.TYPE_STORED, English.intToEnglish(i))); writer.addDocument(doc); } reader = IndexReader.open(writer, true); diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java index 13babc6418a..82564abcfdf 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadNearQuery.java @@ -21,7 +21,7 @@ import java.io.Reader; import org.apache.lucene.analysis.*; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.FieldInvertState; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Payload; @@ -115,9 +115,9 @@ public class TestPayloadNearQuery extends LuceneTestCase { //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { Document doc = new Document(); - doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", English.intToEnglish(i), 
TextField.TYPE_STORED)); String txt = English.intToEnglish(i) +' '+English.intToEnglish(i+1); - doc.add(newField("field2", txt, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field2", txt, TextField.TYPE_STORED)); writer.addDocument(doc); } reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java index bef3a8c35df..f8c6329f9d2 100644 --- a/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java +++ b/lucene/src/test/org/apache/lucene/search/payloads/TestPayloadTermQuery.java @@ -45,6 +45,7 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.apache.lucene.document.TextField; import java.io.Reader; import java.io.IOException; @@ -122,11 +123,11 @@ public class TestPayloadTermQuery extends LuceneTestCase { //writer.infoStream = System.out; for (int i = 0; i < 1000; i++) { Document doc = new Document(); - Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED); + Field noPayloadField = newField(PayloadHelper.NO_PAYLOAD_FIELD, English.intToEnglish(i), TextField.TYPE_STORED); //noPayloadField.setBoost(0); doc.add(noPayloadField); - doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", English.intToEnglish(i), TextField.TYPE_STORED)); + doc.add(newField("multiField", English.intToEnglish(i) + " " + English.intToEnglish(i), TextField.TYPE_STORED)); writer.addDocument(doc); } reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java index 49edd5d2e12..81e90e32b0e 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestBasics.java @@ -25,14 +25,13 @@ import java.util.Collections; import java.util.List; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Payload; import org.apache.lucene.index.RandomIndexWriter; @@ -120,7 +119,7 @@ public class TestBasics extends LuceneTestCase { //writer.infoStream = System.out; for (int i = 0; i < 2000; i++) { Document doc = new Document(); - doc.add(newField("field", English.intToEnglish(i), Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("field", English.intToEnglish(i), TextField.TYPE_STORED)); writer.addDocument(doc); } reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java b/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java index 012f9d5c48d..fa4c3e368e3 100644 --- 
a/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestFieldMaskingSpanQuery.java @@ -23,6 +23,7 @@ import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -46,7 +47,7 @@ public class TestFieldMaskingSpanQuery extends LuceneTestCase { } protected static Field field(String name, String value) { - return newField(name, value, Field.Store.NO, Field.Index.ANALYZED); + return newField(name, value, TextField.TYPE_UNSTORED); } protected static IndexSearcher searcher; diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java b/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java index c70c93dd1f5..eaf4e08cc38 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestNearSpansOrdered.java @@ -19,7 +19,7 @@ package org.apache.lucene.search.spans; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexReader.ReaderContext; @@ -57,7 +57,7 @@ public class TestNearSpansOrdered extends LuceneTestCase { RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); - doc.add(newField(FIELD, docFields[i], Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField(FIELD, docFields[i], TextField.TYPE_UNSTORED)); writer.addDocument(doc); } reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java index b3d65e28799..6661eca1dfb 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestPayloadSpans.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.IndexReader; @@ -113,8 +113,7 @@ public class TestPayloadSpans extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = new Document(); - doc.add(newField(PayloadHelper.FIELD, "one two three one four three", - Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField(PayloadHelper.FIELD, "one two three one four three", TextField.TYPE_STORED)); writer.addDocument(doc); IndexReader reader = writer.getReader(); writer.close(); @@ -261,7 +260,7 @@ public class TestPayloadSpans extends LuceneTestCase { 
newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); Document doc = new Document(); - doc.add(new Field("content", new StringReader("a b c d e f g h i j a k"))); + doc.add(new TextField("content", new StringReader("a b c d e f g h i j a k"))); writer.addDocument(doc); IndexReader reader = writer.getReader(); @@ -300,7 +299,7 @@ public class TestPayloadSpans extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); Document doc = new Document(); - doc.add(new Field("content", new StringReader("a b a d k f a h i k a k"))); + doc.add(new TextField("content", new StringReader("a b a d k f a h i k a k"))); writer.addDocument(doc); IndexReader reader = writer.getReader(); IndexSearcher is = newSearcher(reader); @@ -337,7 +336,7 @@ public class TestPayloadSpans extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new TestPayloadAnalyzer())); Document doc = new Document(); - doc.add(new Field("content", new StringReader("j k a l f k k p a t a k l k t a"))); + doc.add(new TextField("content", new StringReader("j k a l f k k p a t a k l k t a"))); writer.addDocument(doc); IndexReader reader = writer.getReader(); IndexSearcher is = newSearcher(reader); @@ -379,7 +378,7 @@ public class TestPayloadSpans extends LuceneTestCase { newIndexWriterConfig(TEST_VERSION_CURRENT, new PayloadAnalyzer()).setSimilarityProvider(similarity)); Document doc = new Document(); - doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField(PayloadHelper.FIELD,"xx rr yy mm pp", TextField.TYPE_STORED)); writer.addDocument(doc); IndexReader reader = writer.getReader(); @@ -443,7 +442,7 @@ public class TestPayloadSpans extends LuceneTestCase { for(int i = 0; i < docs.length; i++) { doc = new Document(); String docText = docs[i]; - doc.add(newField(PayloadHelper.FIELD,docText, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField(PayloadHelper.FIELD,docText, TextField.TYPE_STORED)); writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java index aa02222909d..981f61ba3fd 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpanFirstQuery.java @@ -21,7 +21,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -41,10 +41,10 @@ public class TestSpanFirstQuery extends LuceneTestCase { RandomIndexWriter writer = new RandomIndexWriter(random, dir, analyzer); Document doc = new Document(); - doc.add(newField("field", "the quick brown fox", Field.Index.ANALYZED)); + doc.add(newField("field", "the quick brown fox", TextField.TYPE_UNSTORED)); writer.addDocument(doc); Document doc2 = new Document(); - doc2.add(newField("field", "quick brown fox", Field.Index.ANALYZED)); + doc2.add(newField("field", "quick brown fox", TextField.TYPE_UNSTORED)); writer.addDocument(doc2); IndexReader reader = writer.getReader(); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java 
b/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java index 7c72eb2e79c..1db489897ff 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpanMultiTermQueryWrapper.java @@ -19,6 +19,7 @@ package org.apache.lucene.search.spans; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -42,7 +43,7 @@ public class TestSpanMultiTermQueryWrapper extends LuceneTestCase { directory = newDirectory(); RandomIndexWriter iw = new RandomIndexWriter(random, directory); Document doc = new Document(); - Field field = newField("field", "", Field.Store.NO, Field.Index.ANALYZED); + Field field = newField("field", "", TextField.TYPE_UNSTORED); doc.add(field); field.setValue("quick brown fox"); diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java index 393eb0e7c02..c222ceb4758 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpans.java @@ -38,7 +38,8 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.ReaderUtil; @@ -58,7 +59,7 @@ public class TestSpans extends LuceneTestCase { RandomIndexWriter writer= new RandomIndexWriter(random, directory, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); - doc.add(newField(field, docFields[i], Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField(field, docFields[i], TextField.TYPE_STORED)); writer.addDocument(doc); } reader = writer.getReader(); @@ -452,8 +453,8 @@ public class TestSpans extends LuceneTestCase { // LUCENE-1404 private void addDoc(IndexWriter writer, String id, String text) throws IOException { final Document doc = new Document(); - doc.add( newField("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED) ); - doc.add( newField("text", text, Field.Store.YES, Field.Index.ANALYZED) ); + doc.add( newField("id", id, StringField.TYPE_STORED) ); + doc.add( newField("text", text, TextField.TYPE_STORED) ); writer.addDocument(doc); } diff --git a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java index 7eea843b070..97591b7c1f9 100644 --- a/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java +++ b/lucene/src/test/org/apache/lucene/search/spans/TestSpansAdvanced.java @@ -25,7 +25,8 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import 
org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -90,10 +91,8 @@ public class TestSpansAdvanced extends LuceneTestCase { final String text) throws IOException { final Document document = new Document(); - document.add(newField(FIELD_ID, id, Field.Store.YES, - Field.Index.NOT_ANALYZED)); - document.add(newField(FIELD_TEXT, text, Field.Store.YES, - Field.Index.ANALYZED)); + document.add(newField(FIELD_ID, id, StringField.TYPE_STORED)); + document.add(newField(FIELD_TEXT, text, TextField.TYPE_STORED)); writer.addDocument(document); } diff --git a/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java b/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java index 9d9957ec7a8..000bee7f689 100755 --- a/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java +++ b/lucene/src/test/org/apache/lucene/store/TestBufferedIndexInput.java @@ -28,7 +28,7 @@ import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -251,8 +251,8 @@ public class TestBufferedIndexInput extends LuceneTestCase { ); for(int i=0;i<37;i++) { Document doc = new Document(); - doc.add(newField("content", "aaa bbb ccc ddd" + i, Field.Store.YES, Field.Index.ANALYZED)); - doc.add(newField("id", "" + i, Field.Store.YES, Field.Index.ANALYZED)); + doc.add(newField("content", "aaa bbb ccc ddd" + i, TextField.TYPE_STORED)); + doc.add(newField("id", "" + i, TextField.TYPE_STORED)); writer.addDocument(doc); } writer.close(); diff --git a/lucene/src/test/org/apache/lucene/store/TestLockFactory.java b/lucene/src/test/org/apache/lucene/store/TestLockFactory.java index d316cc2500e..ec8db9d5922 100755 --- a/lucene/src/test/org/apache/lucene/store/TestLockFactory.java +++ b/lucene/src/test/org/apache/lucene/store/TestLockFactory.java @@ -25,7 +25,7 @@ import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; @@ -415,7 +415,7 @@ public class TestLockFactory extends LuceneTestCase { private void addDoc(IndexWriter writer) throws IOException { Document doc = new Document(); - doc.add(newField("content", "aaa", Field.Store.NO, Field.Index.ANALYZED)); + doc.add(newField("content", "aaa", TextField.TYPE_UNSTORED)); writer.addDocument(doc); } } diff --git a/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java b/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java index ad1739a57b8..29ee6a62876 100644 --- a/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java +++ b/lucene/src/test/org/apache/lucene/store/TestMultiMMap.java @@ -23,6 +23,7 @@ import java.util.Random; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.util.BytesRef; @@ -123,8 +124,8 @@ public class TestMultiMMap extends LuceneTestCase { MockDirectoryWrapper dir = new MockDirectoryWrapper(random, mmapDir); 
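// The two newField calls just below move to StringField.TYPE_STORED, i.e.
// indexed as a single token and stored. Outside the LuceneTestCase helper
// the same fields would be built directly (a sketch, assuming this patch's
// Field(String, FieldType, String) constructor):
//
//   Field docid = new Field("docid", StringField.TYPE_STORED, "0");
//   Field junk  = new Field("junk",  StringField.TYPE_STORED, "");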
RandomIndexWriter writer = new RandomIndexWriter(random, dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); Document doc = new Document(); - Field docid = newField("docid", "0", Field.Store.YES, Field.Index.NOT_ANALYZED); - Field junk = newField("junk", "", Field.Store.YES, Field.Index.NOT_ANALYZED); + Field docid = newField("docid", "0", StringField.TYPE_STORED); + Field junk = newField("junk", "", StringField.TYPE_STORED); doc.add(docid); doc.add(junk); diff --git a/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java b/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java index dadb66aa9e9..9b40d527061 100644 --- a/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java +++ b/lucene/src/test/org/apache/lucene/store/TestRAMDirectory.java @@ -19,15 +19,12 @@ package org.apache.lucene.store; import java.io.File; import java.io.IOException; -import java.io.ObjectOutput; -import java.io.ObjectOutputStream; -import java.io.ByteArrayOutputStream; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util._TestUtil; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -59,7 +56,7 @@ public class TestRAMDirectory extends LuceneTestCase { Document doc = null; for (int i = 0; i < docsToAdd; i++) { doc = new Document(); - doc.add(newField("content", English.intToEnglish(i).trim(), Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(newField("content", English.intToEnglish(i).trim(), StringField.TYPE_STORED)); writer.addDocument(doc); } assertEquals(docsToAdd, writer.maxDoc()); @@ -119,7 +116,7 @@ public class TestRAMDirectory extends LuceneTestCase { public void run() { for (int j=1; j(); // Initialize the map with the default fields. - fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyStore, bodyIndex, termVector)); - fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", store, index, termVector)); - fields.put(DATE_FIELD, new Field(DATE_FIELD, "", store, index, termVector)); - fields.put(ID_FIELD, new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); - fields.put(NAME_FIELD, new Field(NAME_FIELD, "", store, index, termVector)); + fields.put(BODY_FIELD, new Field(BODY_FIELD, bodyFt, "")); + fields.put(TITLE_FIELD, new Field(TITLE_FIELD, ft, "")); + fields.put(DATE_FIELD, new Field(DATE_FIELD, ft, "")); + fields.put(ID_FIELD, new Field(ID_FIELD, StringField.TYPE_STORED, "")); + fields.put(NAME_FIELD, new Field(NAME_FIELD, ft, "")); numericFields.put(DATE_MSEC_FIELD, new NumericField(DATE_MSEC_FIELD)); numericFields.put(TIME_SEC_FIELD, new NumericField(TIME_SEC_FIELD)); @@ -125,14 +125,14 @@ public class DocMaker { * reuseFields was set to true, then it attempts to reuse a * Field instance. If such a field does not exist, it creates a new one. 
*/ - Field getField(String name, Store store, Index index, TermVector termVector) { + Field getField(String name, FieldType ft) { if (!reuseFields) { - return new Field(name, "", store, index, termVector); + return new Field(name, ft, ""); } Field f = fields.get(name); if (f == null) { - f = new Field(name, "", store, index, termVector); + f = new Field(name, ft, ""); fields.put(name, f); } return f; @@ -179,12 +179,9 @@ public class DocMaker { protected Config config; - protected Store storeVal = Store.NO; - protected Store bodyStoreVal = Store.NO; - protected Index indexVal = Index.ANALYZED_NO_NORMS; - protected Index bodyIndexVal = Index.ANALYZED; - protected TermVector termVecVal = TermVector.NO; - + protected final FieldType valType; + protected final FieldType bodyValType; + protected ContentSource source; protected boolean reuseFields; protected boolean indexProperties; @@ -196,6 +193,13 @@ public class DocMaker { private int printNum = 0; + public DocMaker() { + valType = new FieldType(TextField.TYPE_UNSTORED); + valType.setOmitNorms(true); + + bodyValType = new FieldType(TextField.TYPE_UNSTORED); + } + // create a doc // use only part of the body, modify it to keep the rest (or use all if size==0). // reset the docdata properties so they are not added more than once. @@ -206,7 +210,10 @@ public class DocMaker { doc.getFields().clear(); // Set ID_FIELD - Field idField = ds.getField(ID_FIELD, storeVal, Index.NOT_ANALYZED_NO_NORMS, termVecVal); + FieldType ft = new FieldType(valType); + ft.setIndexed(true); + + Field idField = ds.getField(ID_FIELD, ft); int id; if (r != null) { id = r.nextInt(updateDocIDLimit); @@ -223,7 +230,7 @@ public class DocMaker { String name = docData.getName(); if (name == null) name = ""; name = cnt < 0 ? name : name + "_" + cnt; - Field nameField = ds.getField(NAME_FIELD, storeVal, indexVal, termVecVal); + Field nameField = ds.getField(NAME_FIELD, valType); nameField.setValue(name); doc.add(nameField); @@ -242,7 +249,7 @@ public class DocMaker { } else { dateString = ""; } - Field dateStringField = ds.getField(DATE_FIELD, storeVal, indexVal, termVecVal); + Field dateStringField = ds.getField(DATE_FIELD, valType); dateStringField.setValue(dateString); doc.add(dateStringField); @@ -264,7 +271,7 @@ public class DocMaker { // Set TITLE_FIELD String title = docData.getTitle(); - Field titleField = ds.getField(TITLE_FIELD, storeVal, indexVal, termVecVal); + Field titleField = ds.getField(TITLE_FIELD, valType); titleField.setValue(title == null ? 
"" : title); doc.add(titleField); @@ -285,12 +292,12 @@ public class DocMaker { bdy = body.substring(0, size); // use part docData.setBody(body.substring(size)); // some left } - Field bodyField = ds.getField(BODY_FIELD, bodyStoreVal, bodyIndexVal, termVecVal); + Field bodyField = ds.getField(BODY_FIELD, bodyValType); bodyField.setValue(bdy); doc.add(bodyField); if (storeBytes) { - Field bytesField = ds.getField(BYTES_FIELD, Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO); + Field bytesField = ds.getField(BYTES_FIELD, StringField.TYPE_STORED); bytesField.setValue(bdy.getBytes("UTF-8")); doc.add(bytesField); } @@ -300,7 +307,7 @@ public class DocMaker { Properties props = docData.getProps(); if (props != null) { for (final Map.Entry entry : props.entrySet()) { - Field f = ds.getField((String) entry.getKey(), storeVal, indexVal, termVecVal); + Field f = ds.getField((String) entry.getKey(), valType); f.setValue((String) entry.getValue()); doc.add(f); } @@ -319,7 +326,7 @@ public class DocMaker { protected DocState getDocState() { DocState ds = docState.get(); if (ds == null) { - ds = new DocState(reuseFields, storeVal, bodyStoreVal, indexVal, bodyIndexVal, termVecVal); + ds = new DocState(reuseFields, valType, bodyValType); docState.set(ds); } return ds; @@ -455,33 +462,23 @@ public class DocMaker { boolean norms = config.get("doc.tokenized.norms", false); boolean bodyNorms = config.get("doc.body.tokenized.norms", true); boolean termVec = config.get("doc.term.vector", false); - storeVal = (stored ? Field.Store.YES : Field.Store.NO); - bodyStoreVal = (bodyStored ? Field.Store.YES : Field.Store.NO); - if (tokenized) { - indexVal = norms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS; - } else { - indexVal = norms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS; - } - - if (bodyTokenized) { - bodyIndexVal = bodyNorms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS; - } else { - bodyIndexVal = bodyNorms ? 
Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS; - } - boolean termVecPositions = config.get("doc.term.vector.positions", false); boolean termVecOffsets = config.get("doc.term.vector.offsets", false); - if (termVecPositions && termVecOffsets) { - termVecVal = TermVector.WITH_POSITIONS_OFFSETS; - } else if (termVecPositions) { - termVecVal = TermVector.WITH_POSITIONS; - } else if (termVecOffsets) { - termVecVal = TermVector.WITH_OFFSETS; - } else if (termVec) { - termVecVal = TermVector.YES; - } else { - termVecVal = TermVector.NO; - } + + valType.setStored(stored); + bodyValType.setStored(bodyStored); + valType.setTokenized(tokenized); + valType.setOmitNorms(!norms); + bodyValType.setTokenized(bodyTokenized); + bodyValType.setOmitNorms(!bodyNorms); + + valType.setStoreTermVectors(termVec); + valType.setStoreTermVectorPositions(termVecPositions); + valType.setStoreTermVectorOffsets(termVecOffsets); + bodyValType.setStoreTermVectors(termVec); + bodyValType.setStoreTermVectorPositions(termVecPositions); + bodyValType.setStoreTermVectorOffsets(termVecOffsets); + storeBytes = config.get("doc.store.body.bytes", false); reuseFields = config.get("doc.reuse.fields", true); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java index c7b642361a7..e3065b1c044 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTask.java @@ -28,8 +28,8 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.QueryMaker; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.MultiFields; import org.apache.lucene.search.Collector; import org.apache.lucene.search.TopDocs; @@ -300,10 +300,10 @@ public abstract class ReadTask extends PerfTask { * @return A Collection of Field names (Strings) */ protected Collection getFieldsToHighlight(Document document) { - List fieldables = document.getFields(); - Set result = new HashSet(fieldables.size()); - for (final Fieldable fieldable : fieldables) { - result.add(fieldable.name()); + List fields = document.getFields(); + Set result = new HashSet(fields.size()); + for (final IndexableField f : fields) { + result.add(f.name()); } return result; } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java index f702cb8ee7f..7efe1116d47 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java @@ -26,8 +26,8 @@ import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.benchmark.byTask.PerfRunData; import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.document.NumericField; +import org.apache.lucene.index.IndexableField; /** * Simple task to test performance of tokenizers. 
It just @@ -65,11 +65,11 @@ public class ReadTokensTask extends PerfTask { @Override public int doLogic() throws Exception { - List fields = doc.getFields(); + List fields = doc.getFields(); Analyzer analyzer = getRunData().getAnalyzer(); int tokenCount = 0; - for(final Fieldable field : fields) { - if (!field.isTokenized() || field instanceof NumericField) continue; + for(final IndexableField field : fields) { + if (!field.tokenized() || field instanceof NumericField) continue; final TokenStream stream; final TokenStream streamValue = field.tokenStreamValue(); diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java index 26050b4696f..7ab732313f5 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/SearchTravRetLoadFieldSelectorTask.java @@ -16,20 +16,19 @@ package org.apache.lucene.benchmark.byTask.tasks; */ +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; +import java.util.StringTokenizer; + import org.apache.lucene.benchmark.byTask.PerfRunData; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.SetBasedFieldSelector; import org.apache.lucene.document.Document; +import org.apache.lucene.index.DocumentStoredFieldVisitor; import org.apache.lucene.index.IndexReader; -import java.util.StringTokenizer; -import java.util.Set; -import java.util.HashSet; -import java.util.Collections; -import java.io.IOException; - /** - * Search and Traverse and Retrieve docs task using a SetBasedFieldSelector. + * Search and Traverse and Retrieve docs task using a + * FieldVisitor loading only the requested fields. * *

      Note: This task reuses the reader if it is already open. * Otherwise a reader is opened at start and closed at the end. @@ -41,7 +40,8 @@ import java.io.IOException; */ public class SearchTravRetLoadFieldSelectorTask extends SearchTravTask { - protected FieldSelector fieldSelector; + protected Set fieldsToLoad; + public SearchTravRetLoadFieldSelectorTask(PerfRunData runData) { super(runData); @@ -55,18 +55,23 @@ public class SearchTravRetLoadFieldSelectorTask extends SearchTravTask { @Override protected Document retrieveDoc(IndexReader ir, int id) throws IOException { - return ir.document(id, fieldSelector); + if (fieldsToLoad == null) { + return ir.document(id); + } else { + DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad); + ir.document(id, visitor); + return visitor.getDocument(); + } } @Override public void setParams(String params) { this.params = params; // cannot just call super.setParams(), b/c it's params differ. - Set fieldsToLoad = new HashSet(); + fieldsToLoad = new HashSet(); for (StringTokenizer tokenizer = new StringTokenizer(params, ","); tokenizer.hasMoreTokens();) { String s = tokenizer.nextToken(); fieldsToLoad.add(s); } - fieldSelector = new SetBasedFieldSelector(fieldsToLoad, Collections. emptySet()); } diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java index 90cb4a3ee20..aac2fd98c72 100644 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.java @@ -32,7 +32,7 @@ import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.StreamUtils; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexableField; /** * A task which writes documents, one line per document. Each line is in the @@ -172,7 +172,7 @@ public class WriteLineDocTask extends PerfTask { boolean sufficient = !checkSufficientFields; for (int i=0; i0 && sufficientFields[i]; diff --git a/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java b/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java index 8ab80ab1767..e443ef5e672 100755 --- a/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java +++ b/modules/benchmark/src/java/org/apache/lucene/benchmark/quality/utils/DocNameExtractor.java @@ -17,18 +17,20 @@ package org.apache.lucene.benchmark.quality.utils; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.FieldSelectorResult; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.IndexInput; /** * Utility: extract doc names from an index */ public class DocNameExtractor { - private FieldSelector fldSel; - private String docNameField; + private final String docNameField; /** * Constructor for DocNameExtractor. 
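The SearchTravRetLoadFieldSelectorTask change above and the DocNameExtractor hunk below are two halves of the same migration: a FieldSelector becomes a StoredFieldVisitor, so callers say which stored fields to load through a visitor. A minimal usage sketch of the higher-level helper the retrieveDoc() change wires in, assuming reader and docid are already in scope and "docname" stands in for the field to load:

    import java.util.Collections;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DocumentStoredFieldVisitor;

    // Load only the "docname" stored field of one document.
    DocumentStoredFieldVisitor visitor =
        new DocumentStoredFieldVisitor(Collections.singleton("docname"));
    reader.document(docid, visitor);
    Document doc = visitor.getDocument();
    String name = doc.get("docname");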
@@ -36,13 +38,6 @@ public class DocNameExtractor { */ public DocNameExtractor (final String docNameField) { this.docNameField = docNameField; - fldSel = new FieldSelector() { - public FieldSelectorResult accept(String fieldName) { - return fieldName.equals(docNameField) ? - FieldSelectorResult.LOAD_AND_BREAK : - FieldSelectorResult.NO_LOAD; - } - }; } /** @@ -53,7 +48,25 @@ public class DocNameExtractor { * @throws IOException if cannot extract the doc name from the index. */ public String docName(IndexSearcher searcher, int docid) throws IOException { - return searcher.doc(docid,fldSel).get(docNameField); + final List name = new ArrayList(); + searcher.getIndexReader().document(docid, new StoredFieldVisitor() { + @Override + public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException { + if (fieldInfo.name.equals(docNameField) && name.size() == 0) { + final byte[] b = new byte[numUTF8Bytes]; + in.readBytes(b, 0, b.length); + name.add(new String(b, "UTF-8")); + } else { + in.seek(in.getFilePointer() + numUTF8Bytes); + } + return false; + } + }); + if (name.size() != 0) { + return name.get(0); + } else { + return null; + } } } diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java index eb0ed42c09c..86cb0915d92 100644 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/feeds/DocMakerTest.java @@ -20,7 +20,6 @@ package org.apache.lucene.benchmark.byTask.feeds; import java.io.IOException; import java.util.Properties; -import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.benchmark.BenchmarkTestCase; import org.apache.lucene.benchmark.byTask.PerfRunData; @@ -137,28 +136,28 @@ public class DocMakerTest extends BenchmarkTestCase { // Don't set anything, use the defaults doc = createTestNormsDocument(false, false, false, false); - assertTrue(doc.getField(DocMaker.TITLE_FIELD).getOmitNorms()); - assertFalse(doc.getField(DocMaker.BODY_FIELD).getOmitNorms()); + assertTrue(doc.getField(DocMaker.TITLE_FIELD).omitNorms()); + assertFalse(doc.getField(DocMaker.BODY_FIELD).omitNorms()); // Set norms to false doc = createTestNormsDocument(true, false, false, false); - assertTrue(doc.getField(DocMaker.TITLE_FIELD).getOmitNorms()); - assertFalse(doc.getField(DocMaker.BODY_FIELD).getOmitNorms()); + assertTrue(doc.getField(DocMaker.TITLE_FIELD).omitNorms()); + assertFalse(doc.getField(DocMaker.BODY_FIELD).omitNorms()); // Set norms to true doc = createTestNormsDocument(true, true, false, false); - assertFalse(doc.getField(DocMaker.TITLE_FIELD).getOmitNorms()); - assertFalse(doc.getField(DocMaker.BODY_FIELD).getOmitNorms()); + assertFalse(doc.getField(DocMaker.TITLE_FIELD).omitNorms()); + assertFalse(doc.getField(DocMaker.BODY_FIELD).omitNorms()); // Set body norms to false doc = createTestNormsDocument(false, false, true, false); - assertTrue(doc.getField(DocMaker.TITLE_FIELD).getOmitNorms()); - assertTrue(doc.getField(DocMaker.BODY_FIELD).getOmitNorms()); + assertTrue(doc.getField(DocMaker.TITLE_FIELD).omitNorms()); + assertTrue(doc.getField(DocMaker.BODY_FIELD).omitNorms()); // Set body norms to true doc = createTestNormsDocument(false, false, true, true); - assertTrue(doc.getField(DocMaker.TITLE_FIELD).getOmitNorms()); - 
assertFalse(doc.getField(DocMaker.BODY_FIELD).getOmitNorms()); + assertTrue(doc.getField(DocMaker.TITLE_FIELD).omitNorms()); + assertFalse(doc.getField(DocMaker.BODY_FIELD).omitNorms()); } } diff --git a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java index 09e055229cd..c133f96e9fc 100644 --- a/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java +++ b/modules/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTaskTest.java @@ -33,9 +33,7 @@ import org.apache.lucene.benchmark.byTask.feeds.DocMaker; import org.apache.lucene.benchmark.byTask.utils.Config; import org.apache.lucene.benchmark.byTask.utils.StreamUtils.Type; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.StringField; /** Tests the functionality of {@link WriteLineDocTask}. */ public class WriteLineDocTaskTest extends BenchmarkTestCase { @@ -46,9 +44,9 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { @Override public Document makeDocument() throws Exception { Document doc = new Document(); - doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(TITLE_FIELD, "title", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new StringField(BODY_FIELD, "body")); + doc.add(new StringField(TITLE_FIELD, "title")); + doc.add(new StringField(DATE_FIELD, "date")); return doc; } @@ -60,9 +58,9 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { @Override public Document makeDocument() throws Exception { Document doc = new Document(); - doc.add(new Field(BODY_FIELD, "body\r\ntext\ttwo", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(TITLE_FIELD, "title\r\ntext", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(DATE_FIELD, "date\r\ntext", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new StringField(BODY_FIELD, "body\r\ntext\ttwo")); + doc.add(new StringField(TITLE_FIELD, "title\r\ntext")); + doc.add(new StringField(DATE_FIELD, "date\r\ntext")); return doc; } @@ -73,8 +71,8 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { @Override public Document makeDocument() throws Exception { Document doc = new Document(); - doc.add(new Field(TITLE_FIELD, "title", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new StringField(TITLE_FIELD, "title")); + doc.add(new StringField(DATE_FIELD, "date")); return doc; } } @@ -84,8 +82,8 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { @Override public Document makeDocument() throws Exception { Document doc = new Document(); - doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new StringField(BODY_FIELD, "body")); + doc.add(new StringField(DATE_FIELD, "date")); return doc; } } @@ -95,7 +93,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { @Override public Document makeDocument() throws Exception { Document doc = new Document(); - doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new 
StringField(DATE_FIELD, "date")); return doc; } } @@ -106,7 +104,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { @Override public Document makeDocument() throws Exception { Document doc = new Document(); - doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new StringField(DATE_FIELD, "date")); return doc; } } @@ -126,9 +124,9 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase { public Document makeDocument() throws Exception { Document doc = new Document(); String name = Thread.currentThread().getName(); - doc.add(new Field(BODY_FIELD, "body_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(TITLE_FIELD, "title_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); - doc.add(new Field(DATE_FIELD, "date_" + name, Store.NO, Index.NOT_ANALYZED_NO_NORMS)); + doc.add(new StringField(BODY_FIELD, "body_" + name)); + doc.add(new StringField(TITLE_FIELD, "title_" + name)); + doc.add(new StringField(DATE_FIELD, "date_" + name)); return doc; } diff --git a/modules/facet/src/examples/org/apache/lucene/facet/example/association/AssociationIndexer.java b/modules/facet/src/examples/org/apache/lucene/facet/example/association/AssociationIndexer.java index c6bebaab08e..6d8eb9308e9 100644 --- a/modules/facet/src/examples/org/apache/lucene/facet/example/association/AssociationIndexer.java +++ b/modules/facet/src/examples/org/apache/lucene/facet/example/association/AssociationIndexer.java @@ -2,8 +2,7 @@ package org.apache.lucene.facet.example.association; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -94,10 +93,8 @@ public class AssociationIndexer { // create a plain Lucene document and add some regular Lucene fields // to it Document doc = new Document(); - doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum], - Store.YES, Index.ANALYZED)); - doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum], - Store.NO, Index.ANALYZED)); + doc.add(new Field(SimpleUtils.TITLE, TextField.TYPE_STORED, SimpleUtils.docTitles[docNum])); + doc.add(new TextField(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum])); // invoke the category document builder for adding categories to the // document and, diff --git a/modules/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java b/modules/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java index d9972221c1b..be3114ba389 100644 --- a/modules/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java +++ b/modules/facet/src/examples/org/apache/lucene/facet/example/multiCL/MultiCLIndexer.java @@ -6,8 +6,7 @@ import java.util.Random; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.Term; @@ -174,8 +173,8 @@ public class MultiCLIndexer { // create a plain Lucene document and add some regular Lucene fields // to it Document doc = new Document(); - doc.add(new Field(SimpleUtils.TITLE, 
docTitles[docNum], Store.YES, Index.ANALYZED)); - doc.add(new Field(SimpleUtils.TEXT, docTexts[docNum], Store.NO, Index.ANALYZED)); + doc.add(new Field(SimpleUtils.TITLE, TextField.TYPE_STORED, docTitles[docNum])); + doc.add(new TextField(SimpleUtils.TEXT, docTexts[docNum])); // finally add the document to the index categoryDocBuilder.build(doc); diff --git a/modules/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleIndexer.java b/modules/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleIndexer.java index cc5b803b260..78b47208dde 100644 --- a/modules/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleIndexer.java +++ b/modules/facet/src/examples/org/apache/lucene/facet/example/simple/SimpleIndexer.java @@ -5,8 +5,7 @@ import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -71,8 +70,8 @@ public class SimpleIndexer { // create a plain Lucene document and add some regular Lucene fields to it Document doc = new Document(); - doc.add(new Field(SimpleUtils.TITLE, SimpleUtils.docTitles[docNum], Store.YES, Index.ANALYZED)); - doc.add(new Field(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum], Store.NO, Index.ANALYZED)); + doc.add(new Field(SimpleUtils.TITLE, TextField.TYPE_STORED, SimpleUtils.docTitles[docNum])); + doc.add(new TextField(SimpleUtils.TEXT, SimpleUtils.docTexts[docNum])); // invoke the category document builder for adding categories to the document and, // as required, to the taxonomy index diff --git a/modules/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java b/modules/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java index 64c1d5a0335..2a21640bf0a 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java +++ b/modules/facet/src/java/org/apache/lucene/facet/index/CategoryDocumentBuilder.java @@ -10,6 +10,8 @@ import java.util.Map.Entry; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.DocumentBuilder; import org.apache.lucene.facet.index.attributes.CategoryAttribute; @@ -183,7 +185,9 @@ public class CategoryDocumentBuilder implements DocumentBuilder { // Finally creating a suitable field with stream and adding it to a // master field-list, used during the build process (see // super.build()) - fieldList.add(new Field(e.getKey(), stream)); + FieldType ft = new FieldType(TextField.TYPE_UNSTORED); + ft.setOmitNorms(true); + fieldList.add(new Field(e.getKey(), ft, stream)); } return this; @@ -289,7 +293,6 @@ public class CategoryDocumentBuilder implements DocumentBuilder { */ public Document build(Document doc) { for (Field f : fieldList) { - f.setOmitNorms(true); doc.add(f); } return doc; diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/Consts.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/Consts.java index 9c53c48f6c7..2a91eb372ac 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/Consts.java +++ 
b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/Consts.java @@ -1,7 +1,10 @@ package org.apache.lucene.facet.taxonomy.lucene; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.FieldSelectorResult; +import java.io.IOException; + +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.store.IndexInput; /** * Licensed to the Apache Software Foundation (ASF) under one or more @@ -31,17 +34,26 @@ abstract class Consts { static final char[] PAYLOAD_PARENT_CHARS = PAYLOAD_PARENT.toCharArray(); /** - * The following is a "field selector", an object which tells Lucene to - * extract only a single field rather than a whole document. + * The following is a "stored field visitor", an object + * which tells Lucene to extract only a single field + * rather than a whole document. */ - public static final FieldSelector fullPathSelector = new FieldSelector() { - public FieldSelectorResult accept(String fieldName) { - if (fieldName.equals(FULL)) { - return FieldSelectorResult.LOAD_AND_BREAK; - } - return FieldSelectorResult.NO_LOAD; - } - }; + public static final class LoadFullPathOnly extends StoredFieldVisitor { + private String fullPath; + + public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException { + final byte[] bytes = new byte[numUTF8Bytes]; + in.readBytes(bytes, 0, bytes.length); + fullPath = new String(bytes, "UTF-8"); + + // Stop loading: + return true; + } + + public String getFullPath() { + return fullPath; + } + } /** * Delimiter used for creating the full path of a category from the list of diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java index ca141c79e74..eb8a1556ef7 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyReader.java @@ -13,7 +13,6 @@ import java.util.logging.Logger; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; -import org.apache.lucene.index.Term; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; @@ -21,6 +20,7 @@ import org.apache.lucene.store.FSDirectory; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.lucene.Consts.LoadFullPathOnly; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.collections.LRUHashMap; @@ -295,8 +295,9 @@ public class LuceneTaxonomyReader implements TaxonomyReader { if (catID<0 || catID>=indexReader.maxDoc()) { return null; } - ret = indexReader.document(catID, Consts.fullPathSelector) - .get(Consts.FULL); + final LoadFullPathOnly loader = new LoadFullPathOnly(); + indexReader.document(catID, loader); + ret = loader.getFullPath(); } finally { indexReaderLock.readLock().unlock(); } diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java index 4d009d8a9a9..c3a9831b270 100644 --- 
a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/lucene/LuceneTaxonomyWriter.java @@ -17,11 +17,11 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -179,10 +179,10 @@ public class LuceneTaxonomyWriter implements TaxonomyWriter { openLuceneIndex(directory, openMode); reader = null; - parentStreamField = new Field(Consts.FIELD_PAYLOADS, parentStream); - parentStreamField.setOmitNorms(true); - fullPathField = new Field(Consts.FULL, "", Store.YES, Index.NOT_ANALYZED_NO_NORMS); - fullPathField.setIndexOptions(IndexOptions.DOCS_ONLY); + FieldType ft = new FieldType(TextField.TYPE_UNSTORED); + ft.setOmitNorms(true); + parentStreamField = new Field(Consts.FIELD_PAYLOADS, ft, parentStream); + fullPathField = new Field(Consts.FULL, StringField.TYPE_STORED, ""); this.nextID = indexWriter.maxDoc(); diff --git a/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java b/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java index 37b0a019cb4..bf711a40f0a 100644 --- a/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java +++ b/modules/facet/src/test/org/apache/lucene/facet/FacetTestBase.java @@ -15,9 +15,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; @@ -247,7 +245,7 @@ public abstract class FacetTestBase extends LuceneTestCase { CategoryDocumentBuilder builder = new CategoryDocumentBuilder(tw, iParams); builder.setCategoryPaths(categories); builder.build(d); - d.add(new Field("content", content, Store.YES, Index.ANALYZED, TermVector.NO)); + d.add(new Field("content", TextField.TYPE_STORED, content)); iw.addDocument(d); } diff --git a/modules/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java b/modules/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java index babc92f90d4..0b12f332fd2 100644 --- a/modules/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java +++ b/modules/facet/src/test/org/apache/lucene/facet/FacetTestUtils.java @@ -7,9 +7,7 @@ import java.util.Collection; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; +import 
org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; @@ -130,8 +128,7 @@ public class FacetTestUtils { cps.add(cp); Document d = new Document(); new CategoryDocumentBuilder(tw, iParams).setCategoryPaths(cps).build(d); - d.add(new Field("content", "alpha", Store.YES, Index.ANALYZED, - TermVector.NO)); + d.add(new Field("content", TextField.TYPE_STORED, "alpha")); iw.addDocument(d); } diff --git a/modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java b/modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java index 1358a701f4d..20b28a93eed 100644 --- a/modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java +++ b/modules/facet/src/test/org/apache/lucene/facet/search/CategoryListIteratorTest.java @@ -12,7 +12,7 @@ import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Payload; import org.apache.lucene.index.RandomIndexWriter; @@ -21,8 +21,6 @@ import org.apache.lucene.store.Directory; import org.junit.Test; import org.apache.lucene.util.LuceneTestCase; -import org.apache.lucene.facet.search.CategoryListIterator; -import org.apache.lucene.facet.search.PayloadIntDecodingIterator; import org.apache.lucene.util.UnsafeByteArrayOutputStream; import org.apache.lucene.util.encoding.DGapIntEncoder; import org.apache.lucene.util.encoding.IntEncoder; @@ -104,7 +102,7 @@ public class CategoryListIteratorTest extends LuceneTestCase { for (int i = 0; i < data.length; i++) { dts.setIdx(i); Document doc = new Document(); - doc.add(new Field("f", dts)); + doc.add(new TextField("f", dts)); writer.addDocument(doc); } IndexReader reader = writer.getReader(); @@ -155,10 +153,10 @@ public class CategoryListIteratorTest extends LuceneTestCase { dts.setIdx(i); Document doc = new Document(); if (i==0 || i == 2) { - doc.add(new Field("f", dts)); // only docs 0 & 2 have payloads! + doc.add(new TextField("f", dts)); // only docs 0 & 2 have payloads! 
} dts2.setIdx(i); - doc.add(new Field("f", dts2)); + doc.add(new TextField("f", dts2)); writer.addDocument(doc); writer.commit(); } @@ -168,7 +166,7 @@ public class CategoryListIteratorTest extends LuceneTestCase { for (int i = 0; i < 10; ++i) { Document d = new Document(); dts.setIdx(2); - d.add(new Field("f", dts2)); + d.add(new TextField("f", dts2)); writer.addDocument(d); if (i %10 == 0) { writer.commit(); diff --git a/modules/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java b/modules/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java index e6d5eab358b..20fdc920699 100644 --- a/modules/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java +++ b/modules/facet/src/test/org/apache/lucene/facet/search/DrillDownTest.java @@ -6,12 +6,9 @@ import java.util.ArrayList; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; @@ -28,7 +25,6 @@ import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.facet.index.CategoryDocumentBuilder; import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; -import org.apache.lucene.facet.search.DrillDown; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; @@ -84,10 +80,10 @@ public class DrillDownTest extends LuceneTestCase { ArrayList paths = new ArrayList(); Document doc = new Document(); if (i % 2 == 0) { // 50 - doc.add(new Field("content", "foo", Store.NO, Index.ANALYZED)); + doc.add(new TextField("content", "foo")); } if (i % 3 == 0) { // 33 - doc.add(new Field("content", "bar", Store.NO, Index.ANALYZED)); + doc.add(new TextField("content", "bar")); } if (i % 4 == 0) { // 25 paths.add(new CategoryPath("a")); diff --git a/modules/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java b/modules/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java index eb588830a37..541651e25d0 100644 --- a/modules/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java +++ b/modules/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java @@ -7,9 +7,7 @@ import java.util.List; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; -import org.apache.lucene.document.Field.TermVector; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriterConfig.OpenMode; @@ -24,11 +22,6 @@ import org.junit.Test; import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.facet.index.CategoryDocumentBuilder; import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams; 
-import org.apache.lucene.facet.search.FacetsAccumulator; -import org.apache.lucene.facet.search.FloatArrayAllocator; -import org.apache.lucene.facet.search.IntArrayAllocator; -import org.apache.lucene.facet.search.ScoredDocIdCollector; -import org.apache.lucene.facet.search.StandardFacetsAccumulator; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; @@ -335,7 +328,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase { cps.add(cp); Document d = new Document(); new CategoryDocumentBuilder(tw, iParams).setCategoryPaths(cps).build(d); - d.add(new Field("content", "alpha", Store.YES, Index.ANALYZED, TermVector.NO)); + d.add(new Field("content", TextField.TYPE_STORED, "alpha")); iw.addDocument(d); } diff --git a/modules/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java b/modules/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java index 3792662d255..870d62dbbe0 100644 --- a/modules/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java +++ b/modules/facet/src/test/org/apache/lucene/facet/util/TestScoredDocIDsUtils.java @@ -7,8 +7,8 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; -import org.apache.lucene.document.Field.Store; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.RandomIndexWriter; @@ -134,7 +134,7 @@ public class TestScoredDocIDsUtils extends LuceneTestCase { int docNum = it.getDocID(); assertNull( "Deleted docs must not appear in the allDocsScoredDocIds set: " + docNum, - reader.document(docNum).getFieldable("del")); + reader.document(docNum).getField("del")); } assertEquals("Wrong number of (live) documents", allDocs.size(), numIteratedDocs); @@ -166,7 +166,7 @@ public class TestScoredDocIDsUtils extends LuceneTestCase { live != null && !live.get(docNum)); assertNull( "Complement-Set must not contain docs from the original set (doc="+ docNum+")", - reader.document(docNum).getFieldable("del")); + reader.document(docNum).getField("del")); assertFalse( "Complement-Set must not contain docs from the original set (doc="+docNum+")", resultSet.fastGet(docNum)); @@ -189,8 +189,8 @@ public class TestScoredDocIDsUtils extends LuceneTestCase { protected final static String delTxt = "delete"; protected final static String alphaTxt = "alpha"; - private final static Field deletionMark = new Field(field, delTxt, Store.NO, Index.NOT_ANALYZED_NO_NORMS); - private final static Field alphaContent = new Field(field, alphaTxt, Store.NO, Index.NOT_ANALYZED_NO_NORMS); + private final static Field deletionMark = new StringField(field, delTxt); + private final static Field alphaContent = new StringField(field, alphaTxt); protected final int numDocs; @@ -208,7 +208,9 @@ public class TestScoredDocIDsUtils extends LuceneTestCase { doc.add(deletionMark); // Add a special field for docs that are marked for deletion. Later we // assert that those docs are not returned by all-scored-doc-IDs. 
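
With Field.Index gone, a stored-but-not-indexed field is expressed as a bare FieldType whose only enabled flag is stored (indexed defaults to false). A minimal sketch of the equivalence, with docNum as in the surrounding test:

    // Old: new Field("del", Integer.toString(docNum), Store.YES, Index.NO)
    FieldType storedOnly = new FieldType(); // nothing enabled by default
    storedOnly.setStored(true);             // retrievable, but never searchable
    Field del = new Field("del", storedOnly, Integer.toString(docNum));

The hunk below makes exactly this conversion for the test's "del" marker field:
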
- doc.add(new Field("del", Integer.toString(docNum), Store.YES, Index.NO)); + FieldType ft = new FieldType(); + ft.setStored(true); + doc.add(new Field("del", ft, Integer.toString(docNum))); } if (haveAlpha(docNum)) { diff --git a/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html b/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html index 867710482df..4d9e4bc657a 100644 --- a/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html +++ b/modules/grouping/src/java/org/apache/lucene/search/grouping/package.html @@ -130,7 +130,7 @@ field fall into a single group.

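Under the new field API this marker is typically built from a reusable FieldType, as TestGrouping later in this patch does. A rough equivalent of the snippet below, sketched against the trunk API:

    // Reusable type for the "groupEnd" marker: indexed, docs-only, no norms.
    FieldType groupEndType = new FieldType(StringField.TYPE_UNSTORED);
    groupEndType.setIndexOptions(IndexOptions.DOCS_ONLY); // no freqs or positions
    groupEndType.setOmitNorms(true);                      // no norms
    Field groupEnd = new Field("groupEnd", groupEndType, "x");

The patched package sample keeps the per-field setter style:
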
      List<Document> oneGroup = ...; Field groupEndField = new Field("groupEnd", "x", Field.Store.NO, Field.Index.NOT_ANALYZED); - groupEndField.setOmitTermFreqAndPositions(true); + groupEndField.setIndexOptions(IndexOptions.DOCS_ONLY); groupEndField.setOmitNorms(true); oneGroup.get(oneGroup.size()-1).add(groupEndField); diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java index 6ca0e4e5146..1dd64017618 100644 --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupHeadsCollectorTest.java @@ -21,6 +21,8 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -47,57 +49,57 @@ public class TermAllGroupHeadsCollectorTest extends LuceneTestCase { // 0 Document doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField(groupField, "author1", TextField.TYPE_STORED)); + doc.add(newField("content", "random text", TextField.TYPE_STORED)); + doc.add(newField("id", "1", StringField.TYPE_STORED)); w.addDocument(doc); // 1 doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random text blob", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "2", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField(groupField, "author1", TextField.TYPE_STORED)); + doc.add(newField("content", "some more random text blob", TextField.TYPE_STORED)); + doc.add(newField("id", "2", StringField.TYPE_STORED)); w.addDocument(doc); // 2 doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random textual data", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "3", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField(groupField, "author1", TextField.TYPE_STORED)); + doc.add(newField("content", "some more random textual data", TextField.TYPE_STORED)); + doc.add(newField("id", "3", StringField.TYPE_STORED)); w.addDocument(doc); w.commit(); // To ensure a second segment // 3 doc = new Document(); - doc.add(new Field(groupField, "author2", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "4", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField(groupField, "author2", TextField.TYPE_STORED)); + doc.add(newField("content", "some random text", TextField.TYPE_STORED)); + doc.add(newField("id", "4", StringField.TYPE_STORED)); w.addDocument(doc); // 4 doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more 
random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "5", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField(groupField, "author3", TextField.TYPE_STORED)); + doc.add(newField("content", "some more random text", TextField.TYPE_STORED)); + doc.add(newField("id", "5", StringField.TYPE_STORED)); w.addDocument(doc); // 5 doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random blob", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField(groupField, "author3", TextField.TYPE_STORED)); + doc.add(newField("content", "random blob", TextField.TYPE_STORED)); + doc.add(newField("id", "6", StringField.TYPE_STORED)); w.addDocument(doc); // 6 -- no author field doc = new Document(); - doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField("content", "random word stuck in alot of other text", TextField.TYPE_STORED)); + doc.add(newField("id", "6", StringField.TYPE_STORED)); w.addDocument(doc); // 7 -- no author field doc = new Document(); - doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "7", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); + doc.add(newField("content", "random word stuck in alot of other text", TextField.TYPE_STORED)); + doc.add(newField("id", "7", StringField.TYPE_STORED)); w.addDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); @@ -182,18 +184,18 @@ public class TermAllGroupHeadsCollectorTest extends LuceneTestCase { Document doc = new Document(); Document docNoGroup = new Document(); - Field group = newField("group", "", Field.Index.NOT_ANALYZED); + Field group = newField("group", "", StringField.TYPE_UNSTORED); doc.add(group); - Field sort1 = newField("sort1", "", Field.Index.NOT_ANALYZED); + Field sort1 = newField("sort1", "", StringField.TYPE_UNSTORED); doc.add(sort1); docNoGroup.add(sort1); - Field sort2 = newField("sort2", "", Field.Index.NOT_ANALYZED); + Field sort2 = newField("sort2", "", StringField.TYPE_UNSTORED); doc.add(sort2); docNoGroup.add(sort2); - Field sort3 = newField("sort3", "", Field.Index.NOT_ANALYZED); + Field sort3 = newField("sort3", "", StringField.TYPE_UNSTORED); doc.add(sort3); docNoGroup.add(sort3); - Field content = newField("content", "", Field.Index.ANALYZED); + Field content = newField("content", "", TextField.TYPE_UNSTORED); doc.add(content); docNoGroup.add(content); NumericField id = new NumericField("id"); diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupsCollectorTest.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupsCollectorTest.java index 0e6004e0696..6d384280b11 100644 --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupsCollectorTest.java +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TermAllGroupsCollectorTest.java @@ -5,7 +5,7 @@ package org.apache.lucene.search.grouping; * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
* The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 @@ -20,6 +20,8 @@ package org.apache.lucene.search.grouping; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.search.IndexSearcher; @@ -32,6 +34,8 @@ public class TermAllGroupsCollectorTest extends LuceneTestCase { public void testTotalGroupCount() throws Exception { final String groupField = "author"; + FieldType customType = new FieldType(); + customType.setStored(true); Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( @@ -41,51 +45,51 @@ public class TermAllGroupsCollectorTest extends LuceneTestCase { new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); // 0 Document doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author1")); + doc.add(new Field("content", TextField.TYPE_STORED, "random text")); + doc.add(new Field("id", customType, "1")); w.addDocument(doc); // 1 doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random text blob", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "2", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author1")); + doc.add(new Field("content", TextField.TYPE_STORED, "some more random text blob")); + doc.add(new Field("id", customType, "2")); w.addDocument(doc); // 2 doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random textual data", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "3", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author1")); + doc.add(new Field("content", TextField.TYPE_STORED, "some more random textual data")); + doc.add(new Field("id", customType, "3")); w.addDocument(doc); w.commit(); // To ensure a second segment // 3 doc = new Document(); - doc.add(new Field(groupField, "author2", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "4", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author2")); + doc.add(new Field("content", TextField.TYPE_STORED, "some random text")); + doc.add(new Field("id", customType, "4")); w.addDocument(doc); // 4 doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "5", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author3")); 
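
Note the argument order in the new constructor: the value moves behind the FieldType (or one of the presets such as TextField.TYPE_STORED). For quick reference, the same stored, analyzed field in both spellings:

    // 3.x: new Field("content", "random text", Field.Store.YES, Field.Index.ANALYZED)
    Field content = new Field("content", TextField.TYPE_STORED, "random text");

The rest of the test's documents follow the same pattern:
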
+ doc.add(new Field("content", TextField.TYPE_STORED, "some more random text")); + doc.add(new Field("id", customType, "5")); w.addDocument(doc); // 5 doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random blob", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author3")); + doc.add(new Field("content", TextField.TYPE_STORED, "random blob")); + doc.add(new Field("id", customType, "6")); w.addDocument(doc); // 6 -- no author field doc = new Document(); - doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("content", TextField.TYPE_STORED, "random word stuck in alot of other text")); + doc.add(new Field("id", customType, "6")); w.addDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); diff --git a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java index 3604ad3c4a1..6d5bf2dec62 100644 --- a/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java +++ b/modules/grouping/src/test/org/apache/lucene/search/grouping/TestGrouping.java @@ -23,8 +23,11 @@ import java.util.*; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -47,6 +50,9 @@ public class TestGrouping extends LuceneTestCase { final String groupField = "author"; + FieldType customType = new FieldType(); + customType.setStored(true); + Directory dir = newDirectory(); RandomIndexWriter w = new RandomIndexWriter( random, @@ -55,50 +61,50 @@ public class TestGrouping extends LuceneTestCase { new MockAnalyzer(random)).setMergePolicy(newLogMergePolicy())); // 0 Document doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "1", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author1")); + doc.add(new Field("content", TextField.TYPE_STORED, "random text")); + doc.add(new Field("id", customType, "1")); w.addDocument(doc); // 1 doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "2", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author1")); + doc.add(new Field("content", TextField.TYPE_STORED, "some more random text")); + doc.add(new Field("id", customType, "2")); w.addDocument(doc); // 2 doc = new Document(); - doc.add(new Field(groupField, "author1", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random textual data", Field.Store.YES, 
Field.Index.ANALYZED)); - doc.add(new Field("id", "3", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author1")); + doc.add(new Field("content", TextField.TYPE_STORED, "some more random textual data")); + doc.add(new Field("id", customType, "3")); w.addDocument(doc); // 3 doc = new Document(); - doc.add(new Field(groupField, "author2", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "4", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author2")); + doc.add(new Field("content", TextField.TYPE_STORED, "some random text")); + doc.add(new Field("id", customType, "4")); w.addDocument(doc); // 4 doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "some more random text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "5", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author3")); + doc.add(new Field("content", TextField.TYPE_STORED, "some more random text")); + doc.add(new Field("id", customType, "5")); w.addDocument(doc); // 5 doc = new Document(); - doc.add(new Field(groupField, "author3", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("content", "random", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO)); + doc.add(new Field(groupField, TextField.TYPE_STORED, "author3")); + doc.add(new Field("content", TextField.TYPE_STORED, "random")); + doc.add(new Field("id", customType, "6")); w.addDocument(doc); // 6 -- no author field doc = new Document(); - doc.add(new Field("content", "random word stuck in alot of other text", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("id", "6", Field.Store.YES, Field.Index.NO)); + doc.add(new Field("content", TextField.TYPE_STORED, "random word stuck in alot of other text")); + doc.add(new Field("id", customType, "6")); w.addDocument(doc); IndexSearcher indexSearcher = new IndexSearcher(w.getReader()); @@ -386,18 +392,19 @@ public class TestGrouping extends LuceneTestCase { Document doc = new Document(); docs.add(doc); if (groupValue.group != null) { - doc.add(newField("group", groupValue.group.utf8ToString(), Field.Index.NOT_ANALYZED)); + doc.add(newField("group", groupValue.group.utf8ToString(), StringField.TYPE_UNSTORED)); } - doc.add(newField("sort1", groupValue.sort1.utf8ToString(), Field.Index.NOT_ANALYZED)); - doc.add(newField("sort2", groupValue.sort2.utf8ToString(), Field.Index.NOT_ANALYZED)); + doc.add(newField("sort1", groupValue.sort1.utf8ToString(), StringField.TYPE_UNSTORED)); + doc.add(newField("sort2", groupValue.sort2.utf8ToString(), StringField.TYPE_UNSTORED)); doc.add(new NumericField("id").setIntValue(groupValue.id)); - doc.add(newField("content", groupValue.content, Field.Index.ANALYZED)); + doc.add(newField("content", groupValue.content, TextField.TYPE_UNSTORED)); //System.out.println("TEST: doc content=" + groupValue.content + " group=" + (groupValue.group == null ? 
"null" : groupValue.group.utf8ToString()) + " sort1=" + groupValue.sort1.utf8ToString() + " id=" + groupValue.id); } // So we can pull filter marking last doc in block: - final Field groupEnd = newField("groupend", "x", Field.Index.NOT_ANALYZED); - groupEnd.setIndexOptions(IndexOptions.DOCS_ONLY); - groupEnd.setOmitNorms(true); + FieldType ft = new FieldType(StringField.TYPE_UNSTORED); + ft.setIndexOptions(IndexOptions.DOCS_ONLY); + ft.setOmitNorms(true); + final Field groupEnd = newField("groupend", "x", ft); docs.get(docs.size()-1).add(groupEnd); // Add as a doc block: w.addDocuments(docs); @@ -497,15 +504,15 @@ public class TestGrouping extends LuceneTestCase { Document doc = new Document(); Document docNoGroup = new Document(); - Field group = newField("group", "", Field.Index.NOT_ANALYZED); + Field group = newField("group", "", StringField.TYPE_UNSTORED); doc.add(group); - Field sort1 = newField("sort1", "", Field.Index.NOT_ANALYZED); + Field sort1 = newField("sort1", "", StringField.TYPE_UNSTORED); doc.add(sort1); docNoGroup.add(sort1); - Field sort2 = newField("sort2", "", Field.Index.NOT_ANALYZED); + Field sort2 = newField("sort2", "", StringField.TYPE_UNSTORED); doc.add(sort2); docNoGroup.add(sort2); - Field content = newField("content", "", Field.Index.ANALYZED); + Field content = newField("content", "", TextField.TYPE_UNSTORED); doc.add(content); docNoGroup.add(content); NumericField id = new NumericField("id"); diff --git a/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java b/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java index 827a0a5b365..1229eecd41a 100644 --- a/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java +++ b/modules/join/src/test/org/apache/lucene/search/TestBlockJoin.java @@ -24,6 +24,7 @@ import java.util.List; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; @@ -42,16 +43,16 @@ public class TestBlockJoin extends LuceneTestCase { // One resume... private Document makeResume(String name, String country) { Document resume = new Document(); - resume.add(newField("docType", "resume", Field.Index.NOT_ANALYZED)); - resume.add(newField("name", name, Field.Store.YES, Field.Index.NOT_ANALYZED)); - resume.add(newField("country", country, Field.Index.NOT_ANALYZED)); + resume.add(newField("docType", "resume", StringField.TYPE_UNSTORED)); + resume.add(newField("name", name, StringField.TYPE_STORED)); + resume.add(newField("country", country, StringField.TYPE_UNSTORED)); return resume; } // ... 
has multiple jobs private Document makeJob(String skill, int year) { Document job = new Document(); - job.add(newField("skill", skill, Field.Store.YES, Field.Index.NOT_ANALYZED)); + job.add(newField("skill", skill, StringField.TYPE_STORED)); job.add(new NumericField("year").setIntValue(year)); return job; } @@ -188,15 +189,15 @@ public class TestBlockJoin extends LuceneTestCase { for(int parentDocID=0;parentDocID finfo = new SimpleOrderedMap(); for( Object o : doc.getFields() ) { - Fieldable fieldable = (Fieldable)o; + Field field = (Field)o; SimpleOrderedMap f = new SimpleOrderedMap(); - SchemaField sfield = schema.getFieldOrNull( fieldable.name() ); + SchemaField sfield = schema.getFieldOrNull( field.name() ); FieldType ftype = (sfield==null)?null:sfield.getType(); f.add( "type", (ftype==null)?null:ftype.getTypeName() ); f.add( "schema", getFieldFlags( sfield ) ); - f.add( "flags", getFieldFlags( fieldable ) ); + f.add( "flags", getFieldFlags( field ) ); - Term t = new Term(fieldable.name(), ftype!=null ? ftype.storedToIndexed(fieldable) : fieldable.stringValue()); + Term t = new Term(field.name(), ftype!=null ? ftype.storedToIndexed(field) : field.stringValue()); - f.add( "value", (ftype==null)?null:ftype.toExternal( fieldable ) ); + f.add( "value", (ftype==null)?null:ftype.toExternal( field ) ); // TODO: this really should be "stored" - f.add( "internal", fieldable.stringValue() ); // may be a binary number + f.add( "internal", field.stringValue() ); // may be a binary number - byte[] arr = fieldable.getBinaryValue(); - if (arr != null) { - f.add( "binary", Base64.byteArrayToBase64(arr, 0, arr.length)); + BytesRef bytes = field.binaryValue(); + if (bytes != null) { + f.add( "binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length)); } - f.add( "boost", fieldable.getBoost() ); + f.add( "boost", field.boost() ); f.add( "docFreq", t.text()==null ? 
0 : reader.docFreq( t ) ); // this can be 0 for non-indexed fields // If we have a term vector, return that - if( fieldable.isTermVectorStored() ) { + if( field.storeTermVectors() ) { try { - TermFreqVector v = reader.getTermFreqVector( docId, fieldable.name() ); + TermFreqVector v = reader.getTermFreqVector( docId, field.name() ); if( v != null ) { SimpleOrderedMap tfv = new SimpleOrderedMap(); for( int i=0; iemptySet()); + //Only load the id field to get the uniqueKey of that +//field + + final String finalUniqFieldName = uniqFieldName; + + final List<String> uniqValues = new ArrayList<String>(); + final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() { + @Override + public boolean stringField(FieldInfo fieldInfo, IndexInput in, int numUTF8Bytes) throws IOException { + if (fieldInfo.name.equals(finalUniqFieldName)) { + final byte[] b = new byte[numUTF8Bytes]; + in.readBytes(b, 0, b.length); + uniqValues.add(new String(b, "UTF-8")); + } else { + in.seek(in.getFilePointer() + numUTF8Bytes); + } + return false; + } + + @Override + public boolean intField(FieldInfo fieldInfo, int value) throws IOException { + if (fieldInfo.name.equals(finalUniqFieldName)) { + uniqValues.add(Integer.toString(value)); + } + return false; + } + + @Override + public boolean longField(FieldInfo fieldInfo, long value) throws IOException { + if (fieldInfo.name.equals(finalUniqFieldName)) { + uniqValues.add(Long.toString(value)); + } + return false; + } + }; + TVMapper mapper = new TVMapper(reader); mapper.fieldOptions = allFields; //this will only stay set if fieldOptions.isEmpty() (in other words, only if the user didn't set any fields) while (iter.hasNext()) { @@ -205,13 +238,11 @@ public class TermVectorComponent extends SearchComponent implements SolrCoreAwar termVectors.add("doc-" + docId, docNL); if (keyField != null) { - Document document = reader.document(docId, fieldSelector); - Fieldable uniqId = document.getFieldable(uniqFieldName); + reader.document(docId, getUniqValue); String uniqVal = null; - if (uniqId != null) { - uniqVal = keyField.getType().storedToReadable(uniqId); - } - if (uniqVal != null) { + if (uniqValues.size() != 0) { + uniqVal = uniqValues.get(0); + uniqValues.clear(); docNL.add("uniqueKey", uniqVal); termVectors.add("uniqueKeyFieldName", uniqFieldName); } diff --git a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java index d2c5b107456..977e846f830 100644 --- a/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java +++ b/solr/core/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java @@ -33,6 +33,7 @@ import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.*; import org.apache.lucene.search.vectorhighlight.FastVectorHighlighter; @@ -416,7 +417,14 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf // END: Hack SolrParams params = req.getParams(); - String[] docTexts = doc.getValues(fieldName); + IndexableField[] docFields = doc.getFields(fieldName); + List<String> listFields = new ArrayList<String>(); + for (IndexableField field : docFields) { + listFields.add(field.stringValue()); + } + + String[] docTexts = (String[]) listFields.toArray(new String[listFields.size()]); + // 
according to Document javadoc, doc.getValues() never returns null. check empty instead of null if (docTexts.length == 0) return; @@ -537,7 +545,15 @@ public class DefaultSolrHighlighter extends SolrHighlighter implements PluginInf private void alternateField( NamedList docSummaries, SolrParams params, Document doc, String fieldName ){ String alternateField = params.getFieldParam(fieldName, HighlightParams.ALTERNATE_FIELD); if (alternateField != null && alternateField.length() > 0) { - String[] altTexts = doc.getValues(alternateField); + IndexableField[] docFields = doc.getFields(alternateField); + List<String> listFields = new ArrayList<String>(); + for (IndexableField field : docFields) { + if (field.binaryValue() == null) + listFields.add(field.stringValue()); + } + + String[] altTexts = listFields.toArray(new String[listFields.size()]); + if (altTexts != null && altTexts.length > 0){ int alternateFieldLen = params.getFieldInt(fieldName, HighlightParams.ALTERNATE_FIELD_LENGTH,0); if( alternateFieldLen <= 0 ){ diff --git a/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java b/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java index 55db776cfe7..870c0997a85 100755 --- a/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java @@ -16,13 +16,16 @@ */ package org.apache.solr.response; +import java.io.*; +import java.util.*; + import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrDocument; -import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.params.CommonParams; -import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.JavaBinCodec; +import org.apache.solr.common.util.NamedList; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.response.transform.DocTransformer; import org.apache.solr.response.transform.TransformContext; @@ -33,9 +36,6 @@ import org.apache.solr.search.SolrIndexSearcher; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.*; -import java.util.*; - public class BinaryResponseWriter implements BinaryQueryResponseWriter { private static final Logger LOG = LoggerFactory.getLogger(BinaryResponseWriter.class); @@ -159,7 +159,7 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter { public SolrDocument getDoc(Document doc) { SolrDocument solrDoc = new SolrDocument(); - for (Fieldable f : doc.getFields()) { + for (IndexableField f : doc) { String fieldName = f.name(); if( !returnFields.wantsField(fieldName) ) continue; @@ -168,8 +168,16 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter { if(sf != null) ft =sf.getType(); Object val; if (ft == null) { // handle fields not in the schema - if (f.isBinary()) val = f.getBinaryValue(); - else val = f.stringValue(); + BytesRef bytesRef = f.binaryValue(); + if (bytesRef != null) { + if (bytesRef.offset == 0 && bytesRef.length == bytesRef.bytes.length) { + val = bytesRef.bytes; + } else { + final byte[] bytes = new byte[bytesRef.length]; + val = bytes; + System.arraycopy(bytesRef.bytes, bytesRef.offset, bytes, 0, bytesRef.length); + } + } else val = f.stringValue(); } else { try { if (useFieldObjects && KNOWN_TYPES.contains(ft.getClass())) { diff --git a/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java 
b/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java index d35ad3c2bd6..e35dfde6847 100755 --- a/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/CSVResponseWriter.java @@ -19,8 +19,7 @@ package org.apache.solr.response; import org.apache.commons.csv.CSVPrinter; import org.apache.commons.csv.CSVStrategy; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.SolrException; @@ -32,10 +31,8 @@ import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.StrField; -import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocList; import org.apache.solr.search.ReturnFields; -import org.apache.solr.search.SolrIndexSearcher; import java.io.CharArrayWriter; import java.io.IOException; @@ -146,7 +143,7 @@ class CSVWriter extends TextResponseWriter { CSVSharedBufPrinter mvPrinter; // printer used to encode multiple values in a single CSV value // used to collect values - List<Fieldable> values = new ArrayList<Fieldable>(1); // low starting amount in case there are many fields + List<IndexableField> values = new ArrayList<IndexableField>(1); // low starting amount in case there are many fields int tmp; } diff --git a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java index 3147b1e8d3c..4a9e0f81a82 100644 --- a/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/JSONResponseWriter.java @@ -26,7 +26,7 @@ import java.util.Iterator; import java.util.Map; import java.util.Set; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.util.NamedList; @@ -302,10 +302,10 @@ class JSONWriter extends TextResponseWriter { protected static class MultiValueField { final SchemaField sfield; - final ArrayList<Fieldable> fields; - MultiValueField(SchemaField sfield, Fieldable firstVal) { + final ArrayList<IndexableField> fields; + MultiValueField(SchemaField sfield, IndexableField firstVal) { this.sfield = sfield; - this.fields = new ArrayList<Fieldable>(4); + this.fields = new ArrayList<IndexableField>(4); this.fields.add(firstVal); } } diff --git a/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java b/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java index 36abcaa7d8f..ea11532c03f 100644 --- a/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/TextResponseWriter.java @@ -22,7 +22,7 @@ import java.io.Writer; import java.util.*; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.util.FastWriter; @@ -120,8 +120,8 @@ public abstract class TextResponseWriter { } else if (val instanceof String) { writeStr(name, val.toString(), true); // micro-optimization... 
using toString() avoids a cast first - } else if (val instanceof Fieldable) { - Fieldable f = (Fieldable)val; + } else if (val instanceof IndexableField) { + IndexableField f = (IndexableField)val; SchemaField sf = schema.getFieldOrNull( f.name() ); if( sf != null ) { sf.getType().write(this, name, f); @@ -202,7 +202,7 @@ public abstract class TextResponseWriter { public final SolrDocument toSolrDocument( Document doc ) { SolrDocument out = new SolrDocument(); - for( Fieldable f : doc.getFields() ) { + for( IndexableField f : doc) { // Make sure multivalued fields are represented as lists Object existing = out.get(f.name()); if (existing == null) { diff --git a/solr/core/src/java/org/apache/solr/schema/BCDIntField.java b/solr/core/src/java/org/apache/solr/schema/BCDIntField.java index ae59f27e3b6..fb0f0b6519a 100644 --- a/solr/core/src/java/org/apache/solr/schema/BCDIntField.java +++ b/solr/core/src/java/org/apache/solr/schema/BCDIntField.java @@ -20,7 +20,7 @@ package org.apache.solr.schema; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; import org.apache.solr.search.QParser; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.util.BCDUtils; import org.apache.solr.response.TextResponseWriter; @@ -51,13 +51,13 @@ public class BCDIntField extends FieldType { } @Override - public String toExternal(Fieldable f) { + public String toExternal(IndexableField f) { return indexedToReadable(f.stringValue()); } // Note, this can't return type 'Integer' because BCDStrField and BCDLong extend it @Override - public Object toObject(Fieldable f) { + public Object toObject(IndexableField f) { return Integer.valueOf( toExternal(f) ); } @@ -67,7 +67,7 @@ public class BCDIntField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeInt(name,toExternal(f)); } } diff --git a/solr/core/src/java/org/apache/solr/schema/BCDLongField.java b/solr/core/src/java/org/apache/solr/schema/BCDLongField.java index fa78ba2b190..83359751e01 100644 --- a/solr/core/src/java/org/apache/solr/schema/BCDLongField.java +++ b/solr/core/src/java/org/apache/solr/schema/BCDLongField.java @@ -17,13 +17,13 @@ package org.apache.solr.schema; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; /** * */ public class BCDLongField extends BCDIntField { @Override - public Long toObject(Fieldable f) { + public Long toObject(IndexableField f) { return Long.valueOf( toExternal(f) ); } } diff --git a/solr/core/src/java/org/apache/solr/schema/BCDStrField.java b/solr/core/src/java/org/apache/solr/schema/BCDStrField.java index a7bf0828688..345f789ed1e 100644 --- a/solr/core/src/java/org/apache/solr/schema/BCDStrField.java +++ b/solr/core/src/java/org/apache/solr/schema/BCDStrField.java @@ -17,7 +17,7 @@ package org.apache.solr.schema; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; /** * */ @@ -27,7 +27,7 @@ public class BCDStrField extends BCDIntField { * is not an integer, it will not survive the base10k conversion! 
*/ @Override - public String toObject(Fieldable f) { + public String toObject(IndexableField f) { return toExternal(f); } } diff --git a/solr/core/src/java/org/apache/solr/schema/BinaryField.java b/solr/core/src/java/org/apache/solr/schema/BinaryField.java index 97567660bfc..0223ed51571 100644 --- a/solr/core/src/java/org/apache/solr/schema/BinaryField.java +++ b/solr/core/src/java/org/apache/solr/schema/BinaryField.java @@ -17,24 +17,25 @@ package org.apache.solr.schema; -import org.apache.solr.response.TextResponseWriter; -import org.apache.solr.common.util.Base64; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.document.Field; -import org.apache.lucene.search.SortField; - import java.io.IOException; import java.nio.ByteBuffer; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.common.util.Base64; +import org.apache.solr.response.TextResponseWriter; + public class BinaryField extends FieldType { - private String toBase64String(ByteBuffer buf) { + private String toBase64String(ByteBuffer buf) { return Base64.byteArrayToBase64(buf.array(), buf.position(), buf.limit()-buf.position()); } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeStr(name, toBase64String(toObject(f)), false); } @@ -45,17 +46,18 @@ public class BinaryField extends FieldType { @Override - public String toExternal(Fieldable f) { + public String toExternal(IndexableField f) { return toBase64String(toObject(f)); } - - @Override - public ByteBuffer toObject(Fieldable f) { - return ByteBuffer.wrap(f.getBinaryValue(), f.getBinaryOffset(), f.getBinaryLength() ) ; - } @Override - public Fieldable createField(SchemaField field, Object val, float boost) { + public ByteBuffer toObject(IndexableField f) { + BytesRef bytes = f.binaryValue(); + return ByteBuffer.wrap(bytes.bytes, bytes.offset, bytes.length); + } + + @Override + public IndexableField createField(SchemaField field, Object val, float boost) { if (val == null) return null; if (!field.stored()) { log.trace("Ignoring unstored binary field: " + field); @@ -79,7 +81,7 @@ public class BinaryField extends FieldType { len = buf.length; } - Field f = new Field(field.getName(), buf, offset, len); + Field f = new org.apache.lucene.document.BinaryField(field.getName(), buf, offset, len); f.setBoost(boost); return f; } diff --git a/solr/core/src/java/org/apache/solr/schema/BoolField.java b/solr/core/src/java/org/apache/solr/schema/BoolField.java index fd3bdf9d5c2..02ac067aecc 100644 --- a/solr/core/src/java/org/apache/solr/schema/BoolField.java +++ b/solr/core/src/java/org/apache/solr/schema/BoolField.java @@ -18,13 +18,14 @@ package org.apache.solr.schema; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.FieldCache; +import org.apache.lucene.search.SortField; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.queries.function.DocValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.BoolDocValues; import org.apache.lucene.queries.function.valuesource.OrdFieldSource; -import org.apache.lucene.search.FieldCache; -import org.apache.lucene.search.SortField; -import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.CharsRef; import org.apache.lucene.util.mutable.MutableValue; import org.apache.lucene.util.mutable.MutableValueBool; @@ -33,7 +34,6 @@ import org.apache.solr.search.function.*; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.document.Fieldable; import org.apache.solr.response.TextResponseWriter; import org.apache.solr.analysis.SolrAnalyzer; @@ -117,12 +117,12 @@ public class BoolField extends FieldType { } @Override - public String toExternal(Fieldable f) { + public String toExternal(IndexableField f) { return indexedToReadable(f.stringValue()); } @Override - public Boolean toObject(Fieldable f) { + public Boolean toObject(IndexableField f) { return Boolean.valueOf( toExternal(f) ); } @@ -151,7 +151,7 @@ public class BoolField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeBool(name, f.stringValue().charAt(0) == 'T'); } } diff --git a/solr/core/src/java/org/apache/solr/schema/ByteField.java b/solr/core/src/java/org/apache/solr/schema/ByteField.java index ec35f53a067..37ad2796119 100644 --- a/solr/core/src/java/org/apache/solr/schema/ByteField.java +++ b/solr/core/src/java/org/apache/solr/schema/ByteField.java @@ -16,9 +16,9 @@ package org.apache.solr.schema; * limitations under the License. */ -import org.apache.lucene.document.Fieldable; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.ByteFieldSource; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.SortField; import org.apache.lucene.search.cache.ByteValuesCreator; import org.apache.lucene.search.cache.CachedArrayCreator; @@ -52,7 +52,7 @@ public class ByteField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { String s = f.stringValue(); // these values may be from a legacy lucene index, which may @@ -77,7 +77,7 @@ public class ByteField extends FieldType { } @Override - public Byte toObject(Fieldable f) { + public Byte toObject(IndexableField f) { return Byte.valueOf(toExternal(f)); } } diff --git a/solr/core/src/java/org/apache/solr/schema/CollationField.java b/solr/core/src/java/org/apache/solr/schema/CollationField.java index 7f915a4724a..143ae6fb971 100644 --- a/solr/core/src/java/org/apache/solr/schema/CollationField.java +++ b/solr/core/src/java/org/apache/solr/schema/CollationField.java @@ -31,7 +31,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.collation.CollationKeyAnalyzer; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermRangeQuery; @@ -185,7 +185,7 @@ public class CollationField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { 
writer.writeStr(name, f.stringValue(), true); } diff --git a/solr/core/src/java/org/apache/solr/schema/DateField.java b/solr/core/src/java/org/apache/solr/schema/DateField.java index edf3abda053..37632fb3850 100644 --- a/solr/core/src/java/org/apache/solr/schema/DateField.java +++ b/solr/core/src/java/org/apache/solr/schema/DateField.java @@ -17,15 +17,15 @@ package org.apache.solr.schema; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexReader.AtomicReaderContext; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.queries.function.DocValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.docvalues.StringIndexDocValues; import org.apache.lucene.queries.function.valuesource.FieldCacheSource; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.TermRangeQuery; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.CharsRef; import org.apache.solr.common.SolrException; @@ -186,7 +186,7 @@ public class DateField extends FieldType { } } - public Fieldable createField(SchemaField field, Object value, float boost) { + public IndexableField createField(SchemaField field, Object value, float boost) { // Convert to a string before indexing if(value instanceof Date) { value = toInternal( (Date)value ) + Z; @@ -211,7 +211,7 @@ public class DateField extends FieldType { } @Override - public String toExternal(Fieldable f) { + public String toExternal(IndexableField f) { return indexedToReadable(f.stringValue()); } @@ -220,7 +220,7 @@ public class DateField extends FieldType { } @Override - public Date toObject(Fieldable f) { + public Date toObject(IndexableField f) { try { return parseDate( toExternal(f) ); } @@ -235,7 +235,7 @@ public class DateField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeDate(name, toExternal(f)); } diff --git a/solr/core/src/java/org/apache/solr/schema/DoubleField.java b/solr/core/src/java/org/apache/solr/schema/DoubleField.java index 178263333f5..d7d7d0c1243 100644 --- a/solr/core/src/java/org/apache/solr/schema/DoubleField.java +++ b/solr/core/src/java/org/apache/solr/schema/DoubleField.java @@ -17,9 +17,9 @@ package org.apache.solr.schema; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.DoubleFieldSource; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.SortField; import org.apache.lucene.search.cache.CachedArrayCreator; import org.apache.lucene.search.cache.DoubleValuesCreator; @@ -52,7 +52,7 @@ public class DoubleField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { String s = f.stringValue(); // these values may be from a legacy lucene index, which may @@ -78,7 +78,7 @@ public class DoubleField extends FieldType { @Override - public Double toObject(Fieldable f) { + public Double toObject(IndexableField f) { return Double.valueOf(toExternal(f)); } } diff --git 
a/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java b/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java index f600b2d5f2d..5ba98a4d751 100755 --- a/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java +++ b/solr/core/src/java/org/apache/solr/schema/ExternalFileField.java @@ -18,7 +18,7 @@ package org.apache.solr.schema; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.SortField; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.search.function.FileFloatSource; import org.apache.solr.search.QParser; import org.apache.solr.response.TextResponseWriter; @@ -76,7 +76,7 @@ public class ExternalFileField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { throw new UnsupportedOperationException(); } diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index db410e87f90..34fb0dbc347 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -22,8 +22,8 @@ import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.Term; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.Query; @@ -91,7 +91,7 @@ public abstract class FieldType extends FieldProperties { } /** - * A "polyField" is a FieldType that can produce more than one Fieldable instance for a single value, via the {@link #createFields(org.apache.solr.schema.SchemaField, Object, float)} method. This is useful + * A "polyField" is a FieldType that can produce more than one IndexableField instance for a single value, via the {@link #createFields(org.apache.solr.schema.SchemaField, Object, float)} method. This is useful * when hiding the implementation details of a field from the Solr end user. For instance, a spatial point may be represented by multiple different fields. 
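The "polyField" notion this javadoc describes is what PointType and LatLonType rely on further down. A toy sketch of a one-value-to-many-fields expansion under the new API; the field names and the lat/lon split are invented for illustration, and only the FieldType setters and the Field(name, type, value) constructor come from this patch:

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexableField;

public class PolyFieldSketch {
  /** Expands one "lat,lon" value into two indexed sub-fields plus one stored field. */
  public static IndexableField[] createFields(String name, String latLon) {
    String[] parts = latLon.split(",");

    FieldType indexedOnly = new FieldType();
    indexedOnly.setIndexed(true);
    indexedOnly.setTokenized(false); // each coordinate stays a single term

    FieldType storedOnly = new FieldType();
    storedOnly.setStored(true);      // retrievable but not searchable

    return new IndexableField[] {
        new Field(name + "_0", indexedOnly, parts[0]),
        new Field(name + "_1", indexedOnly, parts[1]),
        new Field(name, storedOnly, latLon) // original input survives verbatim
    };
  }

  public static void main(String[] args) {
    for (IndexableField f : createFields("home", "35.0,-79.34")) {
      System.out.println(f.name() + " -> " + f.stringValue());
    }
  }
}

LatLonType below uses exactly this stored-only trick for the original "lat,lon" string, since the searchable data lives in the sub-fields.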
* @return true if the {@link #createFields(org.apache.solr.schema.SchemaField, Object, float)} method may return more than one field */ @@ -235,7 +235,7 @@ public abstract class FieldType extends FieldProperties { * * */ - public Fieldable createField(SchemaField field, Object value, float boost) { + public IndexableField createField(SchemaField field, Object value, float boost) { if (!field.indexed() && !field.stored()) { if (log.isTraceEnabled()) log.trace("Ignoring unindexed/unstored field: " + field); @@ -250,78 +250,47 @@ public abstract class FieldType extends FieldProperties { } if (val==null) return null; - return createField(field.getName(), val, - getFieldStore(field, val), getFieldIndex(field, val), - getFieldTermVec(field, val), field.omitNorms(), - getIndexOptions(field, val), boost); + org.apache.lucene.document.FieldType newType = new org.apache.lucene.document.FieldType(); + newType.setIndexed(field.indexed()); + newType.setTokenized(field.isTokenized()); + newType.setStored(field.stored()); + newType.setOmitNorms(field.omitNorms()); + newType.setIndexOptions(getIndexOptions(field, val)); + newType.setStoreTermVectors(field.storeTermVector()); + newType.setStoreTermVectorOffsets(field.storeTermOffsets()); + newType.setStoreTermVectorPositions(field.storeTermPositions()); + + return createField(field.getName(), val, newType, boost); } - /** * Create the field from native Lucene parts. Mostly intended for use by FieldTypes outputing multiple * Fields per SchemaField * @param name The name of the field * @param val The _internal_ value to index - * @param storage {@link org.apache.lucene.document.Field.Store} - * @param index {@link org.apache.lucene.document.Field.Index} - * @param vec {@link org.apache.lucene.document.Field.TermVector} - * @param omitNorms true if norms should be omitted - * @param options options for what should be indexed in the postings + * @param type {@link org.apache.lucene.document.FieldType} * @param boost The boost value - * @return the {@link org.apache.lucene.document.Fieldable}. + * @return the {@link org.apache.lucene.index.IndexableField}. */ - protected Fieldable createField(String name, String val, Field.Store storage, Field.Index index, - Field.TermVector vec, boolean omitNorms, IndexOptions options, float boost){ - Field f = new Field(name, - val, - storage, - index, - vec); - if (index.isIndexed()) { - f.setOmitNorms(omitNorms); - f.setIndexOptions(options); - f.setBoost(boost); - } + protected IndexableField createField(String name, String val, org.apache.lucene.document.FieldType type, float boost){ + Field f = new Field(name, type, val); + f.setBoost(boost); return f; } /** - * Given a {@link org.apache.solr.schema.SchemaField}, create one or more {@link org.apache.lucene.document.Fieldable} instances + * Given a {@link org.apache.solr.schema.SchemaField}, create one or more {@link org.apache.lucene.index.IndexableField} instances * @param field the {@link org.apache.solr.schema.SchemaField} * @param value The value to add to the field * @param boost The boost to apply - * @return An array of {@link org.apache.lucene.document.Fieldable} + * @return An array of {@link org.apache.lucene.index.IndexableField} * * @see #createField(SchemaField, Object, float) * @see #isPolyField() */ - public Fieldable[] createFields(SchemaField field, Object value, float boost) { - Fieldable f = createField( field, value, boost); - return f==null ? 
new Fieldable[]{} : new Fieldable[]{f}; - } - - /* Helpers for field construction */ - protected Field.TermVector getFieldTermVec(SchemaField field, - String internalVal) { - Field.TermVector ftv = Field.TermVector.NO; - if (field.storeTermPositions() && field.storeTermOffsets()) - ftv = Field.TermVector.WITH_POSITIONS_OFFSETS; - else if (field.storeTermPositions()) - ftv = Field.TermVector.WITH_POSITIONS; - else if (field.storeTermOffsets()) - ftv = Field.TermVector.WITH_OFFSETS; - else if (field.storeTermVector()) - ftv = Field.TermVector.YES; - return ftv; - } - protected Field.Store getFieldStore(SchemaField field, - String internalVal) { - return field.stored() ? Field.Store.YES : Field.Store.NO; - } - protected Field.Index getFieldIndex(SchemaField field, - String internalVal) { - return field.indexed() ? (isTokenized() ? Field.Index.ANALYZED : - Field.Index.NOT_ANALYZED) : Field.Index.NO; + public IndexableField[] createFields(SchemaField field, Object value, float boost) { + IndexableField f = createField( field, value, boost); + return f==null ? new IndexableField[]{} : new IndexableField[]{f}; } protected IndexOptions getIndexOptions(SchemaField field, String internalVal) { @@ -350,9 +319,9 @@ public abstract class FieldType extends FieldProperties { * value * @see #toInternal */ - public String toExternal(Fieldable f) { + public String toExternal(IndexableField f) { // currently used in writing XML of the search result (but perhaps - // a more efficient toXML(Fieldable f, Writer w) should be used + // a more efficient toXML(IndexableField f, Writer w) should be used // in the future. return f.stringValue(); } @@ -362,14 +331,14 @@ public abstract class FieldType extends FieldProperties { * @see #toInternal * @since solr 1.3 */ - public Object toObject(Fieldable f) { + public Object toObject(IndexableField f) { return toExternal(f); // by default use the string } public Object toObject(SchemaField sf, BytesRef term) { final CharsRef ref = new CharsRef(term.length); indexedToReadable(term, ref); - final Fieldable f = createField(sf, ref.toString(), 1.0f); + final IndexableField f = createField(sf, ref.toString(), 1.0f); return toObject(f); } @@ -385,12 +354,12 @@ public abstract class FieldType extends FieldProperties { } /** Given the stored field, return the human readable representation */ - public String storedToReadable(Fieldable f) { + public String storedToReadable(IndexableField f) { return toExternal(f); } /** Given the stored field, return the indexed form */ - public String storedToIndexed(Fieldable f) { + public String storedToIndexed(IndexableField f) { // right now, the transformation of single valued fields like SortableInt // is done when the Field is created, not at analysis time... this means // that the indexed form is the same as the stored field form. 
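This rewritten createField is the template the rest of the patch repeats: the old Field.Store/Field.Index/Field.TermVector enum triple, together with the just-removed getFieldTermVec/getFieldStore/getFieldIndex helpers, collapses into boolean properties on org.apache.lucene.document.FieldType. A sketch of the rough equivalences, using only setters that appear in this patch (the concrete values are an example, not a recommendation):

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

public class EnumToFieldType {
  public static void main(String[] args) {
    // Old style: new Field("title", "Lucene in Action", Field.Store.YES,
    //     Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)
    FieldType t = new FieldType();
    t.setStored(true);                   // Field.Store.YES
    t.setIndexed(true);                  // Field.Index.ANALYZED or NOT_ANALYZED
    t.setTokenized(true);                // ANALYZED; false would mean NOT_ANALYZED
    t.setOmitNorms(false);               // the *_NO_NORMS variants set this to true
    t.setStoreTermVectors(true);         // Field.TermVector.YES
    t.setStoreTermVectorPositions(true); // ...WITH_POSITIONS
    t.setStoreTermVectorOffsets(true);   // ...WITH_OFFSETS

    Field title = new Field("title", t, "Lucene in Action");
    title.setBoost(2.0f); // boost now lives on the Field instance
    System.out.println(title.name());
  }
}

Note that the FieldType object carries only index-time structure; per-field boost stays on the Field itself.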
@@ -569,7 +538,7 @@ public abstract class FieldType extends FieldProperties { /** * calls back to TextResponseWriter to write the field value */ - public abstract void write(TextResponseWriter writer, String name, Fieldable f) throws IOException; + public abstract void write(TextResponseWriter writer, String name, IndexableField f) throws IOException; /** diff --git a/solr/core/src/java/org/apache/solr/schema/FloatField.java b/solr/core/src/java/org/apache/solr/schema/FloatField.java index 59be59ac691..15ed002e644 100644 --- a/solr/core/src/java/org/apache/solr/schema/FloatField.java +++ b/solr/core/src/java/org/apache/solr/schema/FloatField.java @@ -23,7 +23,7 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.cache.CachedArrayCreator; import org.apache.lucene.search.cache.FloatValuesCreator; import org.apache.solr.search.QParser; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.response.TextResponseWriter; import java.util.Map; @@ -50,7 +50,7 @@ public class FloatField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { String s = f.stringValue(); // these values may be from a legacy lucene index, which may @@ -75,7 +75,7 @@ public class FloatField extends FieldType { } @Override - public Float toObject(Fieldable f) { + public Float toObject(IndexableField f) { return Float.valueOf( toExternal(f) ); } } diff --git a/solr/core/src/java/org/apache/solr/schema/GeoHashField.java b/solr/core/src/java/org/apache/solr/schema/GeoHashField.java index f28878ca95c..8380360e146 100644 --- a/solr/core/src/java/org/apache/solr/schema/GeoHashField.java +++ b/solr/core/src/java/org/apache/solr/schema/GeoHashField.java @@ -17,9 +17,9 @@ package org.apache.solr.schema; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.LiteralValueSource; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.SortField; import org.apache.lucene.spatial.geohash.GeoHashUtils; @@ -68,14 +68,14 @@ public class GeoHashField extends FieldType implements SpatialQueryable { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeStr(name, toExternal(f), false); } @Override - public String toExternal(Fieldable f) { + public String toExternal(IndexableField f) { double[] latLon = GeoHashUtils.decode(f.stringValue()); return latLon[0] + "," + latLon[1]; } diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java index 237bb463287..90c27564e04 100644 --- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java @@ -19,9 +19,8 @@ package org.apache.solr.schema; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Similarity; import 
org.apache.lucene.search.SimilarityProvider; import org.apache.lucene.util.Version; @@ -34,7 +33,6 @@ import org.apache.solr.common.util.SystemIdResolver; import org.apache.solr.core.SolrConfig; import org.apache.solr.core.Config; import org.apache.solr.core.SolrResourceLoader; -import org.apache.solr.search.SolrQueryParser; import org.apache.solr.search.SolrSimilarityProvider; import org.apache.solr.util.plugin.SolrCoreAware; import org.w3c.dom.*; @@ -43,8 +41,6 @@ import org.xml.sax.InputSource; import javax.xml.xpath.XPath; import javax.xml.xpath.XPathConstants; import javax.xml.xpath.XPathExpressionException; -import javax.xml.xpath.XPathFactory; -import java.io.InputStream; import java.io.Reader; import java.io.IOException; import java.util.*; @@ -258,8 +254,8 @@ public final class IndexSchema { * @return null if this schema has no unique key field * @see #printableUniqueKey */ - public Fieldable getUniqueKeyField(org.apache.lucene.document.Document doc) { - return doc.getFieldable(uniqueKeyFieldName); // this should return null if name is null + public IndexableField getUniqueKeyField(org.apache.lucene.document.Document doc) { + return doc.getField(uniqueKeyFieldName); // this should return null if name is null } /** @@ -268,8 +264,8 @@ public final class IndexSchema { * @return null if this schema has no unique key field */ public String printableUniqueKey(org.apache.lucene.document.Document doc) { - Fieldable f = doc.getFieldable(uniqueKeyFieldName); - return f==null ? null : uniqueKeyFieldType.toExternal(f); + IndexableField f = doc.getField(uniqueKeyFieldName); + return f==null ? null : uniqueKeyFieldType.toExternal(f); } private SchemaField getIndexedField(String fname) { diff --git a/solr/core/src/java/org/apache/solr/schema/IntField.java b/solr/core/src/java/org/apache/solr/schema/IntField.java index f52b7f9cc6d..72bc5971f58 100644 --- a/solr/core/src/java/org/apache/solr/schema/IntField.java +++ b/solr/core/src/java/org/apache/solr/schema/IntField.java @@ -23,7 +23,7 @@ import org.apache.lucene.search.SortField; import org.apache.lucene.search.cache.CachedArrayCreator; import org.apache.lucene.search.cache.IntValuesCreator; import org.apache.solr.search.QParser; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.response.TextResponseWriter; import java.util.Map; @@ -50,7 +50,7 @@ public class IntField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { String s = f.stringValue(); // these values may be from a legacy lucene index, which may @@ -75,7 +75,7 @@ public class IntField extends FieldType { } @Override - public Integer toObject(Fieldable f) { + public Integer toObject(IndexableField f) { return Integer.valueOf( toExternal(f) ); } } diff --git a/solr/core/src/java/org/apache/solr/schema/LatLonType.java b/solr/core/src/java/org/apache/solr/schema/LatLonType.java index 119b1983b33..903d663d554 100644 --- a/solr/core/src/java/org/apache/solr/schema/LatLonType.java +++ b/solr/core/src/java/org/apache/solr/schema/LatLonType.java @@ -16,9 +16,8 @@ package org.apache.solr.schema; * limitations under the License. 
*/ -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.queries.function.DocValues; @@ -55,10 +54,10 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery } @Override - public Fieldable[] createFields(SchemaField field, Object value, float boost) { + public IndexableField[] createFields(SchemaField field, Object value, float boost) { String externalVal = value.toString(); //we could have tileDiff + 3 fields (two for the lat/lon, one for storage) - Fieldable[] f = new Fieldable[(field.indexed() ? 2 : 0) + (field.stored() ? 1 : 0)]; + IndexableField[] f = new IndexableField[(field.indexed() ? 2 : 0) + (field.stored() ? 1 : 0)]; if (field.indexed()) { int i = 0; double[] latLon = new double[0]; @@ -76,9 +75,9 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery } if (field.stored()) { - f[f.length - 1] = createField(field.getName(), externalVal, - getFieldStore(field, externalVal), Field.Index.NO, Field.TermVector.NO, - false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, boost); + FieldType customType = new FieldType(); + customType.setStored(true); + f[f.length - 1] = createField(field.getName(), externalVal, customType, boost); } return f; } @@ -268,7 +267,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { writer.writeStr(name, f.stringValue(), false); } @@ -282,7 +281,7 @@ public class LatLonType extends AbstractSubTypeFieldType implements SpatialQuery //It never makes sense to create a single field, so make it impossible to happen @Override - public Fieldable createField(SchemaField field, Object value, float boost) { + public IndexableField createField(SchemaField field, Object value, float boost) { throw new UnsupportedOperationException("LatLonType uses multiple fields. 
field=" + field.getName()); } diff --git a/solr/core/src/java/org/apache/solr/schema/LongField.java b/solr/core/src/java/org/apache/solr/schema/LongField.java index 6a2e1e5945c..6c3a9dc7b1a 100644 --- a/solr/core/src/java/org/apache/solr/schema/LongField.java +++ b/solr/core/src/java/org/apache/solr/schema/LongField.java @@ -17,9 +17,9 @@ package org.apache.solr.schema; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.LongFieldSource; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.SortField; import org.apache.lucene.search.cache.CachedArrayCreator; import org.apache.lucene.search.cache.LongValuesCreator; @@ -52,7 +52,7 @@ public class LongField extends FieldType { } @Override - public void write(TextResponseWriter writer, String name, Fieldable f) throws IOException { + public void write(TextResponseWriter writer, String name, IndexableField f) throws IOException { String s = f.stringValue(); // these values may be from a legacy lucene index, which may @@ -77,7 +77,7 @@ public class LongField extends FieldType { } @Override - public Long toObject(Fieldable f) { + public Long toObject(IndexableField f) { return Long.valueOf( toExternal(f) ); } } diff --git a/solr/core/src/java/org/apache/solr/schema/PointType.java b/solr/core/src/java/org/apache/solr/schema/PointType.java index 07ccc12c771..0fcdde34313 100644 --- a/solr/core/src/java/org/apache/solr/schema/PointType.java +++ b/solr/core/src/java/org/apache/solr/schema/PointType.java @@ -17,11 +17,9 @@ package org.apache.solr.schema; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.queries.function.valuesource.VectorValueSource; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; @@ -69,7 +67,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable { } @Override - public Fieldable[] createFields(SchemaField field, Object value, float boost) { + public IndexableField[] createFields(SchemaField field, Object value, float boost) { String externalVal = value.toString(); String[] point = new String[0]; try { @@ -79,7 +77,7 @@ public class PointType extends CoordinateFieldType implements SpatialQueryable { } // TODO: this doesn't currently support polyFields as sub-field types - Fieldable[] f = new Fieldable[ (field.indexed() ? dimension : 0) + (field.stored() ? 1 : 0) ]; + IndexableField[] f = new IndexableField[ (field.indexed() ? dimension : 0) + (field.stored() ? 1 : 0) ]; if (field.indexed()) { for (int i=0; i)null); } - /** Retrieve a {@link Document} using a {@link org.apache.lucene.document.FieldSelector} - * This method does not currently use the Solr document cache. + /** Visit a document's fields using a {@link StoredFieldVisitor} + * This method does not currently use the Solr document cache. 
* - * @see IndexReader#document(int, FieldSelector) */ + * @see IndexReader#document(int, StoredFieldVisitor) */ @Override - public Document doc(int n, FieldSelector fieldSelector) throws IOException { - return getIndexReader().document(n, fieldSelector); + public void doc(int n, StoredFieldVisitor visitor) throws IOException { + getIndexReader().document(n, visitor); } /** @@ -462,8 +464,9 @@ public class SolrIndexSearcher extends IndexSearcher implements SolrInfoMBean { if(!enableLazyFieldLoading || fields == null) { d = getIndexReader().document(i); } else { - d = getIndexReader().document(i, - new SetNonLazyFieldSelector(fields)); + final FieldSelectorVisitor visitor = new FieldSelectorVisitor(new SetNonLazyFieldSelector(fields)); + getIndexReader().document(i, visitor); + d = visitor.getDocument(); } if (documentCache != null) { diff --git a/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java b/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java index 20a49722327..459feb3d046 100644 --- a/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java +++ b/solr/core/src/java/org/apache/solr/spelling/FileBasedSpellChecker.java @@ -25,7 +25,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.search.spell.HighFrequencyDictionary; import org.apache.lucene.search.spell.PlainTextDictionary; import org.apache.lucene.store.RAMDirectory; @@ -100,7 +100,7 @@ public class FileBasedSpellChecker extends AbstractLuceneSpellChecker { for (String s : lines) { Document d = new Document(); - d.add(new Field(WORD_FIELD_NAME, s, Field.Store.NO, Field.Index.ANALYZED)); + d.add(new TextField(WORD_FIELD_NAME, s)); writer.addDocument(d); } writer.optimize(); diff --git a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java index 4b246a2f3a3..62f7bf350fb 100644 --- a/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java +++ b/solr/core/src/java/org/apache/solr/update/AddUpdateCommand.java @@ -18,7 +18,6 @@ package org.apache.solr.update; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.Term; import org.apache.lucene.util.BytesRef; import org.apache.solr.common.SolrException; diff --git a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java index 3194dd7c72e..87a34a4d86d 100644 --- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java +++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java @@ -22,7 +22,7 @@ import java.util.HashMap; import java.util.List; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrInputDocument; @@ -56,7 +56,7 @@ public class DocumentBuilder { // might actually want to map it to something. If createField() // returns null, then we don't store the field. 
if (sfield.isPolyField()) { - Fieldable[] fields = sfield.createFields(val, boost); + IndexableField[] fields = sfield.createFields(val, boost); if (fields.length > 0) { if (!sfield.multiValued()) { String oldValue = map.put(sfield.getName(), val); @@ -66,12 +66,12 @@ } } // Add each field - for (Fieldable field : fields) { + for (IndexableField field : fields) { doc.add(field); } } } else { - Fieldable field = sfield.createField(val, boost); + IndexableField field = sfield.createField(val, boost); if (field != null) { if (!sfield.multiValued()) { String oldValue = map.put(sfield.getName(), val); @@ -145,10 +145,6 @@ } } - public void setBoost(float boost) { - doc.setBoost(boost); - } - public void endDoc() { } @@ -159,7 +155,7 @@ // default value are defacto 'required' fields. List<String> missingFields = null; for (SchemaField field : schema.getRequiredFields()) { - if (doc.getFieldable(field.getName() ) == null) { + if (doc.getField(field.getName() ) == null) { if (field.getDefaultValue() != null) { addField(doc, field, field.getDefaultValue(), 1.0f); } else { @@ -176,7 +172,7 @@ // add the uniqueKey if possible if( schema.getUniqueKeyField() != null ) { String n = schema.getUniqueKeyField().getName(); - String v = doc.get( n ); + String v = doc.getField( n ).stringValue(); builder.append( "Document ["+n+"="+v+"] " ); } builder.append("missing required fields: " ); @@ -194,12 +190,12 @@ private static void addField(Document doc, SchemaField field, Object val, float boost) { if (field.isPolyField()) { - Fieldable[] farr = field.getType().createFields(field, val, boost); - for (Fieldable f : farr) { + IndexableField[] farr = field.getType().createFields(field, val, boost); + for (IndexableField f : farr) { if (f != null) doc.add(f); // null fields are not added } } else { - Fieldable f = field.createField(val, boost); + IndexableField f = field.createField(val, boost); if (f != null) doc.add(f); // null fields are not added } } @@ -231,7 +227,7 @@ public static Document toDocument( SolrInputDocument doc, IndexSchema schema ) { Document out = new Document(); - out.setBoost( doc.getDocumentBoost() ); + final float docBoost = doc.getDocumentBoost(); // Load fields from SolrDocument to Document for( SolrInputField field : doc ) { @@ -258,7 +254,7 @@ hasField = true; if (sfield != null) { used = true; - addField(out, sfield, v, docBoost*boost); } // Check if we should copy this field to any other fields.
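DocumentBuilder now traffics purely in IndexableFields, and stored documents are read back with getField() plus direct iteration (getFieldable() and getFields() are gone throughout this patch). A self-contained sketch of that round trip; the field names and values are invented, TextField.TYPE_STORED is a constant used elsewhere in this patch, and StringField.TYPE_STORED is assumed here as the stored sibling of the StringField.TYPE_UNSTORED used in TestSort below:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexableField;

public class DocumentRoundTrip {
  public static void main(String[] args) {
    Document doc = new Document();
    doc.add(new Field("id", StringField.TYPE_STORED, "42"));       // assumed constant
    doc.add(new Field("title", TextField.TYPE_STORED, "Hello stored fields"));

    // Single-field lookup, as in printableUniqueKey/getID above:
    IndexableField id = doc.getField("id");
    System.out.println(id == null ? null : id.stringValue());

    // Whole-document traversal: a Document is now iterated directly.
    for (IndexableField f : doc) {
      if (f.binaryValue() == null) { // a stored string value, not a binary one
        System.out.println(f.name() + " = " + f.stringValue());
      }
    }
  }
}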
@@ -267,7 +263,7 @@ public class DocumentBuilder { for (CopyField cf : copyFields) { SchemaField destinationField = cf.getDestination(); // check if the copy field is a multivalued or not - if (!destinationField.multiValued() && out.getFieldable(destinationField.getName()) != null) { + if (!destinationField.multiValued() && out.getField(destinationField.getName()) != null) { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR: "+getID(doc, schema)+"multiple values encountered for non multiValued copy field " + destinationField.getName() + ": " + v); @@ -281,9 +277,9 @@ public class DocumentBuilder { val = cf.getLimitedValue((String)val); } - Fieldable [] fields = destinationField.createFields(val, boost); + IndexableField [] fields = destinationField.createFields(val, docBoost*boost); if (fields != null) { // null fields are not added - for (Fieldable f : fields) { + for (IndexableField f : fields) { if(f != null) out.add(f); } } @@ -293,7 +289,7 @@ public class DocumentBuilder { // document boost and *all* boosts on values of that field. // For multi-valued fields, we only want to set the boost on the // first field. - boost = 1.0f; + boost = docBoost; } } catch( Exception ex ) { @@ -313,7 +309,7 @@ public class DocumentBuilder { // Now validate required fields or add default values // fields with default values are defacto 'required' for (SchemaField field : schema.getRequiredFields()) { - if (out.getFieldable(field.getName() ) == null) { + if (out.getField(field.getName() ) == null) { if (field.getDefaultValue() != null) { addField(out, field, field.getDefaultValue(), 1.0f); } @@ -339,8 +335,8 @@ public class DocumentBuilder { */ public SolrDocument loadStoredFields( SolrDocument doc, Document luceneDoc ) { - for( Fieldable field : luceneDoc.getFields() ) { - if( field.isStored() ) { + for( IndexableField field : luceneDoc) { + if( field.stored() ) { SchemaField sf = schema.getField( field.name() ); if( !schema.isCopyFieldTarget( sf ) ) { doc.addField( field.name(), sf.getType().toObject( field ) ); diff --git a/solr/core/src/java/org/apache/solr/update/UpdateHandler.java b/solr/core/src/java/org/apache/solr/update/UpdateHandler.java index 95179e9d3ed..318696b4612 100644 --- a/solr/core/src/java/org/apache/solr/update/UpdateHandler.java +++ b/solr/core/src/java/org/apache/solr/update/UpdateHandler.java @@ -171,6 +171,3 @@ public abstract class UpdateHandler implements SolrInfoMBean { optimizeCallbacks.add( listener ); } } - - - diff --git a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java index a3381199356..1d7d00fa66b 100644 --- a/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java +++ b/solr/core/src/test/org/apache/solr/BasicFunctionalityTest.java @@ -26,9 +26,10 @@ import java.util.Map; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; +import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LogMergePolicy; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.AppendedSolrParams; @@ -357,32 +358,32 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { IndexSchema ischema = new IndexSchema(solrConfig, getSchemaFile(), null); SchemaField f; // Solr field type - Fieldable luf; // Lucene field + IndexableField 
luf; // Lucene field f = ischema.getField("test_basictv"); luf = f.createField("test", 0f); assertTrue(f.storeTermVector()); - assertTrue(luf.isTermVectorStored()); + assertTrue(luf.storeTermVectors()); f = ischema.getField("test_notv"); luf = f.createField("test", 0f); assertTrue(!f.storeTermVector()); - assertTrue(!luf.isTermVectorStored()); + assertTrue(!luf.storeTermVectors()); f = ischema.getField("test_postv"); luf = f.createField("test", 0f); assertTrue(f.storeTermVector() && f.storeTermPositions()); - assertTrue(luf.isStorePositionWithTermVector()); + assertTrue(luf.storeTermVectorPositions()); f = ischema.getField("test_offtv"); luf = f.createField("test", 0f); assertTrue(f.storeTermVector() && f.storeTermOffsets()); - assertTrue(luf.isStoreOffsetWithTermVector()); + assertTrue(luf.storeTermVectorOffsets()); f = ischema.getField("test_posofftv"); luf = f.createField("test", 0f); assertTrue(f.storeTermVector() && f.storeTermPositions() && f.storeTermOffsets()); - assertTrue(luf.isStoreOffsetWithTermVector() && luf.isStorePositionWithTermVector()); + assertTrue(luf.storeTermVectorOffsets() && luf.storeTermVectorPositions()); } @@ -559,10 +560,10 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { core.execute(core.getRequestHandler(req.getParams().get(CommonParams.QT)), req, rsp); DocList dl = ((ResultContext) rsp.getValues().get("response")).docs; - org.apache.lucene.document.Document d = req.getSearcher().doc(dl.iterator().nextDoc()); + Document d = req.getSearcher().doc(dl.iterator().nextDoc()); // ensure field is not lazy, only works for Non-Numeric fields currently (if you change schema behind test, this may fail) - assertTrue( d.getFieldable("test_hlt") instanceof Field ); - assertTrue( d.getFieldable("title") instanceof Field ); + assertFalse( ((Field) d.getField("test_hlt")).getClass().getSimpleName().equals("LazyField")); + assertFalse( ((Field) d.getField("title")).getClass().getSimpleName().equals("LazyField")); req.close(); } @@ -582,10 +583,10 @@ public class BasicFunctionalityTest extends SolrTestCaseJ4 { DocList dl = ((ResultContext) rsp.getValues().get("response")).docs; DocIterator di = dl.iterator(); - org.apache.lucene.document.Document d = req.getSearcher().doc(di.nextDoc()); + Document d = req.getSearcher().doc(di.nextDoc()); // ensure field is lazy - assertTrue( !( d.getFieldable("test_hlt") instanceof Field ) ); - assertTrue( d.getFieldable("title") instanceof Field ); + assertTrue( ((Field) d.getField("test_hlt")).getClass().getSimpleName().equals("LazyField")); + assertFalse( ((Field) d.getField("title")).getClass().getSimpleName().equals("LazyField")); req.close(); } diff --git a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java index 12590a7f436..cff0b35e5f9 100644 --- a/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java +++ b/solr/core/src/test/org/apache/solr/core/TestArbitraryIndexDir.java @@ -26,6 +26,7 @@ import javax.xml.parsers.ParserConfigurationException; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.queryparser.classic.ParseException; @@ -105,8 +106,8 @@ public class TestArbitraryIndexDir extends AbstractSolrTestCase{ new IndexWriterConfig(Version.LUCENE_40, new 
StandardAnalyzer(Version.LUCENE_40)) ); Document doc = new Document(); - doc.add(new Field("id", "2", Field.Store.YES, Field.Index.ANALYZED)); - doc.add(new Field("name", "name2", Field.Store.YES, Field.Index.ANALYZED)); + doc.add(new Field("id", TextField.TYPE_STORED, "2")); + doc.add(new Field("name", TextField.TYPE_STORED, "name2")); iw.addDocument(doc); iw.commit(); iw.close(); diff --git a/solr/core/src/test/org/apache/solr/schema/DateFieldTest.java b/solr/core/src/test/org/apache/solr/schema/DateFieldTest.java index 8cec0894f8c..0dbfd1c36f7 100644 --- a/solr/core/src/test/org/apache/solr/schema/DateFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/DateFieldTest.java @@ -18,9 +18,8 @@ package org.apache.solr.schema; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.LuceneTestCase; -import org.apache.solr.schema.DateField; import org.apache.solr.util.DateMathParser; import java.util.Date; import java.util.TimeZone; @@ -119,7 +118,7 @@ public class DateFieldTest extends LuceneTestCase { public void testCreateField() { int props = FieldProperties.INDEXED ^ FieldProperties.STORED; SchemaField sf = new SchemaField( "test", f, props, null ); - Fieldable out = (Field)f.createField(sf, "1995-12-31T23:59:59Z", 1.0f ); + IndexableField out = (Field)f.createField(sf, "1995-12-31T23:59:59Z", 1.0f ); assertEquals(820454399000l, f.toObject( out ).getTime() ); out = (Field)f.createField(sf, new Date(820454399000l), 1.0f ); diff --git a/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java b/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java index 12008e14d67..5a06b7e8368 100644 --- a/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java +++ b/solr/core/src/test/org/apache/solr/schema/PolyFieldTest.java @@ -16,8 +16,8 @@ package org.apache.solr.schema; * limitations under the License. 
*/ -import org.apache.lucene.document.Fieldable; import org.apache.lucene.queries.function.ValueSource; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; @@ -83,12 +83,12 @@ public class PolyFieldTest extends SolrTestCaseJ4 { assertEquals(pt.getDimension(), 2); double[] xy = new double[]{35.0, -79.34}; String point = xy[0] + "," + xy[1]; - Fieldable[] fields = home.createFields(point, 2); + IndexableField[] fields = home.createFields(point, 2); assertEquals(fields.length, 3);//should be 3, we have a stored field //first two fields contain the values, third is just stored and contains the original for (int i = 0; i < 3; i++) { boolean hasValue = fields[1].tokenStreamValue() != null - || fields[1].getBinaryValue() != null + || fields[1].binaryValue() != null || fields[1].stringValue() != null; assertTrue("Doesn't have a value: " + fields[1], hasValue); } diff --git a/solr/core/src/test/org/apache/solr/search/TestSort.java b/solr/core/src/test/org/apache/solr/search/TestSort.java index f1cac7aa4d7..8347e2432f7 100755 --- a/solr/core/src/test/org/apache/solr/search/TestSort.java +++ b/solr/core/src/test/org/apache/solr/search/TestSort.java @@ -17,9 +17,13 @@ package org.apache.solr.search; +import java.io.IOException; +import java.util.*; + import org.apache.lucene.analysis.core.SimpleAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.StringField; import org.apache.lucene.index.IndexReader.AtomicReaderContext; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -29,16 +33,10 @@ import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.util._TestUtil; - -import org.apache.solr.request.SolrQueryRequest; - import org.apache.solr.SolrTestCaseJ4; - +import org.apache.solr.request.SolrQueryRequest; import org.junit.BeforeClass; -import java.io.IOException; -import java.util.*; - public class TestSort extends SolrTestCaseJ4 { @BeforeClass public static void beforeClass() throws Exception { @@ -152,8 +150,8 @@ public class TestSort extends SolrTestCaseJ4 { public void testSort() throws Exception { Directory dir = new RAMDirectory(); - Field f = new Field("f","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); - Field f2 = new Field("f2","0", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS); + Field f = new Field("f", StringField.TYPE_UNSTORED,"0"); + Field f2 = new Field("f2", StringField.TYPE_UNSTORED,"0"); for (int iterCnt = 0; iterCnt