Merge to trunk r1211584

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene3606@1211591 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2011-12-07 18:49:18 +00:00
commit 13294b07cc
11 changed files with 135 additions and 16 deletions

View File

@@ -54,11 +54,6 @@
     <outputDirectory>${build-directory}/classes/java</outputDirectory>
     <testOutputDirectory>${build-directory}/classes/test</testOutputDirectory>
     <sourceDirectory>src/java</sourceDirectory>
-    <resources>
-      <resource>
-        <directory>src/resources</directory>
-      </resource>
-    </resources>
     <testSourceDirectory>src/test</testSourceDirectory>
     <testResources>
       <testResource>

View File: lucene/CHANGES.txt

@@ -649,6 +649,13 @@ Changes in backwards compatibility policy
   FieldCacheTermsFilter.FieldCacheTermsFilterDocIdSet was removed and
   replaced by another internal implementation. (Uwe Schindler)
 
+* LUCENE-3620: FilterIndexReader now overrides all methods of IndexReader that
+  it should (note that some are still not overridden, as they should be
+  overridden by sub-classes only). In the process, some methods of IndexReader
+  were made final. This is not expected to affect many apps, since these methods
+  already delegate to abstract methods, which you had to already override
+  anyway. (Shai Erera)
+
 Security fixes
 
 * LUCENE-3588: Try harder to prevent SIGSEGV on cloned MMapIndexInputs:

View File: SegmentMerger.java

@@ -31,6 +31,7 @@ import org.apache.lucene.index.codecs.NormsWriter;
 import org.apache.lucene.index.codecs.StoredFieldsWriter;
 import org.apache.lucene.index.codecs.PerDocConsumer;
 import org.apache.lucene.index.codecs.TermVectorsWriter;
+import org.apache.lucene.index.values.ValueType;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.IOContext;
 import org.apache.lucene.util.Bits;
@@ -205,7 +206,11 @@ final class SegmentMerger
       addIndexed(reader, mergeState.fieldInfos, reader.getFieldNames(FieldOption.STORES_PAYLOADS), false, false, false, true, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
       addIndexed(reader, mergeState.fieldInfos, reader.getFieldNames(FieldOption.INDEXED), false, false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
       mergeState.fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.UNINDEXED), false);
-      mergeState.fieldInfos.addOrUpdate(reader.getFieldNames(FieldOption.DOC_VALUES), false);
+      Collection<String> dvNames = reader.getFieldNames(FieldOption.DOC_VALUES);
+      mergeState.fieldInfos.addOrUpdate(dvNames, false);
+      for (String dvName : dvNames) {
+        mergeState.fieldInfos.fieldInfo(dvName).setDocValues(reader.docValues(dvName).type());
+      }
     }
   }
 }
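The new loop does more than register the doc-values field names: it copies each field's concrete value type from the incoming reader onto the merged field metadata. Below is a minimal, self-contained sketch of that pattern; FieldInfos, FieldInfo, and ValueType are simplified stand-ins for the Lucene classes, and the per-reader lookups are faked with a map.

// Sketch only: simplified stand-ins, not the Lucene 4.0-era API.
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class DocValuesMergeSketch {
  enum ValueType { FIXED_INTS_32, FLOAT_64 }   // stand-in for o.a.l.index.values.ValueType

  static class FieldInfo {
    final String name;
    ValueType docValuesType;                   // unset until a doc-values type is recorded
    FieldInfo(String name) { this.name = name; }
    void setDocValues(ValueType type) { this.docValuesType = type; }
  }

  static class FieldInfos {
    private final Map<String, FieldInfo> byName = new HashMap<>();
    void addOrUpdate(Iterable<String> names) {
      for (String n : names) byName.computeIfAbsent(n, FieldInfo::new);
    }
    FieldInfo fieldInfo(String name) { return byName.get(name); }
  }

  public static void main(String[] args) {
    // Pretend these came from reader.getFieldNames(FieldOption.DOC_VALUES)
    // and reader.docValues(name).type() on the segment being merged in.
    List<String> dvNames = List.of("dv");
    Map<String, ValueType> readerTypes = Map.of("dv", ValueType.FIXED_INTS_32);

    FieldInfos fieldInfos = new FieldInfos();
    fieldInfos.addOrUpdate(dvNames);           // step 1: register the names
    for (String dvName : dvNames) {            // step 2: attach each field's type
      fieldInfos.fieldInfo(dvName).setDocValues(readerTypes.get(dvName));
    }
    System.out.println(fieldInfos.fieldInfo("dv").docValuesType); // FIXED_INTS_32
  }
}

Registering only the names, as the old single addOrUpdate call did, tells the merged segment that a field carries doc values but not which type to write them as.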

View File: SegmentReader.java

@@ -466,6 +466,9 @@ public final class SegmentReader extends IndexReader implements Cloneable {
       else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) &&
                fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) {
         fieldSet.add(fi.name);
       }
+      else if (fi.hasDocValues() && fieldOption == IndexReader.FieldOption.DOC_VALUES) {
+        fieldSet.add(fi.name);
+      }
     }
     return fieldSet;
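The added branch teaches getFieldNames() to answer FieldOption.DOC_VALUES alongside the existing options. A self-contained sketch of the dispatch, with simplified stand-ins for FieldInfo and the FieldOption enum:

// Sketch only: simplified stand-ins for FieldInfo and IndexReader.FieldOption.
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class FieldOptionSketch {
  enum FieldOption { ALL, INDEXED, DOC_VALUES }

  static class FieldInfo {
    final String name;
    final boolean indexed;
    final boolean docValues;
    FieldInfo(String name, boolean indexed, boolean docValues) {
      this.name = name; this.indexed = indexed; this.docValues = docValues;
    }
    boolean hasDocValues() { return docValues; }
  }

  static Set<String> getFieldNames(List<FieldInfo> infos, FieldOption fieldOption) {
    Set<String> fieldSet = new HashSet<>();
    for (FieldInfo fi : infos) {
      if (fieldOption == FieldOption.ALL) {
        fieldSet.add(fi.name);
      } else if (fi.indexed && fieldOption == FieldOption.INDEXED) {
        fieldSet.add(fi.name);
      } else if (fi.hasDocValues() && fieldOption == FieldOption.DOC_VALUES) {
        // the newly added case: doc-values fields become discoverable
        fieldSet.add(fi.name);
      }
    }
    return fieldSet;
  }

  public static void main(String[] args) {
    List<FieldInfo> infos = List.of(
        new FieldInfo("id", true, false),
        new FieldInfo("dv", false, true));
    System.out.println(getFieldNames(infos, FieldOption.DOC_VALUES)); // [dv]
  }
}

This is the lookup the SegmentMerger change above relies on: without this branch, getFieldNames(FieldOption.DOC_VALUES) would return an empty set and no doc-values types would be carried through the merge.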

View File: IndexDocValues.java

@@ -210,18 +210,18 @@ public abstract class IndexDocValues implements Closeable {
     /**
      * Returns the internal array representation iff this {@link Source} uses an
-     * array as its inner representation, otherwise <code>null</code>.
+     * array as its inner representation, otherwise <code>UOE</code>.
      */
     public Object getArray() {
-      return null;
+      throw new UnsupportedOperationException("getArray is not supported");
     }
 
     /**
      * If this {@link Source} is sorted this method will return an instance of
-     * {@link SortedSource} otherwise <code>null</code>
+     * {@link SortedSource} otherwise <code>UOE</code>
      */
     public SortedSource asSortedSource() {
-      return null;
+      throw new UnsupportedOperationException("asSortedSource is not supported");
     }
   }
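The contract change here replaces "returns null when unsupported" with "throws UnsupportedOperationException", so callers move from null checks to capability checks or try/catch. A sketch of the caller-side difference; Source and hasArray() below are simplified stand-ins, not the exact Lucene API.

// Sketch only: a simplified Source, not the real IndexDocValues.Source.
public class SourceContractSketch {
  abstract static class Source {
    // Default: not array-backed; the new contract throws instead of returning null.
    Object getArray() {
      throw new UnsupportedOperationException("getArray is not supported");
    }
    boolean hasArray() { return false; } // assumed capability probe
  }

  static class IntArraySource extends Source {
    private final int[] values = {1, 2};
    @Override Object getArray() { return values; }
    @Override boolean hasArray() { return true; }
  }

  public static void main(String[] args) {
    Source s = new IntArraySource();
    // Old style: Object a = s.getArray(); if (a != null) { ... }
    // New style: probe first, or be prepared for the exception.
    if (s.hasArray()) {
      int[] a = (int[]) s.getArray();
      System.out.println(a.length); // 2
    }
    try {
      new Source() {}.getArray();
    } catch (UnsupportedOperationException expected) {
      System.out.println("unsupported, per the new contract");
    }
  }
}

The TestTypePromotion hunk below works around exactly this kind of gap: SlowMultiReaderWrapper still returns a null SortedSource, so that test avoids wrapping when sorted bytes are in play.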

View File: TestAddIndexes.java

@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.FieldType;
+import org.apache.lucene.document.IndexDocValuesField;
 import org.apache.lucene.document.StringField;
 import org.apache.lucene.document.TextField;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
@@ -46,6 +47,7 @@ import org.apache.lucene.index.codecs.lucene40.Lucene40SegmentInfosFormat;
 import org.apache.lucene.index.codecs.lucene40.Lucene40StoredFieldsFormat;
 import org.apache.lucene.index.codecs.lucene40.Lucene40TermVectorsFormat;
 import org.apache.lucene.index.codecs.pulsing.Pulsing40PostingsFormat;
+import org.apache.lucene.index.values.IndexDocValues;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.store.AlreadyClosedException;
@@ -1221,5 +1223,48 @@ public class TestAddIndexes extends LuceneTestCase {
     }
     r3.close();
     d3.close();
   }
-}
+
+  public void testDocValues() throws IOException {
+    assumeFalse("preflex does not support docvalues", Codec.getDefault().getName().equals("Lucene3x"));
+    Directory d1 = newDirectory();
+    RandomIndexWriter w = new RandomIndexWriter(random, d1);
+    Document doc = new Document();
+    doc.add(newField("id", "1", StringField.TYPE_STORED));
+    IndexDocValuesField dv = new IndexDocValuesField("dv");
+    dv.setInt(1);
+    doc.add(dv);
+    w.addDocument(doc);
+    IndexReader r1 = w.getReader();
+    w.close();
+
+    Directory d2 = newDirectory();
+    w = new RandomIndexWriter(random, d2);
+    doc = new Document();
+    doc.add(newField("id", "2", StringField.TYPE_STORED));
+    dv = new IndexDocValuesField("dv");
+    dv.setInt(2);
+    doc.add(dv);
+    w.addDocument(doc);
+    IndexReader r2 = w.getReader();
+    w.close();
+
+    Directory d3 = newDirectory();
+    w = new RandomIndexWriter(random, d3);
+    w.addIndexes(new SlowMultiReaderWrapper(r1), new SlowMultiReaderWrapper(r2));
+    r1.close();
+    d1.close();
+    r2.close();
+    d2.close();
+
+    w.forceMerge(1);
+    IndexReader r3 = w.getReader();
+    w.close();
+    IndexReader sr = getOnlySegmentReader(r3);
+    assertEquals(2, sr.numDocs());
+    IndexDocValues docValues = sr.perDocValues().docValues("dv");
+    assertNotNull(docValues);
+    r3.close();
+    d3.close();
+  }
+}

View File: TestTypePromotion.java

@@ -15,6 +15,7 @@ import org.apache.lucene.index.IndexReader.ReaderContext;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.NoMergePolicy;
+import org.apache.lucene.index.SlowMultiReaderWrapper;
 import org.apache.lucene.index.codecs.Codec;
 import org.apache.lucene.index.values.IndexDocValues.Source;
 import org.apache.lucene.store.Directory;
@@ -101,7 +102,9 @@ public class TestTypePromotion extends LuceneTestCase {
     } else {
       // do a real merge here
       IndexReader open = IndexReader.open(dir_2);
-      writer.addIndexes(open);
+      // we cannot use SlowMR for sorted bytes, because it returns a null sortedsource
+      boolean useSlowMRWrapper = types != SORTED_BYTES && random.nextBoolean();
+      writer.addIndexes(useSlowMRWrapper ? new SlowMultiReaderWrapper(open) : open);
       open.close();
     }
     dir_2.close();

View File: solr/CHANGES.txt

@@ -286,6 +286,9 @@ Bug Fixes
   and added a merge() method to SolrSpellChecker instead. Previously if you extended SolrSpellChecker
   your spellchecker would not work in distributed fashion. (James Dyer via rmuir)
 
+* SOLR-2509: StringIndexOutOfBoundsException in the spellchecker collate when the term contains
+  a hyphen. (Thomas Gambier caught the bug, Steffen Godskesen did the patch, via Erick Erickson)
+
 Other Changes
 ----------------------
@@ -410,6 +413,9 @@ Bug Fixes
 * SOLR-2819: Improved speed of parsing hex entities in HTMLStripCharFilter
   (Bernhard Berger, hossman)
 
+* SOLR-2509: StringIndexOutOfBoundsException in the spellchecker collate when the term contains
+  a hyphen. (Thomas Gambier caught the bug, Steffen Godskesen did the patch, via Erick Erickson)
+
 Other Changes
 ----------------------
 
 * SOLR-2922: Upgrade commons-io and commons-lang to 2.1 and 2.6, respectively. (koji)

View File: SpellingQueryConverter.java

@@ -28,6 +28,7 @@ import java.util.regex.Pattern;
 import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
@@ -111,12 +112,13 @@ public class SpellingQueryConverter extends QueryConverter {
       TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class);
       PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class);
       PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class);
+      OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
       stream.reset();
       while (stream.incrementToken()) {
         Token token = new Token();
         token.copyBuffer(termAtt.buffer(), 0, termAtt.length());
-        token.setStartOffset(matcher.start());
-        token.setEndOffset(matcher.end());
+        token.setStartOffset(matcher.start() + offsetAtt.startOffset());
+        token.setEndOffset(matcher.start() + offsetAtt.endOffset());
         token.setFlags(flagsAtt.getFlags());
         token.setType(typeAtt.type());
         token.setPayload(payloadAtt.getPayload());
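The fix changes token offsets from fragment-relative to query-relative coordinates: the TokenStream only sees the regex-matched substring, so its OffsetAttribute values must be shifted by matcher.start() before they can point back into the original query. A self-contained sketch of the same arithmetic, with a trivial split-based stand-in for the analyzer:

// Sketch only: a trivial tokenizer stands in for the real analysis chain.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class OffsetShiftSketch {
  public static void main(String[] args) {
    String query = "field:(hyphenated-word)";
    Matcher matcher = Pattern.compile("[^(): ]+").matcher(query);
    while (matcher.find()) {
      String fragment = matcher.group();
      // A real implementation feeds 'fragment' to a TokenStream; splitting
      // on '-' stands in for that here.
      int fragStart = 0;
      for (String tok : fragment.split("-")) {
        int start = fragment.indexOf(tok, fragStart); // offset inside fragment
        int end = start + tok.length();
        fragStart = end;
        // Buggy version: offsets relative to the whole match only.
        // token.setStartOffset(matcher.start()); token.setEndOffset(matcher.end());
        // Fixed version: shift fragment-relative offsets into the query.
        int fixedStart = matcher.start() + start;
        int fixedEnd = matcher.start() + end;
        System.out.printf("%s -> [%d,%d) in %s%n", tok, fixedStart, fixedEnd, query);
      }
    }
  }
}

With the old code every token inherited the whole match's start and end, which is what broke collation on hyphenated terms; the tests in the next two files pin down the corrected behavior.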

View File: SpellCheckComponentTest.java

@@ -114,7 +114,7 @@ public class SpellCheckComponentTest extends SolrTestCaseJ4 {
     assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","documemtsss broens", SpellCheckComponent.SPELLCHECK_COLLATE, "true")
       ,"/spellcheck/suggestions/collation=='document brown'"
     );
-    assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","pixma-a-b-c-d-e-f-g", SpellCheckComponent.SPELLCHECK_COLLATE, "true")
+    assertJQ(req("json.nl","map", "qt",rh, SpellCheckComponent.COMPONENT_NAME, "true", "q","pixma", SpellCheckComponent.SPELLCHECK_COLLATE, "true")
       ,"/spellcheck/suggestions/collation=='pixmaa'"
     );
   }

View File: SpellCheckCollatorTest.java

@@ -46,9 +46,62 @@ public class SpellCheckCollatorTest extends SolrTestCaseJ4 {
     assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "faith of homer")));
     assertNull(h.validateUpdate(adoc("id", "4", "lowerfilt", "fat of homer")));
     assertNull(h.validateUpdate(adoc("id", "5", "lowerfilt1", "peace")));
+    assertNull(h.validateUpdate(adoc("id", "6", "lowerfilt", "hyphenated word")));
     assertNull(h.validateUpdate(commit()));
   }
 
+  @Test
+  public void testCollationWithHypens() throws Exception
+  {
+    SolrCore core = h.getCore();
+    SearchComponent speller = core.getSearchComponent("spellcheck");
+    assertTrue("speller is null and it shouldn't be", speller != null);
+
+    ModifiableSolrParams params = new ModifiableSolrParams();
+    params.add(SpellCheckComponent.COMPONENT_NAME, "true");
+    params.add(SpellCheckComponent.SPELLCHECK_BUILD, "true");
+    params.add(SpellCheckComponent.SPELLCHECK_COUNT, "10");
+    params.add(SpellCheckComponent.SPELLCHECK_COLLATE, "true");
+
+    params.add(CommonParams.Q, "lowerfilt:(hypenated-wotd)");
+    {
+      SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
+      SolrQueryResponse rsp = new SolrQueryResponse();
+      rsp.add("responseHeader", new SimpleOrderedMap());
+      SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
+      handler.handleRequest(req, rsp);
+      req.close();
+      NamedList values = rsp.getValues();
+      NamedList spellCheck = (NamedList) values.get("spellcheck");
+      NamedList suggestions = (NamedList) spellCheck.get("suggestions");
+      List<String> collations = suggestions.getAll("collation");
+      assertTrue(collations.size()==1);
+      String collation = collations.iterator().next();
+      assertTrue("Incorrect collation: " + collation,"lowerfilt:(hyphenated-word)".equals(collation));
+    }
+
+    params.remove(CommonParams.Q);
+    params.add("defType", "dismax");
+    params.add("qf", "lowerfilt");
+    params.add(CommonParams.Q, "hypenated-wotd");
+    {
+      SolrRequestHandler handler = core.getRequestHandler("spellCheckCompRH");
+      SolrQueryResponse rsp = new SolrQueryResponse();
+      rsp.add("responseHeader", new SimpleOrderedMap());
+      SolrQueryRequest req = new LocalSolrQueryRequest(core, params);
+      handler.handleRequest(req, rsp);
+      req.close();
+      NamedList values = rsp.getValues();
+      NamedList spellCheck = (NamedList) values.get("spellcheck");
+      NamedList suggestions = (NamedList) spellCheck.get("suggestions");
+      List<String> collations = suggestions.getAll("collation");
+      assertTrue(collations.size()==1);
+      String collation = collations.iterator().next();
+      assertTrue("Incorrect collation: " + collation,"hyphenated-word".equals(collation));
+    }
+  }
+
   @Test
   public void testCollateWithFilter() throws Exception
   {