From f252c064f2649a32695ad916afc8191997ef3202 Mon Sep 17 00:00:00 2001
From: Uwe Schindler
Date: Wed, 1 Feb 2012 10:51:40 +0000
Subject: [PATCH] LUCENE-3735: Fix PayloadProcessorProvider to no longer use
 Directory for lookup, instead AtomicReader

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1239052 13f79535-47bb-0310-9956-ffa450edef68
---
 .../apache/lucene/codecs/TermsConsumer.java    |  8 ++++----
 .../org/apache/lucene/index/IndexWriter.java   |  1 -
 .../org/apache/lucene/index/MergeState.java    |  4 ++--
 .../index/PayloadProcessorProvider.java        |  4 ++--
 .../apache/lucene/index/SegmentMerger.java     |  9 ++-------
 .../index/TestPayloadProcessorProvider.java    | 20 +++++++++++--------
 .../index/FacetsPayloadProcessorProvider.java  | 16 ++++++++++------
 7 files changed, 32 insertions(+), 30 deletions(-)

diff --git a/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java b/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java
index 8c3db640297..6cfbf5eae94 100644
--- a/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java
+++ b/lucene/src/java/org/apache/lucene/codecs/TermsConsumer.java
@@ -134,8 +134,8 @@ public abstract class TermsConsumer {
       // set PayloadProcessor
       if (mergeState.payloadProcessorProvider != null) {
         for (int i = 0; i < mergeState.readers.size(); i++) {
-          if (mergeState.dirPayloadProcessor[i] != null) {
-            mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
+          if (mergeState.readerPayloadProcessor[i] != null) {
+            mergeState.currentPayloadProcessor[i] = mergeState.readerPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
           }
         }
       }
@@ -168,8 +168,8 @@ public abstract class TermsConsumer {
       // set PayloadProcessor
       if (mergeState.payloadProcessorProvider != null) {
         for (int i = 0; i < mergeState.readers.size(); i++) {
-          if (mergeState.dirPayloadProcessor[i] != null) {
-            mergeState.currentPayloadProcessor[i] = mergeState.dirPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
+          if (mergeState.readerPayloadProcessor[i] != null) {
+            mergeState.currentPayloadProcessor[i] = mergeState.readerPayloadProcessor[i].getProcessor(mergeState.fieldInfo.name, term);
           }
         }
       }
diff --git a/lucene/src/java/org/apache/lucene/index/IndexWriter.java b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
index 7838dabbaca..f0c9c9540a7 100644
--- a/lucene/src/java/org/apache/lucene/index/IndexWriter.java
+++ b/lucene/src/java/org/apache/lucene/index/IndexWriter.java
@@ -38,7 +38,6 @@ import org.apache.lucene.index.DocumentsWriterPerThread.FlushedSegment;
 import org.apache.lucene.index.FieldInfos.FieldNumberBiMap;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.index.MergeState.CheckAbort;
-import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.CompoundFileDirectory;
diff --git a/lucene/src/java/org/apache/lucene/index/MergeState.java b/lucene/src/java/org/apache/lucene/index/MergeState.java
index c2bb14089fe..b42661b397b 100644
--- a/lucene/src/java/org/apache/lucene/index/MergeState.java
+++ b/lucene/src/java/org/apache/lucene/index/MergeState.java
@@ -19,7 +19,7 @@ package org.apache.lucene.index;
 
 import java.util.List;
 
-import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
+import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
 import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.Bits;
@@ -55,7 +55,7 @@ public class MergeState {
   // TODO: this is a FactoryFactory here basically
   // and we could make a codec(wrapper) to do all of this privately so IW is uninvolved
   public PayloadProcessorProvider payloadProcessorProvider;
-  public DirPayloadProcessor[] dirPayloadProcessor;
+  public ReaderPayloadProcessor[] readerPayloadProcessor;
   public PayloadProcessor[] currentPayloadProcessor;
 
   // TODO: get rid of this? it tells you which segments are 'aligned' (e.g. for bulk merging)
diff --git a/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java b/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java
index bf825c1dacd..9d9ea5a3f2c 100644
--- a/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java
+++ b/lucene/src/java/org/apache/lucene/index/PayloadProcessorProvider.java
@@ -52,7 +52,7 @@ public abstract class PayloadProcessorProvider {
    * concurrency issues, then you shouldn't worry about any such issues when
    * {@link PayloadProcessor}s are requested for different terms.
    */
-  public static abstract class DirPayloadProcessor {
+  public static abstract class ReaderPayloadProcessor {
 
     /** Returns a {@link PayloadProcessor} for the given term. */
     public abstract PayloadProcessor getProcessor(String field, BytesRef text) throws IOException;
@@ -76,6 +76,6 @@ public abstract class PayloadProcessorProvider {
    * through which {@link PayloadProcessor}s can be obtained for each
    * {@link Term}, or null if none should be used.
    */
-  public abstract DirPayloadProcessor getDirProcessor(Directory dir) throws IOException;
+  public abstract ReaderPayloadProcessor getReaderProcessor(AtomicReader reader) throws IOException;
 
 }
diff --git a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
index e76658302b2..da7db9325cb 100644
--- a/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
+++ b/lucene/src/java/org/apache/lucene/index/SegmentMerger.java
@@ -286,7 +286,7 @@ final class SegmentMerger {
     // Remap docIDs
     mergeState.docMaps = new int[numReaders][];
     mergeState.docBase = new int[numReaders];
-    mergeState.dirPayloadProcessor = new PayloadProcessorProvider.DirPayloadProcessor[numReaders];
+    mergeState.readerPayloadProcessor = new PayloadProcessorProvider.ReaderPayloadProcessor[numReaders];
     mergeState.currentPayloadProcessor = new PayloadProcessorProvider.PayloadProcessor[numReaders];
 
     int docBase = 0;
@@ -323,12 +323,7 @@ final class SegmentMerger {
       docBase += docCount;
 
       if (mergeState.payloadProcessorProvider != null) {
-        // TODO: the PayloadProcessorProvider should take AtomicReader as parameter
-        // and find out by itself if it can provide a processor:
-        if (!(reader.reader instanceof SegmentReader))
-          throw new UnsupportedOperationException("Payload processing currently requires exclusively SegmentReaders to be merged.");
-        final Directory dir = ((SegmentReader) reader.reader).directory();
-        mergeState.dirPayloadProcessor[i] = mergeState.payloadProcessorProvider.getDirProcessor(dir);
+        mergeState.readerPayloadProcessor[i] = mergeState.payloadProcessorProvider.getReaderProcessor(reader.reader);
       }
 
       i++;
diff --git a/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java b/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java
index ea8d6dfcd08..3a7fd13ce36 100644
--- a/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java
+++ b/lucene/src/test/org/apache/lucene/index/TestPayloadProcessorProvider.java
@@ -30,7 +30,7 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.FieldType;
 import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.PayloadProcessorProvider.DirPayloadProcessor;
+import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
 import org.apache.lucene.index.PayloadProcessorProvider.PayloadProcessor;
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.store.Directory;
@@ -42,20 +42,24 @@ public class TestPayloadProcessorProvider extends LuceneTestCase {
 
   private static final class PerDirPayloadProcessor extends PayloadProcessorProvider {
 
-    private Map<Directory, DirPayloadProcessor> processors;
+    private final Map<Directory, ReaderPayloadProcessor> processors;
 
-    public PerDirPayloadProcessor(Map<Directory, DirPayloadProcessor> processors) {
+    public PerDirPayloadProcessor(Map<Directory, ReaderPayloadProcessor> processors) {
       this.processors = processors;
     }
 
     @Override
-    public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
-      return processors.get(dir);
+    public ReaderPayloadProcessor getReaderProcessor(AtomicReader reader) throws IOException {
+      if (reader instanceof SegmentReader) {
+        return processors.get(((SegmentReader) reader).directory());
+      } else {
+        throw new UnsupportedOperationException("This should not happen in this test: reader is not a SegmentReader");
+      }
     }
 
   }
 
-  private static final class PerTermPayloadProcessor extends DirPayloadProcessor {
+  private static final class PerTermPayloadProcessor extends ReaderPayloadProcessor {
 
     @Override
     public PayloadProcessor getProcessor(String field, BytesRef text) throws IOException {
@@ -185,7 +189,7 @@ public class TestPayloadProcessorProvider extends LuceneTestCase {
 
     // Add two source dirs. By not adding the dest dir, we ensure its payloads
    // won't get processed.
-    Map<Directory, DirPayloadProcessor> processors = new HashMap<Directory, DirPayloadProcessor>();
+    Map<Directory, ReaderPayloadProcessor> processors = new HashMap<Directory, ReaderPayloadProcessor>();
     for (Directory d : dirs) {
       processors.put(d, new PerTermPayloadProcessor());
     }
@@ -241,7 +245,7 @@ public class TestPayloadProcessorProvider extends LuceneTestCase {
 
     // Add two source dirs. By not adding the dest dir, we ensure its payloads
     // won't get processed.
-    Map<Directory, DirPayloadProcessor> processors = new HashMap<Directory, DirPayloadProcessor>();
+    Map<Directory, ReaderPayloadProcessor> processors = new HashMap<Directory, ReaderPayloadProcessor>();
     processors.put(dir, new PerTermPayloadProcessor());
     IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random, MockTokenizer.WHITESPACE, false)));
     writer.setPayloadProcessorProvider(new PerDirPayloadProcessor(processors));
diff --git a/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java b/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
index b69c3e6c21f..4c3991baa0d 100644
--- a/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
+++ b/modules/facet/src/java/org/apache/lucene/facet/index/FacetsPayloadProcessorProvider.java
@@ -7,7 +7,9 @@ import java.io.InputStream;
 import java.util.HashMap;
 import java.util.Map;
 
+import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.PayloadProcessorProvider;
+import org.apache.lucene.index.SegmentReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.store.Directory;
 
@@ -94,7 +96,7 @@ public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
 
   private final Directory workDir;
 
-  private final DirPayloadProcessor dirProcessor;
+  private final ReaderPayloadProcessor dirProcessor;
 
   /**
    * Construct FacetsPayloadProcessorProvider with FacetIndexingParams
@@ -110,14 +112,16 @@ public class FacetsPayloadProcessorProvider extends PayloadProcessorProvider {
   }
 
   @Override
-  public DirPayloadProcessor getDirProcessor(Directory dir) throws IOException {
-    if (workDir != dir) {
-      return null;
+  public ReaderPayloadProcessor getReaderProcessor(AtomicReader reader) throws IOException {
+    if (reader instanceof SegmentReader) {
+      if (workDir == ((SegmentReader) reader).directory()) {
+        return dirProcessor;
+      }
     }
-    return dirProcessor;
+    return null;
   }
 
-  public static class FacetsDirPayloadProcessor extends DirPayloadProcessor {
+  public static class FacetsDirPayloadProcessor extends ReaderPayloadProcessor {
 
     private final Map termMap = new HashMap(1);
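
For illustration, a minimal sketch of a custom provider written against the new reader-based lookup that this patch introduces. The class name DirectoryFilteringProvider and its constructor arguments are hypothetical; only PayloadProcessorProvider, ReaderPayloadProcessor, AtomicReader, SegmentReader and Directory come from the patched API. It mirrors the pattern used by FacetsPayloadProcessorProvider above.

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.PayloadProcessorProvider;
import org.apache.lucene.index.PayloadProcessorProvider.ReaderPayloadProcessor;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.store.Directory;

// Hypothetical provider: before this change it would have overridden
// getDirProcessor(Directory); now it overrides getReaderProcessor(AtomicReader)
// and recovers the Directory itself when per-directory behaviour is wanted.
public class DirectoryFilteringProvider extends PayloadProcessorProvider {

  private final Directory targetDir;               // only segments of this directory are processed
  private final ReaderPayloadProcessor processor;  // pre-built processor to hand out

  public DirectoryFilteringProvider(Directory targetDir, ReaderPayloadProcessor processor) {
    this.targetDir = targetDir;
    this.processor = processor;
  }

  @Override
  public ReaderPayloadProcessor getReaderProcessor(AtomicReader reader) throws IOException {
    // A SegmentReader still exposes its Directory, so a directory-based lookup
    // can remain an implementation detail of the provider instead of being
    // part of the API, as it was before this patch.
    if (reader instanceof SegmentReader && ((SegmentReader) reader).directory() == targetDir) {
      return processor;
    }
    return null; // null leaves the payloads of this reader unprocessed
  }
}

As in the test above, such a provider would be installed with writer.setPayloadProcessorProvider(...) before addIndexes or merging.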