diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java index 6d51e251fae..ffbef5fd4ff 100644 --- a/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/docvalues/DocTermsIndexDocValues.java @@ -44,12 +44,12 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { protected final CharsRef spareChars = new CharsRef(); public DocTermsIndexDocValues(ValueSource vs, AtomicReaderContext context, String field) throws IOException { - try { - termsIndex = DocValues.getSorted(context.reader(), field); - } catch (RuntimeException e) { - throw new DocTermsIndexException(field, e); - } + this(vs, open(context, field)); + } + + protected DocTermsIndexDocValues(ValueSource vs, SortedDocValues termsIndex) { this.vs = vs; + this.termsIndex = termsIndex; } protected abstract String toTerm(String readableValue); @@ -162,6 +162,15 @@ public abstract class DocTermsIndexDocValues extends FunctionValues { }; } + // TODO: why? + static SortedDocValues open(AtomicReaderContext context, String field) throws IOException { + try { + return DocValues.getSorted(context.reader(), field); + } catch (RuntimeException e) { + throw new DocTermsIndexException(field, e); + } + } + /** * Custom Exception to be thrown when the DocTermsIndex for a field cannot be generated */ diff --git a/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SortedSetFieldSource.java b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SortedSetFieldSource.java new file mode 100644 index 00000000000..eda61c5f88b --- /dev/null +++ b/lucene/queries/src/java/org/apache/lucene/queries/function/valuesource/SortedSetFieldSource.java @@ -0,0 +1,90 @@ +package org.apache.lucene.queries.function.valuesource; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.queries.function.FunctionValues; +import org.apache.lucene.queries.function.docvalues.DocTermsIndexDocValues; +import org.apache.lucene.search.SortedSetSelector; + +/** + * Retrieves {@link FunctionValues} instances for multi-valued string based fields. + *

+ * A SortedSetDocValues contains multiple values for a field, so this + * technique "selects" a value as the representative value for the document. + * + * @see SortedSetSelector + */ +public class SortedSetFieldSource extends FieldCacheSource { + protected final SortedSetSelector.Type selector; + + public SortedSetFieldSource(String field) { + this(field, SortedSetSelector.Type.MIN); + } + + public SortedSetFieldSource(String field, SortedSetSelector.Type selector) { + super(field); + this.selector = selector; + } + + @Override + public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException { + SortedSetDocValues sortedSet = DocValues.getSortedSet(readerContext.reader(), field); + SortedDocValues view = SortedSetSelector.wrap(sortedSet, selector); + return new DocTermsIndexDocValues(this, view) { + @Override + protected String toTerm(String readableValue) { + return readableValue; + } + + @Override + public Object objectVal(int doc) { + return strVal(doc); + } + }; + } + + @Override + public String description() { + return "sortedset(" + field + ",selector=" + selector + ')'; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((selector == null) ? 0 : selector.hashCode()); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (!super.equals(obj)) return false; + if (getClass() != obj.getClass()) return false; + SortedSetFieldSource other = (SortedSetFieldSource) obj; + if (selector != other.selector) return false; + return true; + } +} diff --git a/lucene/queries/src/test/org/apache/lucene/queries/function/TestSortedSetFieldSource.java b/lucene/queries/src/test/org/apache/lucene/queries/function/TestSortedSetFieldSource.java new file mode 100644 index 00000000000..d45594c44da --- /dev/null +++ b/lucene/queries/src/test/org/apache/lucene/queries/function/TestSortedSetFieldSource.java @@ -0,0 +1,61 @@ +package org.apache.lucene.queries.function; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.Collections; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.LuceneTestCase; +import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; + +@SuppressCodecs({"Lucene40", "Lucene41"}) // avoid codecs that don't support sortedset +public class TestSortedSetFieldSource extends LuceneTestCase { + public void testSimple() throws Exception { + Directory dir = newDirectory(); + IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null)); + Document doc = new Document(); + doc.add(new SortedSetDocValuesField("value", new BytesRef("baz"))); + doc.add(newStringField("id", "2", Field.Store.YES)); + writer.addDocument(doc); + doc = new Document(); + doc.add(new SortedSetDocValuesField("value", new BytesRef("foo"))); + doc.add(new SortedSetDocValuesField("value", new BytesRef("bar"))); + doc.add(newStringField("id", "1", Field.Store.YES)); + writer.addDocument(doc); + writer.forceMerge(1); + writer.shutdown(); + + DirectoryReader ir = DirectoryReader.open(dir); + AtomicReader ar = getOnlySegmentReader(ir); + + ValueSource vs = new SortedSetFieldSource("value"); + FunctionValues values = vs.getValues(Collections.emptyMap(), ar.getContext()); + assertEquals("baz", values.strVal(0)); + assertEquals("bar", values.strVal(1)); + ir.close(); + dir.close(); + } +}