LUCENE-5666: add SortedSetFieldSource

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5666@1594211 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2014-05-13 12:23:30 +00:00
parent ac43e0a68d
commit a2e4ce4196
3 changed files with 165 additions and 5 deletions

View File

@ -44,12 +44,12 @@ public abstract class DocTermsIndexDocValues extends FunctionValues {
protected final CharsRef spareChars = new CharsRef(); protected final CharsRef spareChars = new CharsRef();
public DocTermsIndexDocValues(ValueSource vs, AtomicReaderContext context, String field) throws IOException { public DocTermsIndexDocValues(ValueSource vs, AtomicReaderContext context, String field) throws IOException {
try { this(vs, open(context, field));
termsIndex = DocValues.getSorted(context.reader(), field); }
} catch (RuntimeException e) {
throw new DocTermsIndexException(field, e); protected DocTermsIndexDocValues(ValueSource vs, SortedDocValues termsIndex) {
}
this.vs = vs; this.vs = vs;
this.termsIndex = termsIndex;
} }
protected abstract String toTerm(String readableValue); protected abstract String toTerm(String readableValue);
@ -162,6 +162,15 @@ public abstract class DocTermsIndexDocValues extends FunctionValues {
}; };
} }
// TODO: why?
static SortedDocValues open(AtomicReaderContext context, String field) throws IOException {
try {
return DocValues.getSorted(context.reader(), field);
} catch (RuntimeException e) {
throw new DocTermsIndexException(field, e);
}
}
/** /**
* Custom Exception to be thrown when the DocTermsIndex for a field cannot be generated * Custom Exception to be thrown when the DocTermsIndex for a field cannot be generated
*/ */

View File

@ -0,0 +1,90 @@
package org.apache.lucene.queries.function.valuesource;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.docvalues.DocTermsIndexDocValues;
import org.apache.lucene.search.SortedSetSelector;
/**
* Retrieves {@link FunctionValues} instances for multi-valued string based fields.
* <p>
* A SortedSetDocValues contains multiple values for a field, so this
* technique "selects" a value as the representative value for the document.
*
* @see SortedSetSelector
*/
public class SortedSetFieldSource extends FieldCacheSource {
protected final SortedSetSelector.Type selector;
public SortedSetFieldSource(String field) {
this(field, SortedSetSelector.Type.MIN);
}
public SortedSetFieldSource(String field, SortedSetSelector.Type selector) {
super(field);
this.selector = selector;
}
@Override
public FunctionValues getValues(Map context, AtomicReaderContext readerContext) throws IOException {
SortedSetDocValues sortedSet = DocValues.getSortedSet(readerContext.reader(), field);
SortedDocValues view = SortedSetSelector.wrap(sortedSet, selector);
return new DocTermsIndexDocValues(this, view) {
@Override
protected String toTerm(String readableValue) {
return readableValue;
}
@Override
public Object objectVal(int doc) {
return strVal(doc);
}
};
}
@Override
public String description() {
return "sortedset(" + field + ",selector=" + selector + ')';
}
@Override
public int hashCode() {
final int prime = 31;
int result = super.hashCode();
result = prime * result + ((selector == null) ? 0 : selector.hashCode());
return result;
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (!super.equals(obj)) return false;
if (getClass() != obj.getClass()) return false;
SortedSetFieldSource other = (SortedSetFieldSource) obj;
if (selector != other.selector) return false;
return true;
}
}

View File

@ -0,0 +1,61 @@
package org.apache.lucene.queries.function;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Collections;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queries.function.valuesource.SortedSetFieldSource;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
@SuppressCodecs({"Lucene40", "Lucene41"}) // avoid codecs that don't support sortedset
public class TestSortedSetFieldSource extends LuceneTestCase {
public void testSimple() throws Exception {
Directory dir = newDirectory();
IndexWriter writer = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, null));
Document doc = new Document();
doc.add(new SortedSetDocValuesField("value", new BytesRef("baz")));
doc.add(newStringField("id", "2", Field.Store.YES));
writer.addDocument(doc);
doc = new Document();
doc.add(new SortedSetDocValuesField("value", new BytesRef("foo")));
doc.add(new SortedSetDocValuesField("value", new BytesRef("bar")));
doc.add(newStringField("id", "1", Field.Store.YES));
writer.addDocument(doc);
writer.forceMerge(1);
writer.shutdown();
DirectoryReader ir = DirectoryReader.open(dir);
AtomicReader ar = getOnlySegmentReader(ir);
ValueSource vs = new SortedSetFieldSource("value");
FunctionValues values = vs.getValues(Collections.emptyMap(), ar.getContext());
assertEquals("baz", values.strVal(0));
assertEquals("bar", values.strVal(1));
ir.close();
dir.close();
}
}