LUCENE-8645: Intervals.fixField()

This commit is contained in:
Alan Woodward 2019-01-16 20:35:39 +00:00
parent e68697a6de
commit 87d68c8253
4 changed files with 115 additions and 0 deletions

View File

@ -187,6 +187,12 @@ New Features
* LUCENE-8622: Adds a minimum-should-match interval function that produces intervals
spanning a subset of a set of sources. (Alan Woodward)
* LUCENE-8645: Intervals.fixField() allows you to report intervals from one field
as if they came from another. (Alan Woodward)
* LUCENE-8646: New interval functions: Intervals.prefix() and Intervals.wildcard()
(Alan Woodward)
Improvements
* LUCENE-7997: Add BaseSimilarityTestCase to sanity check similarities.

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.intervals;
import java.io.IOException;
import java.util.Objects;
import java.util.Set;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.MatchesIterator;
/**
 * An {@link IntervalsSource} that wraps another source and always queries it with the
 * field supplied at construction time, ignoring whichever field the caller passes in.
 */
class FixedFieldIntervalsSource extends IntervalsSource {

  /** The field that every delegated call is made against. */
  private final String field;

  /** The wrapped source that actually produces intervals, matches and terms. */
  private final IntervalsSource source;

  /**
   * @param field  the field to use for all delegated calls
   * @param source the source to delegate to
   */
  FixedFieldIntervalsSource(String field, IntervalsSource source) {
    this.source = source;
    this.field = field;
  }

  /**
   * Delegates to the wrapped source using the fixed field; the {@code field}
   * argument is intentionally not consulted.
   */
  @Override
  public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
    final String fixedField = this.field;
    return source.intervals(fixedField, ctx);
  }

  /**
   * Delegates to the wrapped source using the fixed field; the {@code field}
   * argument is intentionally not consulted.
   */
  @Override
  public MatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
    final String fixedField = this.field;
    return source.matches(fixedField, ctx, doc);
  }

  /**
   * Asks the wrapped source to add its terms to {@code terms}, using the fixed field
   * rather than the {@code field} argument.
   */
  @Override
  public void extractTerms(String field, Set<Term> terms) {
    final String fixedField = this.field;
    source.extractTerms(fixedField, terms);
  }

  /** Returns the minimum extent reported by the wrapped source. */
  @Override
  public int minExtent() {
    return source.minExtent();
  }

  /** Two instances are equal when both their fixed field and wrapped source are equal. */
  @Override
  public boolean equals(Object o) {
    if (o == this) {
      return true;
    }
    if (o == null) {
      return false;
    }
    if (o.getClass() != getClass()) {
      return false;
    }
    final FixedFieldIntervalsSource other = (FixedFieldIntervalsSource) o;
    if (!Objects.equals(field, other.field)) {
      return false;
    }
    return Objects.equals(source, other.source);
  }

  /** Hash over the same two components that {@link #equals(Object)} compares. */
  @Override
  public int hashCode() {
    return Objects.hash(field, source);
  }

  /** Renders as {@code FIELD(<field>,<source>)}. */
  @Override
  public String toString() {
    return "FIELD(" + field + "," + source + ")";
  }
}

View File

@ -159,6 +159,17 @@ public final class Intervals {
allowOverlaps ? IntervalFunction.UNORDERED : IntervalFunction.UNORDERED_NO_OVERLAP);
}
/**
 * Create an {@link IntervalsSource} that always returns intervals from a specific field,
 * whatever field it is later asked about.
 *
 * <p>This is useful for comparing intervals across multiple fields, for example fields that
 * have been analyzed differently, allowing you to search for stemmed terms near unstemmed
 * terms, etc.
 *
 * @param field  the field that intervals are always taken from
 * @param source the source whose intervals should be read from {@code field}
 * @return a source that wraps {@code source} and is pinned to {@code field}
 */
public static IntervalsSource fixField(String field, IntervalsSource source) {
  final IntervalsSource pinned = new FixedFieldIntervalsSource(field, source);
  return pinned;
}
/**
* Create a non-overlapping IntervalsSource
*

View File

@ -704,4 +704,26 @@ public class TestIntervals extends LuceneTestCase {
assertEquals(2, source.minExtent());
}
/**
 * Checks that a sub-source wrapped with {@code Intervals.fixField} reports its intervals
 * from the pinned field even though the enclosing phrase is evaluated against another field.
 */
public void testFixedField() throws IOException {
  IntervalsSource alph = Intervals.term("alph");
  IntervalsSource hotInField1 = Intervals.fixField("field1", Intervals.term("hot"));
  IntervalsSource source = Intervals.phrase(alph, hotInField1);

  // We search in field2, but 'hot' will report intervals from field1
  int[][] expected = new int[][]{
      {},
      { 1, 2 },
      {},
      {},
      {},
      {}
  };
  checkIntervals(source, "field2", 1, expected);

  MatchesIterator matches = getMatches(source, 1, "field2");
  assertNotNull(matches);
  assertMatch(matches, 1, 2, 6, 18);
}
}