mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-03-25 01:19:02 +00:00
Upgrade to final lucene 8.5.0 snapshot (#53293)
Lucene 8.5.0 release candidates are imminent. This commit upgrades master to use the latest snapshot to check that there are no last-minute bugs or regressions.
This commit is contained in:
parent
5e3df18d56
commit
5c861cfe6e
@ -1,5 +1,5 @@
|
||||
elasticsearch = 7.7.0
|
||||
lucene = 8.5.0-snapshot-c4475920b08
|
||||
lucene = 8.5.0-snapshot-7f057455901
|
||||
|
||||
bundled_jdk_vendor = adoptopenjdk
|
||||
bundled_jdk = 13.0.2+8
|
||||
|
@ -0,0 +1 @@
|
||||
1219c9aca51a37ea3e22cf88ad2e8745d1a6e02f
|
@ -1 +0,0 @@
|
||||
48cb44f1dc8d3368d70581ffdbeab98ac5f5167f
|
@ -29,7 +29,6 @@ import org.apache.lucene.document.SortedNumericDocValuesField;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.BlendedTermQuery;
|
||||
import org.apache.lucene.queries.CommonTermsQuery;
|
||||
import org.apache.lucene.queries.XIntervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalQuery;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
@ -1420,33 +1419,33 @@ public class QueryAnalyzerTests extends ESTestCase {
|
||||
assertTermsEqual(result.extractions, new Term("field", "term1"), new Term("field", "term2"),
|
||||
new Term("field", "term3"), new Term("field", "term4"));
|
||||
|
||||
source = Intervals.ordered(Intervals.term("term1"), XIntervals.wildcard(new BytesRef("a*")));
|
||||
source = Intervals.ordered(Intervals.term("term1"), Intervals.wildcard(new BytesRef("a*")));
|
||||
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.matchAllDocs, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertTermsEqual(result.extractions, new Term("field", "term1"));
|
||||
|
||||
source = Intervals.ordered(XIntervals.wildcard(new BytesRef("a*")));
|
||||
source = Intervals.ordered(Intervals.wildcard(new BytesRef("a*")));
|
||||
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
|
||||
assertEquals(Result.UNKNOWN, result);
|
||||
|
||||
source = Intervals.or(Intervals.term("b"), XIntervals.wildcard(new BytesRef("a*")));
|
||||
source = Intervals.or(Intervals.term("b"), Intervals.wildcard(new BytesRef("a*")));
|
||||
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
|
||||
assertEquals(Result.UNKNOWN, result);
|
||||
|
||||
source = Intervals.ordered(Intervals.term("term1"), XIntervals.prefix(new BytesRef("a")));
|
||||
source = Intervals.ordered(Intervals.term("term1"), Intervals.prefix(new BytesRef("a")));
|
||||
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
|
||||
assertThat(result.verified, is(false));
|
||||
assertThat(result.matchAllDocs, is(false));
|
||||
assertThat(result.minimumShouldMatch, equalTo(1));
|
||||
assertTermsEqual(result.extractions, new Term("field", "term1"));
|
||||
|
||||
source = Intervals.ordered(XIntervals.prefix(new BytesRef("a")));
|
||||
source = Intervals.ordered(Intervals.prefix(new BytesRef("a")));
|
||||
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
|
||||
assertEquals(Result.UNKNOWN, result);
|
||||
|
||||
source = Intervals.or(Intervals.term("b"), XIntervals.prefix(new BytesRef("a")));
|
||||
source = Intervals.or(Intervals.term("b"), Intervals.prefix(new BytesRef("a")));
|
||||
result = analyze(new IntervalQuery("field", source), Version.CURRENT);
|
||||
assertEquals(Result.UNKNOWN, result);
|
||||
|
||||
|
@ -0,0 +1 @@
|
||||
b6f880fa08a44fcb2d50808f9eeb6189a293ce27
|
@ -1 +0,0 @@
|
||||
0748be5811dfe6725847d2e87890a990c58cc3de
|
@ -0,0 +1 @@
|
||||
9c5b8619795f69c225b5ec37b87cb34de0feccd4
|
@ -1 +0,0 @@
|
||||
f693cd60ad8ca9b7d3082f7b9ee6054b9c819b48
|
@ -0,0 +1 @@
|
||||
421e13b9fe09523e094ac708204d62d4ea5b6618
|
@ -1 +0,0 @@
|
||||
72c34e18af81ee1d18e9927fb95690fe056cbd4f
|
@ -0,0 +1 @@
|
||||
ff4ae9f3f3b0bc497f98c9bc47e943525669fc99
|
@ -1 +0,0 @@
|
||||
655438348dcad9a98b5affa76caa3d67aa4bee51
|
@ -0,0 +1 @@
|
||||
dd6430c037566cd3852b73b2ec31e59de24cfe58
|
@ -1 +0,0 @@
|
||||
b99147dad649fce0b0423e41f90c79e0f2fba2b7
|
@ -0,0 +1 @@
|
||||
dd4ca22b151a98a21e255bc1c54f0fadfee5ca4d
|
@ -1 +0,0 @@
|
||||
00ce3e23cf7aba8c1b3e777de92fd31ec1d4d814
|
@ -0,0 +1 @@
|
||||
e85f94d2747ddb560af0bc4d15f0cde45cf3ff30
|
@ -1 +0,0 @@
|
||||
2b03f79d61517d8e6a8744dbd89e61ad661f6a62
|
@ -0,0 +1 @@
|
||||
0365c37a03123ee8e30f75e44a1cb7d5ddd2fc52
|
@ -1 +0,0 @@
|
||||
1963afb27f340df8fc304d377971424832f4ce1a
|
@ -0,0 +1 @@
|
||||
d56b30f75b2df92da8c6c0965ce72e7abb86347b
|
@ -1 +0,0 @@
|
||||
fdff4122e8b8a2dbbc9de24be6963e7d7e33b794
|
@ -0,0 +1 @@
|
||||
39933692162e28c2719b60f499204b28236a2858
|
@ -1 +0,0 @@
|
||||
ca406661129d35008411365d2b6e747dc39378af
|
@ -0,0 +1 @@
|
||||
2e56cc12d2f77d82946299b66f3416f9e621b2f3
|
@ -1 +0,0 @@
|
||||
db053d5861406393254c28f6e46767879b504bb3
|
@ -0,0 +1 @@
|
||||
5cddb5b65e7ead641483dcc2ffb0e50ad8d26eb7
|
@ -1 +0,0 @@
|
||||
f5520ee7145f5d1ef02c7dc87483255d81b5bc6c
|
@ -0,0 +1 @@
|
||||
cfabaedd80fe600cc7fda5ee12d90927fa96d87c
|
@ -1 +0,0 @@
|
||||
2d81c0a3473cc865e7c4858890b7fbfb869bfbf8
|
@ -0,0 +1 @@
|
||||
e90fbcc53531978fc03ef847ba396d4cdd89c7e4
|
@ -1 +0,0 @@
|
||||
6d009afeb485307dce111afb8bb157ebbbb0f212
|
@ -0,0 +1 @@
|
||||
dd0b4cef132a50b3fa919f214a5316fcc78c46ea
|
@ -1 +0,0 @@
|
||||
3a62908ec9eb6e826a56e697322c4c6b6c9a8573
|
@ -0,0 +1 @@
|
||||
c90cc35089afc3f7802668c3969b5e7391b6d15a
|
@ -1 +0,0 @@
|
||||
d71d54ed6e0cf482ce16cf4f419441d83f646827
|
@ -0,0 +1 @@
|
||||
e324233cb8f069e4f6abcbab47368a83c3696f36
|
@ -1 +0,0 @@
|
||||
181915a7d21b73dff16591b20cdee22648e4181f
|
@ -0,0 +1 @@
|
||||
985a451e5f564c84271419a446e044ab589d6f22
|
@ -1 +0,0 @@
|
||||
d3d0bb76d9f4a5368d286a934615dbca7703b3d8
|
@ -0,0 +1 @@
|
||||
beff7cafe0fa5330b9b915825b69321faf0fcaa9
|
@ -1 +0,0 @@
|
||||
95b9fd35e91a34c090ecf301d4dc29cabd198e6f
|
@ -0,0 +1 @@
|
||||
b9256d3a2a64d79435a4c726af8a3c28c2b77d7f
|
@ -1 +0,0 @@
|
||||
1c8da46c3a172830372dfc23e18e9151bb14562c
|
@ -0,0 +1 @@
|
||||
f38949db273a910e94a57229db2d8f3e4aef5e1f
|
@ -1 +0,0 @@
|
||||
b3ad5d3476ed85a529892962d057518555ccfcc9
|
@ -1,861 +0,0 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.queries;
|
||||
|
||||
import org.apache.lucene.index.LeafReaderContext;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.queries.intervals.IntervalIterator;
|
||||
import org.apache.lucene.queries.intervals.IntervalMatchesIterator;
|
||||
import org.apache.lucene.queries.intervals.IntervalQuery;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.search.MatchesIterator;
|
||||
import org.apache.lucene.search.MatchesUtils;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.QueryVisitor;
|
||||
import org.apache.lucene.search.WildcardQuery;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Replacement for {@link Intervals#wildcard(BytesRef)} and {@link Intervals#prefix(BytesRef)}
|
||||
* until LUCENE-9050 is merged
|
||||
*/
|
||||
public final class XIntervals {
|
||||
|
||||
/** Not instantiable: this class only exposes static factory methods and nested helpers. */
private XIntervals() {
}
|
||||
|
||||
public static IntervalsSource wildcard(BytesRef wildcard) {
|
||||
CompiledAutomaton ca = new CompiledAutomaton(WildcardQuery.toAutomaton(new Term("", wildcard)));
|
||||
return new MultiTermIntervalsSource(ca, 128, wildcard.utf8ToString());
|
||||
}
|
||||
|
||||
public static IntervalsSource prefix(BytesRef prefix) {
|
||||
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefix));
|
||||
return new MultiTermIntervalsSource(ca, 128, prefix.utf8ToString());
|
||||
}
|
||||
|
||||
public static IntervalsSource multiterm(CompiledAutomaton ca, String label) {
|
||||
return new MultiTermIntervalsSource(ca, 128, label);
|
||||
}
|
||||
|
||||
static class MultiTermIntervalsSource extends IntervalsSource {
|
||||
|
||||
private final CompiledAutomaton automaton;
|
||||
private final int maxExpansions;
|
||||
private final String pattern;
|
||||
|
||||
MultiTermIntervalsSource(CompiledAutomaton automaton, int maxExpansions, String pattern) {
|
||||
this.automaton = automaton;
|
||||
if (maxExpansions > BooleanQuery.getMaxClauseCount()) {
|
||||
throw new IllegalArgumentException("maxExpansions [" + maxExpansions
|
||||
+ "] cannot be greater than BooleanQuery.getMaxClauseCount [" + BooleanQuery.getMaxClauseCount() + "]");
|
||||
}
|
||||
this.maxExpansions = maxExpansions;
|
||||
this.pattern = pattern;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||
Terms terms = ctx.reader().terms(field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
List<IntervalIterator> subSources = new ArrayList<>();
|
||||
TermsEnum te = automaton.getTermsEnum(terms);
|
||||
BytesRef term;
|
||||
int count = 0;
|
||||
while ((term = te.next()) != null) {
|
||||
subSources.add(TermIntervalsSource.intervals(term, te));
|
||||
if (++count > maxExpansions) {
|
||||
throw new IllegalStateException("Automaton [" + this.pattern + "] expanded to too many terms (limit "
|
||||
+ maxExpansions + ")");
|
||||
}
|
||||
}
|
||||
if (subSources.size() == 0) {
|
||||
return null;
|
||||
}
|
||||
return new DisjunctionIntervalIterator(subSources);
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
|
||||
Terms terms = ctx.reader().terms(field);
|
||||
if (terms == null) {
|
||||
return null;
|
||||
}
|
||||
List<MatchesIterator> subMatches = new ArrayList<>();
|
||||
TermsEnum te = automaton.getTermsEnum(terms);
|
||||
BytesRef term;
|
||||
int count = 0;
|
||||
while ((term = te.next()) != null) {
|
||||
MatchesIterator mi = XIntervals.TermIntervalsSource.matches(te, doc);
|
||||
if (mi != null) {
|
||||
subMatches.add(mi);
|
||||
if (count++ > maxExpansions) {
|
||||
throw new IllegalStateException("Automaton " + term + " expanded to too many terms (limit " + maxExpansions + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
MatchesIterator mi = MatchesUtils.disjunction(subMatches);
|
||||
if (mi == null) {
|
||||
return null;
|
||||
}
|
||||
return new IntervalMatchesIterator() {
|
||||
@Override
|
||||
public int gaps() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
return mi.next();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return mi.startPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return mi.endPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
return mi.startOffset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
return mi.endOffset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() throws IOException {
|
||||
return mi.getSubMatches();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
return mi.getQuery();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(String field, QueryVisitor visitor) {
|
||||
visitor.visitLeaf(new IntervalQuery(field, this));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
MultiTermIntervalsSource that = (MultiTermIntervalsSource) o;
|
||||
return maxExpansions == that.maxExpansions &&
|
||||
Objects.equals(automaton, that.automaton) &&
|
||||
Objects.equals(pattern, that.pattern);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(automaton, maxExpansions, pattern);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "MultiTerm(" + pattern + ")";
|
||||
}
|
||||
}
|
||||
|
||||
static class DisiWrapper {
|
||||
|
||||
public final DocIdSetIterator iterator;
|
||||
public final IntervalIterator intervals;
|
||||
public final long cost;
|
||||
public final float matchCost; // the match cost for two-phase iterators, 0 otherwise
|
||||
public int doc; // the current doc, used for comparison
|
||||
public DisiWrapper next; // reference to a next element, see #topList
|
||||
|
||||
// An approximation of the iterator, or the iterator itself if it does not
|
||||
// support two-phase iteration
|
||||
public final DocIdSetIterator approximation;
|
||||
|
||||
DisiWrapper(IntervalIterator iterator) {
|
||||
this.intervals = iterator;
|
||||
this.iterator = iterator;
|
||||
this.cost = iterator.cost();
|
||||
this.doc = -1;
|
||||
this.approximation = iterator;
|
||||
this.matchCost = iterator.matchCost();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static final class DisiPriorityQueue implements Iterable<DisiWrapper> {
|
||||
|
||||
static int leftNode(int node) {
|
||||
return ((node + 1) << 1) - 1;
|
||||
}
|
||||
|
||||
static int rightNode(int leftNode) {
|
||||
return leftNode + 1;
|
||||
}
|
||||
|
||||
static int parentNode(int node) {
|
||||
return ((node + 1) >>> 1) - 1;
|
||||
}
|
||||
|
||||
private final DisiWrapper[] heap;
|
||||
private int size;
|
||||
|
||||
DisiPriorityQueue(int maxSize) {
|
||||
heap = new DisiWrapper[maxSize];
|
||||
size = 0;
|
||||
}
|
||||
|
||||
public int size() {
|
||||
return size;
|
||||
}
|
||||
|
||||
public DisiWrapper top() {
|
||||
return heap[0];
|
||||
}
|
||||
|
||||
/** Get the list of scorers which are on the current doc. */
|
||||
DisiWrapper topList() {
|
||||
final DisiWrapper[] heap = this.heap;
|
||||
final int size = this.size;
|
||||
DisiWrapper list = heap[0];
|
||||
list.next = null;
|
||||
if (size >= 3) {
|
||||
list = topList(list, heap, size, 1);
|
||||
list = topList(list, heap, size, 2);
|
||||
} else if (size == 2 && heap[1].doc == list.doc) {
|
||||
list = prepend(heap[1], list);
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
// prepend w1 (iterator) to w2 (list)
|
||||
private DisiWrapper prepend(DisiWrapper w1, DisiWrapper w2) {
|
||||
w1.next = w2;
|
||||
return w1;
|
||||
}
|
||||
|
||||
private DisiWrapper topList(DisiWrapper list, DisiWrapper[] heap,
|
||||
int size, int i) {
|
||||
final DisiWrapper w = heap[i];
|
||||
if (w.doc == list.doc) {
|
||||
list = prepend(w, list);
|
||||
final int left = leftNode(i);
|
||||
final int right = left + 1;
|
||||
if (right < size) {
|
||||
list = topList(list, heap, size, left);
|
||||
list = topList(list, heap, size, right);
|
||||
} else if (left < size && heap[left].doc == list.doc) {
|
||||
list = prepend(heap[left], list);
|
||||
}
|
||||
}
|
||||
return list;
|
||||
}
|
||||
|
||||
public DisiWrapper add(DisiWrapper entry) {
|
||||
final DisiWrapper[] heap = this.heap;
|
||||
final int size = this.size;
|
||||
heap[size] = entry;
|
||||
upHeap(size);
|
||||
this.size = size + 1;
|
||||
return heap[0];
|
||||
}
|
||||
|
||||
public DisiWrapper pop() {
|
||||
final DisiWrapper[] heap = this.heap;
|
||||
final DisiWrapper result = heap[0];
|
||||
final int i = --size;
|
||||
heap[0] = heap[i];
|
||||
heap[i] = null;
|
||||
downHeap(i);
|
||||
return result;
|
||||
}
|
||||
|
||||
DisiWrapper updateTop() {
|
||||
downHeap(size);
|
||||
return heap[0];
|
||||
}
|
||||
|
||||
void upHeap(int i) {
|
||||
final DisiWrapper node = heap[i];
|
||||
final int nodeDoc = node.doc;
|
||||
int j = parentNode(i);
|
||||
while (j >= 0 && nodeDoc < heap[j].doc) {
|
||||
heap[i] = heap[j];
|
||||
i = j;
|
||||
j = parentNode(j);
|
||||
}
|
||||
heap[i] = node;
|
||||
}
|
||||
|
||||
void downHeap(int size) {
|
||||
int i = 0;
|
||||
final DisiWrapper node = heap[0];
|
||||
int j = leftNode(i);
|
||||
if (j < size) {
|
||||
int k = rightNode(j);
|
||||
if (k < size && heap[k].doc < heap[j].doc) {
|
||||
j = k;
|
||||
}
|
||||
if (heap[j].doc < node.doc) {
|
||||
do {
|
||||
heap[i] = heap[j];
|
||||
i = j;
|
||||
j = leftNode(i);
|
||||
k = rightNode(j);
|
||||
if (k < size && heap[k].doc < heap[j].doc) {
|
||||
j = k;
|
||||
}
|
||||
} while (j < size && heap[j].doc < node.doc);
|
||||
heap[i] = node;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Iterator<DisiWrapper> iterator() {
|
||||
return Arrays.asList(heap).subList(0, size).iterator();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static class DisjunctionDISIApproximation extends DocIdSetIterator {
|
||||
|
||||
final DisiPriorityQueue subIterators;
|
||||
final long cost;
|
||||
|
||||
DisjunctionDISIApproximation(DisiPriorityQueue subIterators) {
|
||||
this.subIterators = subIterators;
|
||||
long cost = 0;
|
||||
for (DisiWrapper w : subIterators) {
|
||||
cost += w.cost;
|
||||
}
|
||||
this.cost = cost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return cost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return subIterators.top().doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
DisiWrapper top = subIterators.top();
|
||||
final int doc = top.doc;
|
||||
do {
|
||||
top.doc = top.approximation.nextDoc();
|
||||
top = subIterators.updateTop();
|
||||
} while (top.doc == doc);
|
||||
|
||||
return top.doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
DisiWrapper top = subIterators.top();
|
||||
do {
|
||||
top.doc = top.approximation.advance(target);
|
||||
top = subIterators.updateTop();
|
||||
} while (top.doc < target);
|
||||
|
||||
return top.doc;
|
||||
}
|
||||
}
|
||||
|
||||
static class DisjunctionIntervalIterator extends IntervalIterator {
|
||||
|
||||
final DocIdSetIterator approximation;
|
||||
final PriorityQueue<IntervalIterator> intervalQueue;
|
||||
final DisiPriorityQueue disiQueue;
|
||||
final List<IntervalIterator> iterators;
|
||||
final float matchCost;
|
||||
|
||||
IntervalIterator current = EMPTY;
|
||||
|
||||
DisjunctionIntervalIterator(List<IntervalIterator> iterators) {
|
||||
this.disiQueue = new DisiPriorityQueue(iterators.size());
|
||||
for (IntervalIterator it : iterators) {
|
||||
disiQueue.add(new DisiWrapper(it));
|
||||
}
|
||||
this.approximation = new DisjunctionDISIApproximation(disiQueue);
|
||||
this.iterators = iterators;
|
||||
this.intervalQueue = new PriorityQueue<IntervalIterator>(iterators.size()) {
|
||||
@Override
|
||||
protected boolean lessThan(IntervalIterator a, IntervalIterator b) {
|
||||
return a.end() < b.end() || (a.end() == b.end() && a.start() >= b.start());
|
||||
}
|
||||
};
|
||||
float costsum = 0;
|
||||
for (IntervalIterator it : iterators) {
|
||||
costsum += it.cost();
|
||||
}
|
||||
this.matchCost = costsum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return matchCost;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return current.start();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return current.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return current.gaps();
|
||||
}
|
||||
|
||||
private void reset() throws IOException {
|
||||
intervalQueue.clear();
|
||||
for (DisiWrapper dw = disiQueue.topList(); dw != null; dw = dw.next) {
|
||||
dw.intervals.nextInterval();
|
||||
intervalQueue.add(dw.intervals);
|
||||
}
|
||||
current = EMPTY;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (current == EMPTY || current == EXHAUSTED) {
|
||||
if (intervalQueue.size() > 0) {
|
||||
current = intervalQueue.top();
|
||||
}
|
||||
return current.start();
|
||||
}
|
||||
int start = current.start(), end = current.end();
|
||||
while (intervalQueue.size() > 0 && contains(intervalQueue.top(), start, end)) {
|
||||
IntervalIterator it = intervalQueue.pop();
|
||||
if (it != null && it.nextInterval() != NO_MORE_INTERVALS) {
|
||||
intervalQueue.add(it);
|
||||
}
|
||||
}
|
||||
if (intervalQueue.size() == 0) {
|
||||
current = EXHAUSTED;
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
current = intervalQueue.top();
|
||||
return current.start();
|
||||
}
|
||||
|
||||
private boolean contains(IntervalIterator it, int start, int end) {
|
||||
return start >= it.start() && start <= it.end() && end >= it.start() && end <= it.end();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return approximation.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
int doc = approximation.nextDoc();
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
int doc = approximation.advance(target);
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return approximation.cost();
|
||||
}
|
||||
}
|
||||
|
||||
private static final IntervalIterator EMPTY = new IntervalIterator() {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
private static final IntervalIterator EXHAUSTED = new IntervalIterator() {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() {
|
||||
return NO_MORE_INTERVALS;
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
static class TermIntervalsSource extends IntervalsSource {
|
||||
|
||||
final BytesRef term;
|
||||
|
||||
TermIntervalsSource(BytesRef term) {
|
||||
this.term = term;
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalIterator intervals(String field, LeafReaderContext ctx) throws IOException {
|
||||
Terms terms = ctx.reader().terms(field);
|
||||
if (terms == null)
|
||||
return null;
|
||||
if (terms.hasPositions() == false) {
|
||||
throw new IllegalArgumentException("Cannot create an IntervalIterator over field " + field
|
||||
+ " because it has no indexed positions");
|
||||
}
|
||||
TermsEnum te = terms.iterator();
|
||||
if (te.seekExact(term) == false) {
|
||||
return null;
|
||||
}
|
||||
return intervals(term, te);
|
||||
}
|
||||
|
||||
static IntervalIterator intervals(BytesRef term, TermsEnum te) throws IOException {
|
||||
PostingsEnum pe = te.postings(null, PostingsEnum.POSITIONS);
|
||||
float cost = termPositionsCost(te);
|
||||
return new IntervalIterator() {
|
||||
|
||||
@Override
|
||||
public int docID() {
|
||||
return pe.docID();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
int doc = pe.nextDoc();
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int advance(int target) throws IOException {
|
||||
int doc = pe.advance(target);
|
||||
reset();
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long cost() {
|
||||
return pe.cost();
|
||||
}
|
||||
|
||||
int pos = -1, upto;
|
||||
|
||||
@Override
|
||||
public int start() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int end() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int nextInterval() throws IOException {
|
||||
if (upto <= 0)
|
||||
return pos = NO_MORE_INTERVALS;
|
||||
upto--;
|
||||
return pos = pe.nextPosition();
|
||||
}
|
||||
|
||||
@Override
|
||||
public float matchCost() {
|
||||
return cost;
|
||||
}
|
||||
|
||||
private void reset() throws IOException {
|
||||
if (pe.docID() == NO_MORE_DOCS) {
|
||||
upto = -1;
|
||||
pos = NO_MORE_INTERVALS;
|
||||
}
|
||||
else {
|
||||
upto = pe.freq();
|
||||
pos = -1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return term.utf8ToString() + ":" + super.toString();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public IntervalMatchesIterator matches(String field, LeafReaderContext ctx, int doc) throws IOException {
|
||||
Terms terms = ctx.reader().terms(field);
|
||||
if (terms == null)
|
||||
return null;
|
||||
if (terms.hasPositions() == false) {
|
||||
throw new IllegalArgumentException("Cannot create an IntervalIterator over field " + field
|
||||
+ " because it has no indexed positions");
|
||||
}
|
||||
TermsEnum te = terms.iterator();
|
||||
if (te.seekExact(term) == false) {
|
||||
return null;
|
||||
}
|
||||
return matches(te, doc);
|
||||
}
|
||||
|
||||
static IntervalMatchesIterator matches(TermsEnum te, int doc) throws IOException {
|
||||
PostingsEnum pe = te.postings(null, PostingsEnum.OFFSETS);
|
||||
if (pe.advance(doc) != doc) {
|
||||
return null;
|
||||
}
|
||||
return new IntervalMatchesIterator() {
|
||||
|
||||
@Override
|
||||
public int gaps() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int width() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int upto = pe.freq();
|
||||
int pos = -1;
|
||||
|
||||
@Override
|
||||
public boolean next() throws IOException {
|
||||
if (upto <= 0) {
|
||||
pos = IntervalIterator.NO_MORE_INTERVALS;
|
||||
return false;
|
||||
}
|
||||
upto--;
|
||||
pos = pe.nextPosition();
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startPosition() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endPosition() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int startOffset() throws IOException {
|
||||
return pe.startOffset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int endOffset() throws IOException {
|
||||
return pe.endOffset();
|
||||
}
|
||||
|
||||
@Override
|
||||
public MatchesIterator getSubMatches() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Query getQuery() {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
@Override
|
||||
public int minExtent() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Collection<IntervalsSource> pullUpDisjunctions() {
|
||||
return Collections.singleton(this);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null || getClass() != o.getClass()) return false;
|
||||
TermIntervalsSource that = (TermIntervalsSource) o;
|
||||
return Objects.equals(term, that.term);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return term.utf8ToString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(String field, QueryVisitor visitor) {
|
||||
visitor.consumeTerms(new IntervalQuery(field, this), new Term(field, term));
|
||||
}
|
||||
|
||||
private static final int TERM_POSNS_SEEK_OPS_PER_DOC = 128;
|
||||
|
||||
private static final int TERM_OPS_PER_POS = 7;
|
||||
|
||||
static float termPositionsCost(TermsEnum termsEnum) throws IOException {
|
||||
int docFreq = termsEnum.docFreq();
|
||||
assert docFreq > 0;
|
||||
long totalTermFreq = termsEnum.totalTermFreq();
|
||||
float expOccurrencesInMatchingDoc = totalTermFreq / (float) docFreq;
|
||||
return TERM_POSNS_SEEK_OPS_PER_DOC + expOccurrencesInMatchingDoc * TERM_OPS_PER_POS;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -33,7 +33,6 @@ import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.XIntervals;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
import org.apache.lucene.search.AutomatonQuery;
|
||||
@ -427,7 +426,7 @@ public class TextFieldMapper extends FieldMapper {
|
||||
|
||||
public IntervalsSource intervals(BytesRef term) {
|
||||
if (term.length > maxChars) {
|
||||
return XIntervals.prefix(term);
|
||||
return Intervals.prefix(term);
|
||||
}
|
||||
if (term.length >= minChars) {
|
||||
return Intervals.fixField(name(), Intervals.term(term));
|
||||
@ -437,7 +436,7 @@ public class TextFieldMapper extends FieldMapper {
|
||||
sb.append("?");
|
||||
}
|
||||
String wildcardTerm = sb.toString();
|
||||
return Intervals.or(Intervals.fixField(name(), XIntervals.wildcard(new BytesRef(wildcardTerm))), Intervals.term(term));
|
||||
return Intervals.or(Intervals.fixField(name(), Intervals.wildcard(new BytesRef(wildcardTerm))), Intervals.term(term));
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -681,7 +680,7 @@ public class TextFieldMapper extends FieldMapper {
|
||||
if (prefixFieldType != null) {
|
||||
return prefixFieldType.intervals(normalizedTerm);
|
||||
}
|
||||
return XIntervals.prefix(normalizedTerm);
|
||||
return Intervals.prefix(normalizedTerm);
|
||||
}
|
||||
IntervalBuilder builder = new IntervalBuilder(name(), analyzer == null ? searchAnalyzer() : analyzer);
|
||||
return builder.analyzeText(text, maxGaps, ordered);
|
||||
|
@ -21,7 +21,6 @@ package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.XIntervals;
|
||||
import org.apache.lucene.queries.intervals.FilteredIntervalsSource;
|
||||
import org.apache.lucene.queries.intervals.IntervalIterator;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
@ -655,13 +654,12 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||
analyzer = fieldType.searchAnalyzer();
|
||||
}
|
||||
BytesRef normalizedTerm = analyzer.normalize(useField, pattern);
|
||||
// TODO Intervals.wildcard() should take BytesRef
|
||||
source = Intervals.fixField(useField, XIntervals.wildcard(normalizedTerm));
|
||||
source = Intervals.fixField(useField, Intervals.wildcard(normalizedTerm));
|
||||
}
|
||||
else {
|
||||
checkPositions(fieldType);
|
||||
BytesRef normalizedTerm = analyzer.normalize(fieldType.name(), pattern);
|
||||
source = XIntervals.wildcard(normalizedTerm);
|
||||
source = Intervals.wildcard(normalizedTerm);
|
||||
}
|
||||
return source;
|
||||
}
|
||||
@ -798,7 +796,7 @@ public abstract class IntervalsSourceProvider implements NamedWriteable, ToXCont
|
||||
FuzzyQuery fq = new FuzzyQuery(new Term(fieldType.name(), normalizedTerm),
|
||||
fuzziness.asDistance(term), prefixLength, 128, transpositions);
|
||||
CompiledAutomaton[] automata = fq.getAutomata();
|
||||
source = XIntervals.multiterm(automata[automata.length - 1], term);
|
||||
source = Intervals.multiterm(automata[automata.length - 1], term);
|
||||
if (useField != null) {
|
||||
source = Intervals.fixField(useField, source);
|
||||
}
|
||||
|
@ -20,7 +20,6 @@
|
||||
package org.elasticsearch.index.query;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queries.XIntervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalQuery;
|
||||
import org.apache.lucene.queries.intervals.Intervals;
|
||||
import org.apache.lucene.queries.intervals.IntervalsSource;
|
||||
@ -449,7 +448,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
String json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
|
||||
"\"prefix\" : { \"prefix\" : \"term\" } } } }";
|
||||
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
|
||||
Query expected = new IntervalQuery(STRING_FIELD_NAME, XIntervals.prefix(new BytesRef("term")));
|
||||
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.prefix(new BytesRef("term")));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
|
||||
@ -476,7 +475,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
"\"prefix\" : { \"prefix\" : \"t\" } } } }";
|
||||
builder = (IntervalQueryBuilder) parseQuery(short_prefix_json);
|
||||
expected = new IntervalQuery(PREFIXED_FIELD, Intervals.or(
|
||||
Intervals.fixField(PREFIXED_FIELD + "._index_prefix", XIntervals.wildcard(new BytesRef("t?"))),
|
||||
Intervals.fixField(PREFIXED_FIELD + "._index_prefix", Intervals.wildcard(new BytesRef("t?"))),
|
||||
Intervals.term("t")));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
@ -508,7 +507,7 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
"\"wildcard\" : { \"pattern\" : \"Te?m\" } } } }";
|
||||
|
||||
IntervalQueryBuilder builder = (IntervalQueryBuilder) parseQuery(json);
|
||||
Query expected = new IntervalQuery(STRING_FIELD_NAME, XIntervals.wildcard(new BytesRef("te?m")));
|
||||
Query expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.wildcard(new BytesRef("te?m")));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String no_positions_json = "{ \"intervals\" : { \"" + NO_POSITIONS_FIELD + "\": { " +
|
||||
@ -522,14 +521,14 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
"\"wildcard\" : { \"pattern\" : \"Te?m\", \"analyzer\" : \"keyword\" } } } }";
|
||||
|
||||
builder = (IntervalQueryBuilder) parseQuery(keyword_json);
|
||||
expected = new IntervalQuery(STRING_FIELD_NAME, XIntervals.wildcard(new BytesRef("Te?m")));
|
||||
expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.wildcard(new BytesRef("Te?m")));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String fixed_field_json = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
|
||||
"\"wildcard\" : { \"pattern\" : \"Te?m\", \"use_field\" : \"masked_field\" } } } }";
|
||||
|
||||
builder = (IntervalQueryBuilder) parseQuery(fixed_field_json);
|
||||
expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.fixField(MASKED_FIELD, XIntervals.wildcard(new BytesRef("te?m"))));
|
||||
expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.fixField(MASKED_FIELD, Intervals.wildcard(new BytesRef("te?m"))));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
|
||||
String fixed_field_json_no_positions = "{ \"intervals\" : { \"" + STRING_FIELD_NAME + "\": { " +
|
||||
@ -544,14 +543,14 @@ public class IntervalQueryBuilderTests extends AbstractQueryTestCase<IntervalQue
|
||||
|
||||
builder = (IntervalQueryBuilder) parseQuery(fixed_field_analyzer_json);
|
||||
expected = new IntervalQuery(STRING_FIELD_NAME, Intervals.fixField(MASKED_FIELD,
|
||||
XIntervals.wildcard(new BytesRef("Te?m"))));
|
||||
Intervals.wildcard(new BytesRef("Te?m"))));
|
||||
assertEquals(expected, builder.toQuery(createShardContext()));
|
||||
}
|
||||
|
||||
/**
 * Test helper that builds a fuzzy {@code IntervalsSource}: constructs a
 * {@code FuzzyQuery} on the literal field name {@code "field"} from the given
 * term, edit distance, prefix length and transpositions flag (with 128 as the
 * fourth constructor argument), then wraps the last of the query's automata
 * in a multiterm source labelled with {@code label}.
 *
 * NOTE(review): the two consecutive {@code return} lines below are the old
 * ({@code XIntervals}) and new ({@code Intervals}) sides of this diff hunk,
 * not two statements of one method body.
 */
private static IntervalsSource buildFuzzySource(String term, String label, int prefixLength, boolean transpositions, int editDistance) {
|
||||
FuzzyQuery fq = new FuzzyQuery(new Term("field", term), editDistance, prefixLength, 128, transpositions);
|
||||
CompiledAutomaton[] automata = fq.getAutomata();
|
||||
return XIntervals.multiterm(automata[automata.length - 1], label);
|
||||
return Intervals.multiterm(automata[automata.length - 1], label);
|
||||
}
|
||||
|
||||
public void testFuzzy() throws IOException {
|
||||
|
@ -0,0 +1 @@
|
||||
39933692162e28c2719b60f499204b28236a2858
|
@ -1 +0,0 @@
|
||||
ca406661129d35008411365d2b6e747dc39378af
|
Loading…
x
Reference in New Issue
Block a user