mirror of https://github.com/apache/lucene.git

commit 5ea36eb992: merge trunk up to r1670923

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene6271@1670929 13f79535-47bb-0310-9956-ffa450edef68
@@ -19,6 +19,10 @@ New Features
  for counting ranges that align with the underlying terms as defined by the
  NumberRangePrefixTree (e.g. familiar date units like days). (David Smiley)

* LUCENE-5879: Added experimental auto-prefix terms to BlockTree terms
  dictionary, exposed as AutoPrefixPostingsFormat (Adrien Grand,
  Uwe Schindler, Robert Muir, Mike McCandless)

API Changes

* LUCENE-3312: The API of oal.document was restructured to
@@ -0,0 +1,125 @@
package org.apache.lucene.codecs.autoprefix;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.FieldsProducer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.PostingsReaderBase;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader;
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsReader;
import org.apache.lucene.codecs.lucene50.Lucene50PostingsWriter;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.util.IOUtils;

/**
 * Just like {@link Lucene50PostingsFormat} except this format
 * exposes the experimental auto-prefix terms.
 *
 * @lucene.experimental
 */

public final class AutoPrefixPostingsFormat extends PostingsFormat {

  private final int minItemsInBlock;
  private final int maxItemsInBlock;
  private final int minItemsInAutoPrefix;
  private final int maxItemsInAutoPrefix;

  /** Creates {@code AutoPrefixPostingsFormat} with default settings. */
  public AutoPrefixPostingsFormat() {
    this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
         BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
         25, 48);
  }

  /** Creates {@code AutoPrefixPostingsFormat} with custom
   * values for {@code minItemsInAutoPrefix} and {@code
   * maxItemsInAutoPrefix}, passed to the block tree terms dictionary.
   * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int) */
  public AutoPrefixPostingsFormat(int minItemsInAutoPrefix, int maxItemsInAutoPrefix) {
    this(BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
         BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
         minItemsInAutoPrefix,
         maxItemsInAutoPrefix);
  }

  /** Creates {@code AutoPrefixPostingsFormat} with custom
   * values for {@code minItemsInBlock}, {@code
   * maxItemsInBlock}, {@code minItemsInAutoPrefix} and {@code maxItemsInAutoPrefix}, passed
   * to the block tree terms dictionary.
   * @see BlockTreeTermsWriter#BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int,int,int) */
  public AutoPrefixPostingsFormat(int minItemsInBlock, int maxItemsInBlock, int minItemsInAutoPrefix, int maxItemsInAutoPrefix) {
    super("AutoPrefix");
    BlockTreeTermsWriter.validateSettings(minItemsInBlock,
                                          maxItemsInBlock);
    BlockTreeTermsWriter.validateAutoPrefixSettings(minItemsInAutoPrefix,
                                                    maxItemsInAutoPrefix);
    this.minItemsInBlock = minItemsInBlock;
    this.maxItemsInBlock = maxItemsInBlock;
    this.minItemsInAutoPrefix = minItemsInAutoPrefix;
    this.maxItemsInAutoPrefix = maxItemsInAutoPrefix;
  }

  @Override
  public String toString() {
    return getName();
  }

  @Override
  public FieldsConsumer fieldsConsumer(SegmentWriteState state) throws IOException {
    PostingsWriterBase postingsWriter = new Lucene50PostingsWriter(state);

    boolean success = false;
    try {
      FieldsConsumer ret = new BlockTreeTermsWriter(state,
                                                    postingsWriter,
                                                    minItemsInBlock,
                                                    maxItemsInBlock,
                                                    minItemsInAutoPrefix,
                                                    maxItemsInAutoPrefix);
      success = true;
      return ret;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(postingsWriter);
      }
    }
  }

  @Override
  public FieldsProducer fieldsProducer(SegmentReadState state) throws IOException {
    PostingsReaderBase postingsReader = new Lucene50PostingsReader(state);
    boolean success = false;
    try {
      FieldsProducer ret = new BlockTreeTermsReader(postingsReader, state);
      success = true;
      return ret;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(postingsReader);
      }
    }
  }
}
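For context, a postings format like this is normally selected per field through the codec. The sketch below is illustrative only and is not part of this commit; it assumes the Lucene 5.x default codec class Lucene50Codec and a hypothetical field name "id".

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
import org.apache.lucene.codecs.lucene50.Lucene50Codec;
import org.apache.lucene.index.IndexWriterConfig;

public class AutoPrefixCodecExample {
  public static IndexWriterConfig newConfig() {
    IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
    // Use auto-prefix terms only for the hypothetical "id" field; every other
    // field keeps the codec's default postings format.
    iwc.setCodec(new Lucene50Codec() {
      private final PostingsFormat autoPrefix = new AutoPrefixPostingsFormat();
      @Override
      public PostingsFormat getPostingsFormatForField(String field) {
        if ("id".equals(field)) {
          return autoPrefix;
        }
        return super.getPostingsFormatForField(field);
      }
    });
    return iwc;
  }
}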
@@ -0,0 +1,22 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/**
 * An experimental postings format that automatically indexes appropriate
 * prefix terms for fast range and prefix queries.
 */
package org.apache.lucene.codecs.autoprefix;
@@ -20,3 +20,4 @@ org.apache.lucene.codecs.memory.FSTOrdPostingsFormat
org.apache.lucene.codecs.memory.FSTPostingsFormat
org.apache.lucene.codecs.memory.MemoryPostingsFormat
org.apache.lucene.codecs.simpletext.SimpleTextPostingsFormat
org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat
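The line added above registers the new format with Lucene's SPI loader under the name passed to super("AutoPrefix") in its constructor, so it can be resolved by name at run time. A minimal, illustrative lookup (not part of this commit):

import org.apache.lucene.codecs.PostingsFormat;

public class LookupAutoPrefix {
  public static void main(String[] args) {
    // Resolves to AutoPrefixPostingsFormat once the jar carrying this services file is on the classpath.
    PostingsFormat format = PostingsFormat.forName("AutoPrefix");
    System.out.println(format.getName());
  }
}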
@@ -0,0 +1,38 @@
package org.apache.lucene.codecs.autoprefix;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.RandomPostingsTester;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestUtil;

/**
 * Tests AutoPrefix's postings
 */

// NOTE: we don't extend BasePostingsFormatTestCase because we can only handle DOCS_ONLY fields:

public class TestAutoPrefixPostingsFormat extends LuceneTestCase {
  public void test() throws Exception {
    new RandomPostingsTester(random()).testFull(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat()),
                                                createTempDir("autoprefix"),
                                                IndexOptions.DOCS,
                                                false);
  }
}
@ -0,0 +1,738 @@
|
|||
package org.apache.lucene.codecs.autoprefix;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.BinaryDocValuesField;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.NumericDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexOptions;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiDocValues;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.NumericDocValues;
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.SerialMergeScheduler;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.MultiTermQuery;
|
||||
import org.apache.lucene.search.PrefixQuery;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
|
||||
public class TestAutoPrefixTerms extends LuceneTestCase {
|
||||
|
||||
private int minItemsPerBlock = TestUtil.nextInt(random(), 2, 100);
|
||||
private int maxItemsPerBlock = 2*(Math.max(2, minItemsPerBlock-1)) + random().nextInt(100);
|
||||
private int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
|
||||
private int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
|
||||
|
||||
private final Codec codec = TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minItemsPerBlock, maxItemsPerBlock,
|
||||
minTermsAutoPrefix, maxTermsAutoPrefix));
|
||||
|
||||
// Numbers in a restricted range, encoded in decimal, left-0-padded:
|
||||
public void testBasicNumericRanges() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setCodec(codec);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
int numTerms = TestUtil.nextInt(random(), 3000, 50000);
|
||||
Set<String> terms = new HashSet<>();
|
||||
int digits = TestUtil.nextInt(random(), 5, 10);
|
||||
int maxValue = 1;
|
||||
for(int i=0;i<digits;i++) {
|
||||
maxValue *= 10;
|
||||
}
|
||||
String format = "%0" + digits + "d";
|
||||
while (terms.size() < numTerms) {
|
||||
terms.add(String.format(Locale.ROOT, format, random().nextInt(maxValue)));
|
||||
}
|
||||
|
||||
for(String term : terms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
doc.add(new NumericDocValuesField("field", Long.parseLong(term)));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
if (VERBOSE) System.out.println("\nTEST: now optimize");
|
||||
if (random().nextBoolean()) {
|
||||
w.forceMerge(1);
|
||||
}
|
||||
|
||||
if (VERBOSE) System.out.println("\nTEST: now done");
|
||||
IndexReader r = DirectoryReader.open(w, true);
|
||||
|
||||
List<String> sortedTerms = new ArrayList<>(terms);
|
||||
Collections.sort(sortedTerms);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: sorted terms:");
|
||||
int idx = 0;
|
||||
for(String term : sortedTerms) {
|
||||
System.out.println(idx + ": " + term);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
int min, max;
|
||||
while (true) {
|
||||
min = random().nextInt(maxValue);
|
||||
max = random().nextInt(maxValue);
|
||||
if (min == max) {
|
||||
continue;
|
||||
} else if (min > max) {
|
||||
int x = min;
|
||||
min = max;
|
||||
max = x;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter + " min=" + min + " max=" + max);
|
||||
}
|
||||
|
||||
boolean minInclusive = random().nextBoolean();
|
||||
boolean maxInclusive = random().nextBoolean();
|
||||
BytesRef minTerm = new BytesRef(String.format(Locale.ROOT, format, min));
|
||||
BytesRef maxTerm = new BytesRef(String.format(Locale.ROOT, format, max));
|
||||
CompiledAutomaton ca = new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive),
|
||||
true, false, Integer.MAX_VALUE, true);
|
||||
|
||||
TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
|
||||
NumericDocValues docValues = MultiDocValues.getNumericValues(r, "field");
|
||||
PostingsEnum postingsEnum = null;
|
||||
|
||||
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), minTerm, maxTerm);
|
||||
|
||||
while (te.next() != null) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" got term=" + te.term().utf8ToString());
|
||||
}
|
||||
verifier.sawTerm(te.term());
|
||||
postingsEnum = te.postings(null, postingsEnum);
|
||||
int docID;
|
||||
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
|
||||
long v = docValues.get(docID);
|
||||
assert v >= min && v <= max: "docID=" + docID + " v=" + v;
|
||||
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
|
||||
if (VERBOSE) {
|
||||
System.out.println(" got docID=" + docID + " v=" + v);
|
||||
}
|
||||
verifier.sawDoc(docID);
|
||||
}
|
||||
}
|
||||
|
||||
int startLoc = Collections.binarySearch(sortedTerms, String.format(Locale.ROOT, format, min));
|
||||
if (startLoc < 0) {
|
||||
startLoc = -startLoc-1;
|
||||
} else if (minInclusive == false) {
|
||||
startLoc++;
|
||||
}
|
||||
int endLoc = Collections.binarySearch(sortedTerms, String.format(Locale.ROOT, format, max));
|
||||
if (endLoc < 0) {
|
||||
endLoc = -endLoc-2;
|
||||
} else if (maxInclusive == false) {
|
||||
endLoc--;
|
||||
}
|
||||
verifier.finish(endLoc-startLoc+1, maxTermsAutoPrefix);
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static BytesRef intToBytes(int v) {
|
||||
int sortableBits = v ^ 0x80000000;
|
||||
BytesRef token = new BytesRef(4);
|
||||
token.length = 4;
|
||||
int index = 3;
|
||||
while (index >= 0) {
|
||||
token.bytes[index] = (byte) (sortableBits & 0xff);
|
||||
index--;
|
||||
sortableBits >>>= 8;
|
||||
}
|
||||
return token;
|
||||
}
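  // Illustrative helper, not part of the original test: the sign-bit flip in
  // intToBytes makes the big-endian bytes of a signed int sort in numeric order
  // when compared as unsigned bytes, which is how BytesRef.compareTo compares.
  private static void checkIntToBytesOrdering() {
    assert intToBytes(-5).compareTo(intToBytes(3)) < 0;
    assert intToBytes(3).compareTo(intToBytes(70)) < 0;
    assert intToBytes(Integer.MIN_VALUE).compareTo(intToBytes(Integer.MAX_VALUE)) < 0;
  }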
|
||||
|
||||
// Numbers are encoded in full binary (4 byte ints):
|
||||
public void testBinaryNumericRanges() throws Exception {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: minItemsPerBlock=" + minItemsPerBlock);
|
||||
System.out.println("TEST: maxItemsPerBlock=" + maxItemsPerBlock);
|
||||
System.out.println("TEST: minTermsAutoPrefix=" + minTermsAutoPrefix);
|
||||
System.out.println("TEST: maxTermsAutoPrefix=" + maxTermsAutoPrefix);
|
||||
}
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setCodec(codec);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
int numTerms = TestUtil.nextInt(random(), 3000, 50000);
|
||||
Set<Integer> terms = new HashSet<>();
|
||||
while (terms.size() < numTerms) {
|
||||
terms.add(random().nextInt());
|
||||
}
|
||||
|
||||
for(Integer term : terms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new BinaryField("field", intToBytes(term)));
|
||||
doc.add(new NumericDocValuesField("field", term));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
if (VERBOSE) System.out.println("TEST: now force merge");
|
||||
w.forceMerge(1);
|
||||
}
|
||||
|
||||
IndexReader r = DirectoryReader.open(w, true);
|
||||
|
||||
List<Integer> sortedTerms = new ArrayList<>(terms);
|
||||
Collections.sort(sortedTerms);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: sorted terms:");
|
||||
int idx = 0;
|
||||
for(Integer term : sortedTerms) {
|
||||
System.out.println(idx + ": " + term);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
|
||||
int min, max;
|
||||
while (true) {
|
||||
min = random().nextInt();
|
||||
max = random().nextInt();
|
||||
if (min == max) {
|
||||
continue;
|
||||
} else if (min > max) {
|
||||
int x = min;
|
||||
min = max;
|
||||
max = x;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter + " min=" + min + " (" + intToBytes(min) + ") max=" + max + " (" + intToBytes(max) + ")");
|
||||
}
|
||||
|
||||
boolean minInclusive = random().nextBoolean();
|
||||
BytesRef minTerm = intToBytes(min);
|
||||
boolean maxInclusive = random().nextBoolean();
|
||||
BytesRef maxTerm = intToBytes(max);
|
||||
CompiledAutomaton ca = new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive),
|
||||
true, false, Integer.MAX_VALUE, true);
|
||||
|
||||
TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
|
||||
NumericDocValues docValues = MultiDocValues.getNumericValues(r, "field");
|
||||
PostingsEnum postingsEnum = null;
|
||||
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), minTerm, maxTerm);
|
||||
while (te.next() != null) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" got term=" + te.term() + " docFreq=" + te.docFreq());
|
||||
}
|
||||
verifier.sawTerm(te.term());
|
||||
postingsEnum = te.postings(null, postingsEnum);
|
||||
int docID;
|
||||
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
|
||||
long v = docValues.get(docID);
|
||||
assert v >= min && v <= max: "docID=" + docID + " v=" + v;
|
||||
verifier.sawDoc(docID);
|
||||
}
|
||||
}
|
||||
|
||||
int startLoc = Collections.binarySearch(sortedTerms, min);
|
||||
if (startLoc < 0) {
|
||||
startLoc = -startLoc-1;
|
||||
} else if (minInclusive == false) {
|
||||
startLoc++;
|
||||
}
|
||||
int endLoc = Collections.binarySearch(sortedTerms, max);
|
||||
if (endLoc < 0) {
|
||||
endLoc = -endLoc-2;
|
||||
} else if (maxInclusive == false) {
|
||||
endLoc--;
|
||||
}
|
||||
int expectedHits = endLoc-startLoc+1;
|
||||
try {
|
||||
verifier.finish(expectedHits, maxTermsAutoPrefix);
|
||||
} catch (AssertionError ae) {
|
||||
for(int i=0;i<numTerms;i++) {
|
||||
if (verifier.allHits.get(i) == false) {
|
||||
int v = (int) docValues.get(i);
|
||||
boolean accept = (v > min || (v == min && minInclusive)) &&
|
||||
(v < max || (v == max && maxInclusive));
|
||||
if (accept) {
|
||||
System.out.println("MISSING: docID=" + i + " v=" + v + " term=" + intToBytes(v));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw ae;
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
// Non-numeric, simple prefix query
|
||||
public void testBasicPrefixTerms() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setCodec(codec);
|
||||
iwc.setMergeScheduler(new SerialMergeScheduler());
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
int numTerms = TestUtil.nextInt(random(), 3000, 50000);
|
||||
Set<String> terms = new HashSet<>();
|
||||
while (terms.size() < numTerms) {
|
||||
terms.add(TestUtil.randomSimpleString(random()));
|
||||
}
|
||||
|
||||
for(String term : terms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
doc.add(new BinaryDocValuesField("field", new BytesRef(term)));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: now force merge");
|
||||
}
|
||||
w.forceMerge(1);
|
||||
}
|
||||
|
||||
IndexReader r = DirectoryReader.open(w, true);
|
||||
|
||||
List<String> sortedTerms = new ArrayList<>(terms);
|
||||
Collections.sort(sortedTerms);
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: sorted terms:");
|
||||
int idx = 0;
|
||||
for(String term : sortedTerms) {
|
||||
System.out.println(idx + ": " + term);
|
||||
idx++;
|
||||
}
|
||||
}
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: r=" + r);
|
||||
}
|
||||
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: iter=" + iter);
|
||||
}
|
||||
|
||||
String prefix;
|
||||
if (random().nextInt(100) == 42) {
|
||||
prefix = "";
|
||||
} else {
|
||||
prefix = TestUtil.randomSimpleString(random(), 1, 4);
|
||||
}
|
||||
BytesRef prefixBR = new BytesRef(prefix);
|
||||
if (VERBOSE) {
|
||||
System.out.println(" prefix=" + prefix);
|
||||
}
|
||||
|
||||
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(prefixBR), true, false, Integer.MAX_VALUE, true);
|
||||
TermsEnum te = ca.getTermsEnum(MultiFields.getTerms(r, "field"));
|
||||
BinaryDocValues docValues = MultiDocValues.getBinaryValues(r, "field");
|
||||
PostingsEnum postingsEnum = null;
|
||||
|
||||
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), prefixBR);
|
||||
|
||||
while (te.next() != null) {
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: got term=" + te.term().utf8ToString() + " docFreq=" + te.docFreq());
|
||||
}
|
||||
verifier.sawTerm(te.term());
|
||||
postingsEnum = te.postings(null, postingsEnum);
|
||||
int docID;
|
||||
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
|
||||
assertTrue("prefixBR=" + prefixBR + " docBR=" + docValues.get(docID), StringHelper.startsWith(docValues.get(docID), prefixBR));
|
||||
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
|
||||
verifier.sawDoc(docID);
|
||||
}
|
||||
}
|
||||
|
||||
int startLoc = Collections.binarySearch(sortedTerms, prefix);
|
||||
if (startLoc < 0) {
|
||||
startLoc = -startLoc-1;
|
||||
}
|
||||
int endLoc = Collections.binarySearch(sortedTerms, prefix + (char) ('z'+1));
|
||||
if (endLoc < 0) {
|
||||
endLoc = -endLoc-2;
|
||||
}
|
||||
int expectedHits = endLoc-startLoc+1;
|
||||
try {
|
||||
verifier.finish(expectedHits, maxTermsAutoPrefix);
|
||||
} catch (AssertionError ae) {
|
||||
for(int i=0;i<numTerms;i++) {
|
||||
if (verifier.allHits.get(i) == false) {
|
||||
String s = docValues.get(i).utf8ToString();
|
||||
if (s.startsWith(prefix)) {
|
||||
System.out.println("MISSING: docID=" + i + " term=" + s);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
throw ae;
|
||||
}
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testDemoPrefixTerms() throws Exception {
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: minTermsAutoPrefix=" + minTermsAutoPrefix + " maxTermsAutoPrefix=" + maxTermsAutoPrefix);
|
||||
System.out.println("\nTEST: minItemsPerBlock=" + minItemsPerBlock + " maxItemsPerBlock=" + maxItemsPerBlock);
|
||||
}
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setCodec(codec);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
int numDocs = 30;
|
||||
|
||||
for(int i=0;i<numDocs;i++) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", "" + (char) (97+i), Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
doc = new Document();
|
||||
doc.add(new StringField("field", "a" + (char) (97+i), Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
w.forceMerge(1);
|
||||
}
|
||||
|
||||
IndexReader r = DirectoryReader.open(w, true);
|
||||
Terms terms = MultiFields.getTerms(r, "field");
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nTEST: now intersect");
|
||||
}
|
||||
CompiledAutomaton ca = new CompiledAutomaton(PrefixQuery.toAutomaton(new BytesRef("a")), false, false, Integer.MAX_VALUE, true);
|
||||
TermsEnum te = ca.getTermsEnum(terms);
|
||||
PostingsEnum postingsEnum = null;
|
||||
|
||||
VerifyAutoPrefixTerms verifier = new VerifyAutoPrefixTerms(r.maxDoc(), new BytesRef("a"));
|
||||
//TermsEnum te = terms.intersect(new CompiledAutomaton(a, true, false), null);
|
||||
while (te.next() != null) {
|
||||
verifier.sawTerm(te.term());
|
||||
postingsEnum = te.postings(null, postingsEnum);
|
||||
int docID;
|
||||
while ((docID = postingsEnum.nextDoc()) != PostingsEnum.NO_MORE_DOCS) {
|
||||
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
|
||||
verifier.sawDoc(docID);
|
||||
}
|
||||
}
|
||||
// 1 document has exactly "a", and 30 documents had "a?"
|
||||
verifier.finish(31, maxTermsAutoPrefix);
|
||||
PrefixQuery q = new PrefixQuery(new Term("field", "a"));
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
assertEquals(31, newSearcher(r).search(q, 1).totalHits);
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
static final class BinaryTokenStream extends TokenStream {
|
||||
private final ByteTermAttribute bytesAtt = addAttribute(ByteTermAttribute.class);
|
||||
private boolean available = true;
|
||||
|
||||
public BinaryTokenStream(BytesRef bytes) {
|
||||
bytesAtt.setBytesRef(bytes);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean incrementToken() {
|
||||
if (available) {
|
||||
clearAttributes();
|
||||
available = false;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset() {
|
||||
available = true;
|
||||
}
|
||||
|
||||
public interface ByteTermAttribute extends TermToBytesRefAttribute {
|
||||
void setBytesRef(BytesRef bytes);
|
||||
}
|
||||
|
||||
public static class ByteTermAttributeImpl extends AttributeImpl implements ByteTermAttribute,TermToBytesRefAttribute {
|
||||
private BytesRef bytes;
|
||||
|
||||
@Override
|
||||
public void fillBytesRef() {
|
||||
// no-op: the bytes were already filled by our owner's incrementToken
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef getBytesRef() {
|
||||
return bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setBytesRef(BytesRef bytes) {
|
||||
this.bytes = bytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {}
|
||||
|
||||
@Override
|
||||
public void copyTo(AttributeImpl target) {
|
||||
ByteTermAttributeImpl other = (ByteTermAttributeImpl) target;
|
||||
other.bytes = bytes;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Basically a StringField that accepts binary term. */
|
||||
private static class BinaryField extends Field {
|
||||
|
||||
final static FieldType TYPE;
|
||||
static {
|
||||
TYPE = new FieldType(StringField.TYPE_NOT_STORED);
|
||||
// Necessary so our custom tokenStream is used by Field.tokenStream:
|
||||
TYPE.setTokenized(true);
|
||||
TYPE.freeze();
|
||||
}
|
||||
|
||||
public BinaryField(String name, BytesRef value) {
|
||||
super(name, new BinaryTokenStream(value), TYPE);
|
||||
}
|
||||
}
|
||||
|
||||
/** Helper class to ensure auto-prefix terms 1) never overlap one another, and 2) are used when they should be. */
|
||||
private static class VerifyAutoPrefixTerms {
|
||||
final FixedBitSet allHits;
|
||||
private final Map<BytesRef,Integer> prefixCounts = new HashMap<>();
|
||||
private int totPrefixCount;
|
||||
private final BytesRef[] bounds;
|
||||
private int totTermCount;
|
||||
private BytesRef lastTerm;
|
||||
|
||||
public VerifyAutoPrefixTerms(int maxDoc, BytesRef... bounds) {
|
||||
allHits = new FixedBitSet(maxDoc);
|
||||
assert bounds.length > 0;
|
||||
this.bounds = bounds;
|
||||
}
|
||||
|
||||
public void sawTerm(BytesRef term) {
|
||||
//System.out.println("saw term=" + term);
|
||||
if (lastTerm != null) {
|
||||
assertTrue(lastTerm.compareTo(term) < 0);
|
||||
}
|
||||
lastTerm = BytesRef.deepCopyOf(term);
|
||||
totTermCount++;
|
||||
totPrefixCount += term.length;
|
||||
for(int i=1;i<=term.length;i++) {
|
||||
BytesRef prefix = BytesRef.deepCopyOf(term);
|
||||
prefix.length = i;
|
||||
Integer count = prefixCounts.get(prefix);
|
||||
if (count == null) {
|
||||
count = 1;
|
||||
} else {
|
||||
count += 1;
|
||||
}
|
||||
prefixCounts.put(prefix, count);
|
||||
}
|
||||
}
|
||||
|
||||
public void sawDoc(int docID) {
|
||||
// The auto-prefix terms should never "overlap" one another, so we should only ever see a given docID one time:
|
||||
assertFalse(allHits.getAndSet(docID));
|
||||
}
|
||||
|
||||
public void finish(int expectedNumHits, int maxPrefixCount) {
|
||||
|
||||
if (maxPrefixCount != -1) {
|
||||
// Auto-terms were used in this test
|
||||
long allowedMaxTerms;
|
||||
|
||||
if (bounds.length == 1) {
|
||||
// Simple prefix query: we should never see more than maxPrefixCount terms:
|
||||
allowedMaxTerms = maxPrefixCount;
|
||||
} else {
|
||||
// Trickier: we need to allow for maxPrefixTerms for each different leading byte in the min and max:
|
||||
assert bounds.length == 2;
|
||||
BytesRef minTerm = bounds[0];
|
||||
BytesRef maxTerm = bounds[1];
|
||||
|
||||
int commonPrefix = 0;
|
||||
for(int i=0;i<minTerm.length && i<maxTerm.length;i++) {
|
||||
if (minTerm.bytes[minTerm.offset+i] != maxTerm.bytes[maxTerm.offset+i]) {
|
||||
commonPrefix = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
allowedMaxTerms = maxPrefixCount * (long) ((minTerm.length-commonPrefix) + (maxTerm.length-commonPrefix));
|
||||
}
|
||||
|
||||
assertTrue("totTermCount=" + totTermCount + " is > allowedMaxTerms=" + allowedMaxTerms, totTermCount <= allowedMaxTerms);
|
||||
}
|
||||
|
||||
assertEquals(expectedNumHits, allHits.cardinality());
|
||||
int sum = 0;
|
||||
for(Map.Entry<BytesRef,Integer> ent : prefixCounts.entrySet()) {
|
||||
|
||||
BytesRef prefix = ent.getKey();
|
||||
if (VERBOSE) {
|
||||
System.out.println(" verify prefix=" + TestUtil.bytesRefToString(prefix) + " count=" + ent.getValue());
|
||||
}
|
||||
|
||||
if (maxPrefixCount != -1) {
|
||||
// Auto-terms were used in this test
|
||||
|
||||
int sumLeftoverSuffix = 0;
|
||||
for(BytesRef bound : bounds) {
|
||||
|
||||
int minSharedLength = Math.min(bound.length, prefix.length);
|
||||
int commonPrefix = minSharedLength;
|
||||
for(int i=0;i<minSharedLength;i++) {
|
||||
if (bound.bytes[bound.offset+i] != prefix.bytes[prefix.offset+i]) {
|
||||
commonPrefix = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
sumLeftoverSuffix += bound.length - commonPrefix;
|
||||
}
|
||||
|
||||
long limit = (1+sumLeftoverSuffix) * (long) maxPrefixCount;
|
||||
|
||||
assertTrue("maxPrefixCount=" + maxPrefixCount + " prefix=" + prefix + " sumLeftoverSuffix=" + sumLeftoverSuffix + " limit=" + limit + " vs actual=" +ent.getValue(),
|
||||
ent.getValue() <= limit);
|
||||
}
|
||||
|
||||
sum += ent.getValue();
|
||||
}
|
||||
|
||||
// Make sure no test bug:
|
||||
assertEquals(totPrefixCount, sum);
|
||||
}
|
||||
}
|
||||
|
||||
/** Make sure you get clear exc. if you try to use this within anything but IndexOptions.DOCS fields. */
|
||||
public void testWithFreqs() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setCodec(codec);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
|
||||
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("foo", "bar bar", ft));
|
||||
w.addDocument(doc);
|
||||
try {
|
||||
w.commit();
|
||||
} catch (IllegalStateException ise) {
|
||||
assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", ise.getMessage());
|
||||
}
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Make sure you get clear exc. if you try to use this within anything but IndexOptions.DOCS fields. */
|
||||
public void testWithPositions() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setCodec(codec);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
|
||||
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("foo", "bar bar", ft));
|
||||
w.addDocument(doc);
|
||||
try {
|
||||
w.commit();
|
||||
} catch (IllegalStateException ise) {
|
||||
assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", ise.getMessage());
|
||||
}
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
/** Make sure you get clear exc. if you try to use this within anything but IndexOptions.DOCS fields. */
|
||||
public void testWithOffsets() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(new MockAnalyzer(random()));
|
||||
iwc.setCodec(codec);
|
||||
IndexWriter w = new IndexWriter(dir, iwc);
|
||||
FieldType ft = new FieldType(StringField.TYPE_NOT_STORED);
|
||||
ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
|
||||
Document doc = new Document();
|
||||
doc.add(new Field("foo", "bar bar", ft));
|
||||
w.addDocument(doc);
|
||||
try {
|
||||
w.commit();
|
||||
} catch (IllegalStateException ise) {
|
||||
assertEquals("ranges can only be indexed with IndexOptions.DOCS (field: foo)", ise.getMessage());
|
||||
}
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@@ -16,6 +16,7 @@ package org.apache.lucene.codecs;
 * limitations under the License.
 */

import org.apache.lucene.codecs.blocktree.BlockTreeTermsReader; // javadocs
import org.apache.lucene.index.OrdTermState;
import org.apache.lucene.index.TermState;

@@ -23,6 +24,8 @@ import org.apache.lucene.index.TermState;
 * Holds all state required for {@link PostingsReaderBase}
 * to produce a {@link org.apache.lucene.index.PostingsEnum} without re-seeking the
 * terms dict.
 *
 * @lucene.internal
 */
public class BlockTermState extends OrdTermState {
  /** how many docs have this term */

@@ -36,6 +39,11 @@ public class BlockTermState extends OrdTermState {
  // TODO: update BTR to nuke this
  public long blockFilePointer;

  /** True if this term is "real" (e.g., not an auto-prefix term or
   * some other "secret" term; currently only {@link BlockTreeTermsReader}
   * sets this). */
  public boolean isRealTerm;

  /** Sole constructor. (For invocation by subclass
   * constructors, typically implicit.) */
  protected BlockTermState() {

@@ -50,10 +58,11 @@ public class BlockTermState extends OrdTermState {
    totalTermFreq = other.totalTermFreq;
    termBlockOrd = other.termBlockOrd;
    blockFilePointer = other.blockFilePointer;
    isRealTerm = other.isRealTerm;
  }

  @Override
  public String toString() {
    return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer;
    return "docFreq=" + docFreq + " totalTermFreq=" + totalTermFreq + " termBlockOrd=" + termBlockOrd + " blockFP=" + blockFilePointer + " isRealTerm=" + isRealTerm;
  }
}
@@ -62,6 +62,7 @@ public abstract class PostingsFormat implements NamedSPILoader.NamedSPI {
   * @param name must be all ascii alphanumeric, and less than 128 characters in length.
   */
  protected PostingsFormat(String name) {
    // TODO: can we somehow detect name conflicts here? Two different classes trying to claim the same name? Otherwise you see confusing errors...
    NamedSPILoader.checkServiceName(name);
    this.name = name;
  }
@ -0,0 +1,415 @@
|
|||
package org.apache.lucene.codecs.blocktree;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.FilteredTermsEnum;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
||||
// TODO: instead of inlining auto-prefix terms with normal terms,
|
||||
// we could write them into their own virtual/private field. This
|
||||
// would make search time a bit more complex, since we'd need to
|
||||
// merge sort between two TermEnums, but it would also make stats
|
||||
// API (used by CheckIndex -verbose) easier to implement since we could
|
||||
// just walk this virtual field and gather its stats.
|
||||
|
||||
/** Used in the first pass when writing a segment to locate
|
||||
* "appropriate" auto-prefix terms to pre-compile into the index.
|
||||
* This visits every term in the index to find prefixes that
|
||||
* match >= min and <= max number of terms. */
|
||||
|
||||
class AutoPrefixTermsWriter {
|
||||
|
||||
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
//static boolean DEBUG = false;
|
||||
//static boolean DEBUG2 = BlockTreeTermsWriter.DEBUG2;
|
||||
//static boolean DEBUG2 = true;
|
||||
|
||||
/** Describes a range of term-space to match, either a simple prefix
|
||||
* (foo*) or a floor-block range of a prefix (e.g. foo[a-m]*,
|
||||
* foo[n-z]*) when there are too many terms starting with foo*. */
|
||||
public static final class PrefixTerm implements Comparable<PrefixTerm> {
|
||||
/** Common prefix */
|
||||
public final byte[] prefix;
|
||||
|
||||
/** If this is -2, this is a normal prefix (foo *), else it's the minimum lead byte of the suffix (e.g. 'd' in foo[d-m]*). */
|
||||
public final int floorLeadStart;
|
||||
|
||||
* The lead byte (inclusive) of the suffix for the term range we match (e.g. 'm' in foo[d-m]*); this is ignored when
|
||||
* floorLeadStart is -2. */
|
||||
public final int floorLeadEnd;
|
||||
|
||||
public final BytesRef term;
|
||||
|
||||
/** Sole constructor. */
|
||||
public PrefixTerm(byte[] prefix, int floorLeadStart, int floorLeadEnd) {
|
||||
this.prefix = prefix;
|
||||
this.floorLeadStart = floorLeadStart;
|
||||
this.floorLeadEnd = floorLeadEnd;
|
||||
this.term = toBytesRef(prefix, floorLeadStart);
|
||||
|
||||
assert floorLeadEnd >= floorLeadStart;
|
||||
assert floorLeadEnd >= 0;
|
||||
assert floorLeadStart == -2 || floorLeadStart >= 0;
|
||||
|
||||
// We should never create empty-string prefix term:
|
||||
assert prefix.length > 0 || floorLeadStart != -2 || floorLeadEnd != 0xff;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String s = brToString(new BytesRef(prefix));
|
||||
if (floorLeadStart == -2) {
|
||||
s += "[-" + Integer.toHexString(floorLeadEnd) + "]";
|
||||
} else {
|
||||
s += "[" + Integer.toHexString(floorLeadStart) + "-" + Integer.toHexString(floorLeadEnd) + "]";
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareTo(PrefixTerm other) {
|
||||
int cmp = term.compareTo(other.term);
|
||||
if (cmp == 0) {
|
||||
if (prefix.length != other.prefix.length) {
|
||||
return prefix.length - other.prefix.length;
|
||||
}
|
||||
|
||||
// On tie, sort the bigger floorLeadEnd, earlier, since it
|
||||
// spans more terms, so during intersect, we want to encounter this one
|
||||
// first so we can use it if the automaton accepts the larger range:
|
||||
cmp = other.floorLeadEnd - floorLeadEnd;
|
||||
}
|
||||
|
||||
return cmp;
|
||||
}
|
||||
|
||||
/** Returns the leading term for this prefix term, e.g. "foo" (for
|
||||
* the foo* prefix) or "foom" (for the foo[m-z]* case). */
|
||||
private static BytesRef toBytesRef(byte[] prefix, int floorLeadStart) {
|
||||
BytesRef br;
|
||||
if (floorLeadStart != -2) {
|
||||
assert floorLeadStart >= 0;
|
||||
br = new BytesRef(prefix.length+1);
|
||||
} else {
|
||||
br = new BytesRef(prefix.length);
|
||||
}
|
||||
System.arraycopy(prefix, 0, br.bytes, 0, prefix.length);
|
||||
br.length = prefix.length;
|
||||
if (floorLeadStart != -2) {
|
||||
assert floorLeadStart >= 0;
|
||||
br.bytes[br.length++] = (byte) floorLeadStart;
|
||||
}
|
||||
|
||||
return br;
|
||||
}
|
||||
|
||||
public int compareTo(BytesRef term) {
|
||||
return this.term.compareTo(term);
|
||||
}
|
||||
|
||||
public TermsEnum getTermsEnum(TermsEnum in) {
|
||||
|
||||
final BytesRef prefixRef = new BytesRef(prefix);
|
||||
|
||||
return new FilteredTermsEnum(in) {
|
||||
{
|
||||
setInitialSeekTerm(term);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
if (StringHelper.startsWith(term, prefixRef) &&
|
||||
(floorLeadEnd == -1 || term.length == prefixRef.length || (term.bytes[term.offset + prefixRef.length] & 0xff) <= floorLeadEnd)) {
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.END;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
final List<PrefixTerm> prefixes = new ArrayList<>();
|
||||
private final int minItemsInPrefix;
|
||||
private final int maxItemsInPrefix;
|
||||
|
||||
// Records index into pending where the current prefix at that
|
||||
// length "started"; for example, if current term starts with 't',
|
||||
// startsByPrefix[0] is the index into pending for the first
|
||||
// term/sub-block starting with 't'. We use this to figure out when
|
||||
// to write a new block:
|
||||
private final BytesRefBuilder lastTerm = new BytesRefBuilder();
|
||||
private int[] prefixStarts = new int[8];
|
||||
private List<Object> pending = new ArrayList<>();
|
||||
|
||||
//private final String segment;
|
||||
|
||||
public AutoPrefixTermsWriter(Terms terms, int minItemsInPrefix, int maxItemsInPrefix) throws IOException {
|
||||
this.minItemsInPrefix = minItemsInPrefix;
|
||||
this.maxItemsInPrefix = maxItemsInPrefix;
|
||||
//this.segment = segment;
|
||||
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
while (true) {
|
||||
BytesRef term = termsEnum.next();
|
||||
if (term == null) {
|
||||
break;
|
||||
}
|
||||
//if (DEBUG) System.out.println("pushTerm: " + brToString(term));
|
||||
pushTerm(term);
|
||||
}
|
||||
|
||||
if (pending.size() > 1) {
|
||||
pushTerm(BlockTreeTermsWriter.EMPTY_BYTES_REF);
|
||||
|
||||
// Also maybe save floor prefixes in root block; this can be a biggish perf gain for large ranges:
|
||||
/*
|
||||
System.out.println("root block pending.size=" + pending.size());
|
||||
for(Object o : pending) {
|
||||
System.out.println(" " + o);
|
||||
}
|
||||
*/
|
||||
while (pending.size() >= minItemsInPrefix) {
|
||||
savePrefixes(0, pending.size());
|
||||
}
|
||||
}
|
||||
|
||||
Collections.sort(prefixes);
|
||||
}
|
||||
|
||||
/** Pushes the new term to the top of the stack, and writes new blocks. */
|
||||
private void pushTerm(BytesRef text) throws IOException {
|
||||
int limit = Math.min(lastTerm.length(), text.length);
|
||||
|
||||
// Find common prefix between last term and current term:
|
||||
int pos = 0;
|
||||
while (pos < limit && lastTerm.byteAt(pos) == text.bytes[text.offset+pos]) {
|
||||
pos++;
|
||||
}
|
||||
|
||||
//if (DEBUG) System.out.println(" shared=" + pos + " lastTerm.length=" + lastTerm.length());
|
||||
|
||||
// Close the "abandoned" suffix now:
|
||||
for(int i=lastTerm.length()-1;i>=pos;i--) {
|
||||
|
||||
// How many items on top of the stack share the current suffix
|
||||
// we are closing:
|
||||
int prefixTopSize = pending.size() - prefixStarts[i];
|
||||
|
||||
while (prefixTopSize >= minItemsInPrefix) {
|
||||
//if (DEBUG) System.out.println("pushTerm i=" + i + " prefixTopSize=" + prefixTopSize + " minItemsInBlock=" + minItemsInPrefix);
|
||||
savePrefixes(i+1, prefixTopSize);
|
||||
//prefixStarts[i] -= prefixTopSize;
|
||||
//System.out.println(" after savePrefixes: " + (pending.size() - prefixStarts[i]) + " pending.size()=" + pending.size() + " start=" + prefixStarts[i]);
|
||||
|
||||
// For large floor blocks, it's possible we should now re-run on the new prefix terms we just created:
|
||||
prefixTopSize = pending.size() - prefixStarts[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (prefixStarts.length < text.length) {
|
||||
prefixStarts = ArrayUtil.grow(prefixStarts, text.length);
|
||||
}
|
||||
|
||||
// Init new tail:
|
||||
for(int i=pos;i<text.length;i++) {
|
||||
prefixStarts[i] = pending.size();
|
||||
}
|
||||
|
||||
lastTerm.copyBytes(text);
|
||||
|
||||
// Only append the first (optional) empty string, not the fake last one used to close all prefixes:
|
||||
if (text.length > 0 || pending.isEmpty()) {
|
||||
byte[] termBytes = new byte[text.length];
|
||||
System.arraycopy(text.bytes, text.offset, termBytes, 0, text.length);
|
||||
pending.add(termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
void savePrefixes(int prefixLength, int count) throws IOException {
|
||||
|
||||
assert count > 0;
|
||||
|
||||
//if (DEBUG2) {
|
||||
// BytesRef br = new BytesRef(lastTerm.bytes());
|
||||
// br.length = prefixLength;
|
||||
// System.out.println(" savePrefixes: seg=" + segment + " " + brToString(br) + " count=" + count + " pending.size()=" + pending.size());
|
||||
//}
|
||||
|
||||
int lastSuffixLeadLabel = -2;
|
||||
|
||||
int start = pending.size()-count;
|
||||
assert start >=0;
|
||||
|
||||
int end = pending.size();
|
||||
int nextBlockStart = start;
|
||||
int nextFloorLeadLabel = -1;
|
||||
int prefixCount = 0;
|
||||
int pendingCount = 0;
|
||||
PrefixTerm lastPTEntry = null;
|
||||
for (int i=start; i<end; i++) {
|
||||
|
||||
byte[] termBytes;
|
||||
Object o = pending.get(i);
|
||||
PrefixTerm ptEntry;
|
||||
if (o instanceof byte[]) {
|
||||
ptEntry = null;
|
||||
termBytes = (byte[]) o;
|
||||
} else {
|
||||
ptEntry = (PrefixTerm) o;
|
||||
termBytes = ptEntry.term.bytes;
|
||||
if (ptEntry.prefix.length != prefixLength) {
|
||||
assert ptEntry.prefix.length > prefixLength;
|
||||
ptEntry = null;
|
||||
}
|
||||
}
|
||||
pendingCount++;
|
||||
|
||||
//if (DEBUG) System.out.println(" check term=" + brToString(new BytesRef(termBytes)));
|
||||
|
||||
int suffixLeadLabel;
|
||||
|
||||
if (termBytes.length == prefixLength) {
|
||||
// Suffix is 0, i.e. prefix 'foo' and term is
|
||||
// 'foo' so the term has empty string suffix
|
||||
// in this block
|
||||
assert lastSuffixLeadLabel == -2;
|
||||
suffixLeadLabel = -2;
|
||||
} else {
|
||||
suffixLeadLabel = termBytes[prefixLength] & 0xff;
|
||||
}
|
||||
|
||||
// if (DEBUG) System.out.println(" i=" + i + " ent=" + ent + " suffixLeadLabel=" + suffixLeadLabel);
|
||||
|
||||
if (suffixLeadLabel != lastSuffixLeadLabel) {
|
||||
// This is a boundary, a chance to make an auto-prefix term if we want:
|
||||
|
||||
// When we are "recursing" (generating auto-prefix terms on a block of
|
||||
// floor'd auto-prefix terms), this assert is non-trivial because it
|
||||
// ensures the floorLeadEnd of the previous terms is in fact less
|
||||
// than the lead start of the current entry:
|
||||
assert suffixLeadLabel > lastSuffixLeadLabel: "suffixLeadLabel=" + suffixLeadLabel + " vs lastSuffixLeadLabel=" + lastSuffixLeadLabel;
|
||||
|
||||
// NOTE: must check nextFloorLeadLabel in case minItemsInPrefix is 2 and prefix is 'a' and we've seen 'a' and then 'aa'
|
||||
if (pendingCount >= minItemsInPrefix && end-nextBlockStart > maxItemsInPrefix && nextFloorLeadLabel != -1) {
|
||||
// The count is too large for one block, so we must break it into "floor" blocks, where we record
|
||||
// the leading label of the suffix of the first term in each floor block, so at search time we can
|
||||
// jump to the right floor block. We just use a naive greedy segmenter here: make a new floor
|
||||
// block as soon as we have at least minItemsInBlock. This is not always best: it often produces
|
||||
// a too-small block as the final block:
|
||||
|
||||
// If the last entry was another prefix term of the same length, then it represents a range of terms, so we must use its ending
|
||||
// prefix label as our ending label:
|
||||
if (lastPTEntry != null) {
|
||||
lastSuffixLeadLabel = lastPTEntry.floorLeadEnd;
|
||||
}
|
||||
|
||||
savePrefix(prefixLength, nextFloorLeadLabel, lastSuffixLeadLabel);
|
||||
pendingCount = 0;
|
||||
|
||||
prefixCount++;
|
||||
nextFloorLeadLabel = suffixLeadLabel;
|
||||
nextBlockStart = i;
|
||||
}
|
||||
|
||||
if (nextFloorLeadLabel == -1) {
|
||||
nextFloorLeadLabel = suffixLeadLabel;
|
||||
//if (DEBUG) System.out.println("set first lead label=" + nextFloorLeadLabel);
|
||||
}
|
||||
|
||||
lastSuffixLeadLabel = suffixLeadLabel;
|
||||
}
|
||||
lastPTEntry = ptEntry;
|
||||
}
|
||||
|
||||
// Write last block, if any:
|
||||
if (nextBlockStart < end) {
|
||||
//System.out.println(" lastPTEntry=" + lastPTEntry + " lastSuffixLeadLabel=" + lastSuffixLeadLabel);
|
||||
if (lastPTEntry != null) {
|
||||
lastSuffixLeadLabel = lastPTEntry.floorLeadEnd;
|
||||
}
|
||||
assert lastSuffixLeadLabel >= nextFloorLeadLabel: "lastSuffixLeadLabel=" + lastSuffixLeadLabel + " nextFloorLeadLabel=" + nextFloorLeadLabel;
|
||||
if (prefixCount == 0) {
|
||||
if (prefixLength > 0) {
|
||||
savePrefix(prefixLength, -2, 0xff);
|
||||
prefixCount++;
|
||||
} else {
|
||||
// Don't add a prefix term for all terms in the index!
|
||||
}
|
||||
} else {
|
||||
if (lastSuffixLeadLabel == -2) {
|
||||
// Special case when closing the empty string root block:
|
||||
lastSuffixLeadLabel = 0xff;
|
||||
}
|
||||
savePrefix(prefixLength, nextFloorLeadLabel, lastSuffixLeadLabel);
|
||||
prefixCount++;
|
||||
}
|
||||
}
|
||||
|
||||
// Remove slice from the top of the pending stack, that we just wrote:
|
||||
int sizeToClear = count;
|
||||
if (prefixCount > 1) {
|
||||
Object o = pending.get(pending.size()-count);
|
||||
if (o instanceof byte[] && ((byte[]) o).length == prefixLength) {
|
||||
// If we were just asked to write all f* terms, but there were too many and so we made floor blocks, the exact term 'f' will remain
|
||||
// as its own item, followed by floor block terms like f[a-m]*, f[n-z]*, so in this case we leave 3 (not 2) items on the pending stack:
|
||||
sizeToClear--;
|
||||
}
|
||||
}
|
||||
pending.subList(pending.size()-sizeToClear, pending.size()).clear();
|
||||
|
||||
// Append prefix terms for each prefix, since these count like real terms that also need to be "rolled up":
|
||||
for(int i=0;i<prefixCount;i++) {
|
||||
PrefixTerm pt = prefixes.get(prefixes.size()-(prefixCount-i));
|
||||
pending.add(pt);
|
||||
}
|
||||
}
|
||||
|
||||
private void savePrefix(int prefixLength, int floorLeadStart, int floorLeadEnd) {
|
||||
byte[] prefix = new byte[prefixLength];
|
||||
System.arraycopy(lastTerm.bytes(), 0, prefix, 0, prefixLength);
|
||||
assert floorLeadStart != -1;
|
||||
assert floorLeadEnd != -1;
|
||||
|
||||
PrefixTerm pt = new PrefixTerm(prefix, floorLeadStart, floorLeadEnd);
|
||||
//if (DEBUG2) System.out.println(" savePrefix: seg=" + segment + " " + pt + " count=" + count);
|
||||
prefixes.add(pt);
|
||||
}
|
||||
}
|
|
@@ -0,0 +1,95 @@
package org.apache.lucene.codecs.blocktree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;

import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet; // javadocs

/** Takes a {@link FixedBitSet} and creates a DOCS {@link PostingsEnum} from it. */

class BitSetPostingsEnum extends PostingsEnum {
  private final BitSet bits;
  private DocIdSetIterator in;

  BitSetPostingsEnum(BitSet bits) {
    this.bits = bits;
    reset();
  }

  @Override
  public int freq() throws IOException {
    return 1;
  }

  @Override
  public int docID() {
    if (in == null) {
      return -1;
    } else {
      return in.docID();
    }
  }

  @Override
  public int nextDoc() throws IOException {
    if (in == null) {
      in = new BitSetIterator(bits, 0);
    }
    return in.nextDoc();
  }

  @Override
  public int advance(int target) throws IOException {
    return in.advance(target);
  }

  @Override
  public long cost() {
    return in.cost();
  }

  void reset() {
    in = null;
  }

  @Override
  public BytesRef getPayload() {
    return null;
  }

  @Override
  public int nextPosition() {
    throw new UnsupportedOperationException();
  }

  @Override
  public int startOffset() {
    throw new UnsupportedOperationException();
  }

  @Override
  public int endOffset() {
    throw new UnsupportedOperationException();
  }
}
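
For orientation, a tiny usage sketch (not part of the patch; the bitset contents and maxDoc are invented): the enum simply replays the set bits of the backing bitset as a doc-id stream, reporting freq() as 1 for every document. Because the class is package-private, this would only compile from within org.apache.lucene.codecs.blocktree, and from a method declared to throw IOException.

    FixedBitSet docs = new FixedBitSet(100);   // hypothetical maxDoc = 100
    docs.set(3);
    docs.set(42);
    docs.set(97);

    BitSetPostingsEnum postings = new BitSetPostingsEnum(docs);
    int doc;
    while ((doc = postings.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
      // freq() is always 1 for this DOCS-only enum
      System.out.println("doc=" + doc + " freq=" + postings.freq());
    }
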
@@ -0,0 +1,87 @@
package org.apache.lucene.codecs.blocktree;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/** Silly stub class, used only when writing an auto-prefix
 *  term in order to expose DocsEnum over a FixedBitSet.  We
 *  pass this to {@link PostingsWriterBase#writeTerm} so
 *  that it can pull .docs() multiple times for the
 *  current term. */

class BitSetTermsEnum extends TermsEnum {
  private final BitSetPostingsEnum postingsEnum;

  public BitSetTermsEnum(BitSet docs) {
    postingsEnum = new BitSetPostingsEnum(docs);
  }

  @Override
  public SeekStatus seekCeil(BytesRef text) {
    throw new UnsupportedOperationException();
  }

  @Override
  public void seekExact(long ord) {
    throw new UnsupportedOperationException();
  }

  @Override
  public BytesRef term() {
    throw new UnsupportedOperationException();
  }

  @Override
  public BytesRef next() {
    throw new UnsupportedOperationException();
  }

  @Override
  public long ord() {
    throw new UnsupportedOperationException();
  }

  @Override
  public int docFreq() {
    throw new UnsupportedOperationException();
  }

  @Override
  public long totalTermFreq() {
    throw new UnsupportedOperationException();
  }

  @Override
  public PostingsEnum postings(Bits liveDocs, PostingsEnum reuse, int flags) {
    if (flags != PostingsEnum.NONE) {
      // We only work with DOCS_ONLY fields
      return null;
    }
    if (liveDocs != null) {
      throw new IllegalArgumentException("cannot handle live docs");
    }
    postingsEnum.reset();
    return postingsEnum;
  }
}
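
Again for orientation, a minimal sketch (not part of the patch; bitset contents invented) of the one code path this stub supports; every other method throws UnsupportedOperationException:

    // The writer hands this enum to PostingsWriterBase.writeTerm so the postings
    // writer can pull a DOCS-only PostingsEnum over the collected prefix bitset.
    FixedBitSet docs = new FixedBitSet(100);   // hypothetical
    docs.set(7);
    docs.set(11);
    TermsEnum termsEnum = new BitSetTermsEnum(docs);
    PostingsEnum postings = termsEnum.postings(null, null, PostingsEnum.NONE);  // liveDocs must be null
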
@@ -34,6 +34,8 @@ import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.SegmentReadState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.PrefixQuery;  // javadocs
import org.apache.lucene.search.TermRangeQuery;  // javadocs
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;

@@ -57,6 +59,14 @@ import org.apache.lucene.util.fst.Outputs;
 *  min/maxItemsPerBlock during indexing to control how
 *  much memory the terms index uses.</p>
 *
 *  <p>If auto-prefix terms were indexed (see
 *  {@link BlockTreeTermsWriter}), then the {@link Terms#intersect}
 *  implementation here will make use of these terms only if the
 *  automaton has a binary sink state, i.e. an accept state
 *  which has a transition to itself accepting all byte values.
 *  For example, both {@link PrefixQuery} and {@link TermRangeQuery}
 *  pass such automata to {@link Terms#intersect}.</p>
 *
 *  <p>The data structure used by this implementation is very
 *  similar to a burst trie
 *  (http://citeseer.ist.psu.edu/viewdoc/summary?doi=10.1.1.18.3499),

@@ -90,8 +100,11 @@ public final class BlockTreeTermsReader extends FieldsProducer {
  /** Initial terms format. */
  public static final int VERSION_START = 0;

  /** Auto-prefix terms. */
  public static final int VERSION_AUTO_PREFIX_TERMS = 1;

  /** Current terms format. */
  public static final int VERSION_CURRENT = VERSION_START;
  public static final int VERSION_CURRENT = VERSION_AUTO_PREFIX_TERMS;

  /** Extension of terms index file */
  static final String TERMS_INDEX_EXTENSION = "tip";

@@ -116,7 +129,7 @@ public final class BlockTreeTermsReader extends FieldsProducer {

  final String segment;

  private final int version;
  final int version;

  /** Sole constructor. */
  public BlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state) throws IOException {
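
The javadoc above says auto-prefix terms are only consulted when the automaton has a binary sink state, which is exactly what prefix and range queries produce. A hedged, searcher-level sketch (the field name, reader variable and range bounds are invented; assumed imports: Term, IndexSearcher, PrefixQuery, TermRangeQuery, TopDocs, BytesRef):

    IndexSearcher searcher = new IndexSearcher(reader);               // assumed open IndexReader
    PrefixQuery byPrefix = new PrefixQuery(new Term("id", "user_"));  // matches user_*
    TermRangeQuery byRange = new TermRangeQuery("id",
        new BytesRef("user_100"), new BytesRef("user_200"), true, false);
    TopDocs hits = searcher.search(byPrefix, 10);

Both queries hand Terms.intersect an automaton with the binary sink state described above, so on a DOCS-only field indexed with auto-prefix terms the intersect implementation can skip whole runs of real terms.
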
@@ -25,11 +25,13 @@ import org.apache.lucene.codecs.BlockTermState;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.codecs.FieldsConsumer;
import org.apache.lucene.codecs.PostingsWriterBase;
import org.apache.lucene.codecs.blocktree.AutoPrefixTermsWriter.PrefixTerm;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SegmentWriteState;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;

@@ -87,6 +89,16 @@ import org.apache.lucene.util.packed.PackedInts;
 *  stride) each term's metadata for each set of terms
 *  between two index terms.
 *  <p>
 *
 *  If {@code minItemsInAutoPrefix} is not zero, then for
 *  {@link IndexOptions#DOCS} fields we detect prefixes that match
 *  "enough" terms and insert auto-prefix terms into the index, which are
 *  used by {@link Terms#intersect} at search time to speed up prefix
 *  and range queries.  Besides {@link Terms#intersect}, these
 *  auto-prefix terms are invisible to all other APIs (don't change terms
 *  stats, don't show up in normal {@link TermsEnum}s, etc.).
 *  <p>
 *
 *  Files:
 *  <ul>
 *    <li><tt>.tim</tt>: <a href="#Termdictionary">Term Dictionary</a></li>
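
A minimal sketch of turning the feature on, using the widened constructor this patch adds further down; the writeState and postingsWriter variables are assumed to already exist, 10/20 are just example bounds that satisfy validateAutoPrefixSettings, and the call must be made from code allowed to throw IOException:

    // Pass non-zero min/max items in auto-prefix to enable it; 0, 0 keeps the old behavior.
    FieldsConsumer termsWriter = new BlockTreeTermsWriter(
        writeState,                                   // assumed SegmentWriteState
        postingsWriter,                               // assumed PostingsWriterBase
        BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE,
        BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE,
        10, 20);                                      // minItemsInAutoPrefix, maxItemsInAutoPrefix
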
@ -200,7 +212,9 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
* #BlockTreeTermsWriter(SegmentWriteState,PostingsWriterBase,int,int)}. */
|
||||
public final static int DEFAULT_MAX_BLOCK_SIZE = 48;
|
||||
|
||||
// public final static boolean DEBUG = false;
|
||||
//public static boolean DEBUG = false;
|
||||
//public static boolean DEBUG2 = false;
|
||||
|
||||
//private final static boolean SAVE_DOT_FILES = false;
|
||||
|
||||
private final IndexOutput termsOut;
|
||||
|
@ -208,6 +222,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
final int maxDoc;
|
||||
final int minItemsInBlock;
|
||||
final int maxItemsInBlock;
|
||||
final int minItemsInAutoPrefix;
|
||||
final int maxItemsInAutoPrefix;
|
||||
|
||||
final PostingsWriterBase postingsWriter;
|
||||
final FieldInfos fieldInfos;
|
||||
|
@ -244,23 +260,67 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
private final List<FieldMetaData> fields = new ArrayList<>();
|
||||
|
||||
// private final String segment;
|
||||
final FixedBitSet prefixDocs;
|
||||
|
||||
/** Reused in getAutoPrefixTermsEnum: */
|
||||
final BitSetTermsEnum prefixFixedBitsTermsEnum;
|
||||
|
||||
/** Reused in getAutoPrefixTermsEnum: */
|
||||
private TermsEnum prefixTermsEnum;
|
||||
|
||||
/** Reused in getAutoPrefixTermsEnum: */
|
||||
private PostingsEnum prefixDocsEnum;
|
||||
|
||||
/** Create a new writer, using default values for auto-prefix terms. */
|
||||
public BlockTreeTermsWriter(SegmentWriteState state,
|
||||
PostingsWriterBase postingsWriter,
|
||||
int minItemsInBlock,
|
||||
int maxItemsInBlock) throws IOException {
|
||||
this(state, postingsWriter, minItemsInBlock, maxItemsInBlock, 0, 0);
|
||||
}
|
||||
|
||||
/** Create a new writer. The number of items (terms or
|
||||
* sub-blocks) per block will aim to be between
|
||||
* minItemsPerBlock and maxItemsPerBlock, though in some
|
||||
* cases the blocks may be smaller than the min. */
|
||||
* cases the blocks may be smaller than the min.
|
||||
* For DOCS_ONLY fields, this terms dictionary will
|
||||
* insert automatically generated prefix terms for common
|
||||
* prefixes, as long as each prefix matches at least
|
||||
* {@code minItemsInAutoPrefix} other terms or prefixes,
|
||||
* and at most {@code maxItemsInAutoPrefix} other terms
|
||||
* or prefixes. Set {@code minItemsInAutoPrefix} to 0
|
||||
* to disable auto-prefix terms. */
|
||||
public BlockTreeTermsWriter(SegmentWriteState state,
|
||||
PostingsWriterBase postingsWriter,
|
||||
int minItemsInBlock,
|
||||
int maxItemsInBlock)
|
||||
int maxItemsInBlock,
|
||||
int minItemsInAutoPrefix,
|
||||
int maxItemsInAutoPrefix)
|
||||
throws IOException
|
||||
{
|
||||
validateSettings(minItemsInBlock, maxItemsInBlock);
|
||||
validateSettings(minItemsInBlock,
|
||||
maxItemsInBlock);
|
||||
|
||||
this.minItemsInBlock = minItemsInBlock;
|
||||
this.maxItemsInBlock = maxItemsInBlock;
|
||||
|
||||
validateAutoPrefixSettings(minItemsInAutoPrefix,
|
||||
maxItemsInAutoPrefix);
|
||||
|
||||
if (minItemsInAutoPrefix != 0) {
|
||||
// TODO: can we used compressed bitset instead? that auto-upgrades if it's dense enough...
|
||||
prefixDocs = new FixedBitSet(state.segmentInfo.maxDoc());
|
||||
prefixFixedBitsTermsEnum = new BitSetTermsEnum(prefixDocs);
|
||||
} else {
|
||||
prefixDocs = null;
|
||||
prefixFixedBitsTermsEnum = null;
|
||||
}
|
||||
|
||||
this.minItemsInAutoPrefix = minItemsInAutoPrefix;
|
||||
this.maxItemsInAutoPrefix = maxItemsInAutoPrefix;
|
||||
|
||||
this.maxDoc = state.segmentInfo.maxDoc();
|
||||
this.fieldInfos = state.fieldInfos;
|
||||
this.minItemsInBlock = minItemsInBlock;
|
||||
this.maxItemsInBlock = maxItemsInBlock;
|
||||
this.postingsWriter = postingsWriter;
|
||||
|
||||
final String termsName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_EXTENSION);
|
||||
|
@ -269,12 +329,13 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
IndexOutput indexOut = null;
|
||||
try {
|
||||
CodecUtil.writeIndexHeader(termsOut, BlockTreeTermsReader.TERMS_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
|
||||
final String indexName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, BlockTreeTermsReader.TERMS_INDEX_EXTENSION);
|
||||
indexOut = state.directory.createOutput(indexName, state.context);
|
||||
CodecUtil.writeIndexHeader(indexOut, BlockTreeTermsReader.TERMS_INDEX_CODEC_NAME, BlockTreeTermsReader.VERSION_CURRENT,
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
state.segmentInfo.getId(), state.segmentSuffix);
|
||||
//segment = state.segmentInfo.name;
|
||||
|
||||
postingsWriter.init(termsOut, state); // have consumer write its format/header
|
||||
|
||||
|
@ -311,33 +372,108 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
}
|
||||
|
||||
  /** Throws {@code IllegalArgumentException} if any of these settings
   *  is invalid. */
  public static void validateAutoPrefixSettings(int minItemsInAutoPrefix,
                                                int maxItemsInAutoPrefix) {
    if (minItemsInAutoPrefix != 0) {
      if (minItemsInAutoPrefix < 2) {
        throw new IllegalArgumentException("minItemsInAutoPrefix must be at least 2; got minItemsInAutoPrefix=" + minItemsInAutoPrefix);
      }
      if (minItemsInAutoPrefix > maxItemsInAutoPrefix) {
        throw new IllegalArgumentException("maxItemsInAutoPrefix must be >= minItemsInAutoPrefix; got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
      }
      if (2*(minItemsInAutoPrefix-1) > maxItemsInAutoPrefix) {
        throw new IllegalArgumentException("maxItemsInAutoPrefix must be at least 2*(minItemsInAutoPrefix-1); got maxItemsInAutoPrefix=" + maxItemsInAutoPrefix + " minItemsInAutoPrefix=" + minItemsInAutoPrefix);
      }
    } else if (maxItemsInAutoPrefix != 0) {
      throw new IllegalArgumentException("maxItemsInAutoPrefix must be 0 (disabled) when minItemsInAutoPrefix is 0");
    }
  }
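
A few concrete inputs for the checks above (illustrative only; the bad settings throw IllegalArgumentException):

    BlockTreeTermsWriter.validateAutoPrefixSettings(0, 0);    // ok: auto-prefix disabled
    BlockTreeTermsWriter.validateAutoPrefixSettings(10, 20);  // ok: 2*(10-1) = 18 <= 20
    BlockTreeTermsWriter.validateAutoPrefixSettings(1, 5);    // throws: min must be at least 2
    BlockTreeTermsWriter.validateAutoPrefixSettings(10, 12);  // throws: max must be >= 2*(10-1) = 18
    BlockTreeTermsWriter.validateAutoPrefixSettings(0, 5);    // throws: max must be 0 when min is 0
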
|
||||
@Override
|
||||
public void write(Fields fields) throws IOException {
|
||||
//if (DEBUG) System.out.println("\nBTTW.write seg=" + segment);
|
||||
|
||||
String lastField = null;
|
||||
for(String field : fields) {
|
||||
assert lastField == null || lastField.compareTo(field) < 0;
|
||||
lastField = field;
|
||||
|
||||
//if (DEBUG) System.out.println("\nBTTW.write seg=" + segment + " field=" + field);
|
||||
Terms terms = fields.terms(field);
|
||||
if (terms == null) {
|
||||
continue;
|
||||
}
|
||||
FieldInfo fieldInfo = fieldInfos.fieldInfo(field);
|
||||
|
||||
// First pass to find all prefix terms we should compile into the index:
|
||||
List<PrefixTerm> prefixTerms;
|
||||
if (minItemsInAutoPrefix != 0) {
|
||||
if (fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
|
||||
throw new IllegalStateException("ranges can only be indexed with IndexOptions.DOCS (field: " + fieldInfo.name + ")");
|
||||
}
|
||||
prefixTerms = new AutoPrefixTermsWriter(terms, minItemsInAutoPrefix, maxItemsInAutoPrefix).prefixes;
|
||||
//if (DEBUG) {
|
||||
// for(PrefixTerm term : prefixTerms) {
|
||||
// System.out.println("field=" + fieldInfo.name + " PREFIX TERM: " + term);
|
||||
// }
|
||||
//}
|
||||
} else {
|
||||
prefixTerms = null;
|
||||
}
|
||||
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
|
||||
TermsWriter termsWriter = new TermsWriter(fieldInfos.fieldInfo(field));
|
||||
int prefixTermUpto = 0;
|
||||
while (true) {
|
||||
BytesRef term = termsEnum.next();
|
||||
//if (DEBUG) System.out.println("BTTW: next term " + term);
|
||||
|
||||
// Insert (merge sort) next prefix term(s):
|
||||
if (prefixTerms != null) {
|
||||
while (prefixTermUpto < prefixTerms.size() && (term == null || prefixTerms.get(prefixTermUpto).compareTo(term) <= 0)) {
|
||||
PrefixTerm prefixTerm = prefixTerms.get(prefixTermUpto);
|
||||
//if (DEBUG) System.out.println("seg=" + segment + " field=" + fieldInfo.name + " NOW INSERT prefix=" + prefixTerm);
|
||||
termsWriter.write(prefixTerm.term, getAutoPrefixTermsEnum(terms, prefixTerm), prefixTerm);
|
||||
prefixTermUpto++;
|
||||
}
|
||||
}
|
||||
|
||||
if (term == null) {
|
||||
break;
|
||||
}
|
||||
termsWriter.write(term, termsEnum);
|
||||
|
||||
//if (DEBUG) System.out.println("write field=" + fieldInfo.name + " term=" + brToString(term));
|
||||
termsWriter.write(term, termsEnum, null);
|
||||
}
|
||||
|
||||
assert prefixTerms == null || prefixTermUpto == prefixTerms.size();
|
||||
|
||||
termsWriter.finish();
|
||||
|
||||
//if (DEBUG) System.out.println("\nBTTW.write done seg=" + segment + " field=" + field);
|
||||
}
|
||||
}
|
||||
|
||||
  private TermsEnum getAutoPrefixTermsEnum(Terms terms, final PrefixTerm prefix) throws IOException {
    assert prefixDocs != null;
    prefixDocs.clear(0, prefixDocs.length());

    prefixTermsEnum = prefix.getTermsEnum(terms.iterator(prefixTermsEnum));

    //System.out.println("BTTW.getAutoPrefixTE: prefix=" + prefix);
    while (prefixTermsEnum.next() != null) {
      //System.out.println("    got term=" + prefixTermsEnum.term().utf8ToString());
      //termCount++;
      prefixDocsEnum = prefixTermsEnum.postings(null, prefixDocsEnum, 0);
      //System.out.println("      " + prefixDocsEnum + " doc=" + prefixDocsEnum.docID());
      prefixDocs.or(prefixDocsEnum);
    }

    //System.out.println("  done terms: " + prefixDocs.cardinality() + " doc seen; " + termCount + " terms seen");
    return prefixFixedBitsTermsEnum;
  }
|
||||
static long encodeOutput(long fp, boolean hasTerms, boolean isFloor) {
|
||||
assert fp < (1L << 62);
|
||||
|
@ -356,30 +492,38 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
public final byte[] termBytes;
|
||||
// stats + metadata
|
||||
public final BlockTermState state;
|
||||
// Non-null if this is an auto-prefix-term:
|
||||
public final PrefixTerm prefixTerm;
|
||||
public PendingTerm other;
|
||||
|
||||
public PendingTerm(BytesRef term, BlockTermState state) {
|
||||
public PendingTerm(BytesRef term, BlockTermState state, PrefixTerm prefixTerm) {
|
||||
super(true);
|
||||
this.termBytes = new byte[term.length];
|
||||
System.arraycopy(term.bytes, term.offset, termBytes, 0, term.length);
|
||||
this.state = state;
|
||||
this.prefixTerm = prefixTerm;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return brToString(termBytes);
|
||||
return "TERM: " + brToString(termBytes);
|
||||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
if (b == null) {
|
||||
return "(null)";
|
||||
} else {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -410,7 +554,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "BLOCK: " + brToString(prefix);
|
||||
return "BLOCK: prefix=" + brToString(prefix);
|
||||
}
|
||||
|
||||
public void compileIndex(List<PendingBlock> blocks, RAMOutputStream scratchBytes, IntsRefBuilder scratchIntsRef) throws IOException {
|
||||
|
@ -493,6 +637,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
private final RAMOutputStream scratchBytes = new RAMOutputStream();
|
||||
private final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
|
||||
|
||||
static final BytesRef EMPTY_BYTES_REF = new BytesRef();
|
||||
|
||||
class TermsWriter {
|
||||
private final FieldInfo fieldInfo;
|
||||
private final int longsSize;
|
||||
|
@ -529,14 +675,11 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
assert count > 0;
|
||||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
BytesRef br = new BytesRef(lastTerm.bytes);
|
||||
br.offset = lastTerm.offset;
|
||||
br.length = prefixLength;
|
||||
System.out.println("writeBlocks: " + br.utf8ToString() + " count=" + count);
|
||||
}
|
||||
*/
|
||||
//if (DEBUG2) {
|
||||
// BytesRef br = new BytesRef(lastTerm.bytes());
|
||||
// br.length = prefixLength;
|
||||
// System.out.println("writeBlocks: seg=" + segment + " prefix=" + brToString(br) + " count=" + count);
|
||||
//}
|
||||
|
||||
// Root block better write all remaining pending entries:
|
||||
assert prefixLength > 0 || count == pending.size();
|
||||
|
@ -547,6 +690,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// only points to sub-blocks in the terms index so we can avoid seeking
|
||||
// to it when we are looking for a term):
|
||||
boolean hasTerms = false;
|
||||
boolean hasPrefixTerms = false;
|
||||
boolean hasSubBlocks = false;
|
||||
|
||||
int start = pending.size()-count;
|
||||
|
@ -566,7 +710,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// Suffix is 0, i.e. prefix 'foo' and term is
|
||||
// 'foo' so the term has empty string suffix
|
||||
// in this block
|
||||
assert lastSuffixLeadLabel == -1;
|
||||
assert lastSuffixLeadLabel == -1: "i=" + i + " lastSuffixLeadLabel=" + lastSuffixLeadLabel;
|
||||
suffixLeadLabel = -1;
|
||||
} else {
|
||||
suffixLeadLabel = term.termBytes[prefixLength] & 0xff;
|
||||
|
@ -587,10 +731,11 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// block as soon as we have at least minItemsInBlock. This is not always best: it often produces
|
||||
// a too-small block as the final block:
|
||||
boolean isFloor = itemsInBlock < count;
|
||||
newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasSubBlocks));
|
||||
newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, i, hasTerms, hasPrefixTerms, hasSubBlocks));
|
||||
|
||||
hasTerms = false;
|
||||
hasSubBlocks = false;
|
||||
hasPrefixTerms = false;
|
||||
nextFloorLeadLabel = suffixLeadLabel;
|
||||
nextBlockStart = i;
|
||||
}
|
||||
|
@ -600,6 +745,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
if (ent.isTerm) {
|
||||
hasTerms = true;
|
||||
hasPrefixTerms |= ((PendingTerm) ent).prefixTerm != null;
|
||||
} else {
|
||||
hasSubBlocks = true;
|
||||
}
|
||||
|
@ -609,7 +755,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
if (nextBlockStart < end) {
|
||||
int itemsInBlock = end - nextBlockStart;
|
||||
boolean isFloor = itemsInBlock < count;
|
||||
newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasSubBlocks));
|
||||
newBlocks.add(writeBlock(prefixLength, isFloor, nextFloorLeadLabel, nextBlockStart, end, hasTerms, hasPrefixTerms, hasSubBlocks));
|
||||
}
|
||||
|
||||
assert newBlocks.isEmpty() == false;
|
||||
|
@ -634,7 +780,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
* were too many (more than maxItemsInBlock) entries sharing the
|
||||
* same prefix, and so we broke it into multiple floor blocks where
|
||||
* we record the starting label of the suffix of each floor block. */
|
||||
private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end, boolean hasTerms, boolean hasSubBlocks) throws IOException {
|
||||
private PendingBlock writeBlock(int prefixLength, boolean isFloor, int floorLeadLabel, int start, int end,
|
||||
boolean hasTerms, boolean hasPrefixTerms, boolean hasSubBlocks) throws IOException {
|
||||
|
||||
assert end > start;
|
||||
|
||||
|
@ -646,6 +793,8 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
System.arraycopy(lastTerm.get().bytes, 0, prefix.bytes, 0, prefixLength);
|
||||
prefix.length = prefixLength;
|
||||
|
||||
//if (DEBUG2) System.out.println(" writeBlock field=" + fieldInfo.name + " prefix=" + brToString(prefix) + " fp=" + startFP + " isFloor=" + isFloor + " isLastInFloor=" + (end == pending.size()) + " floorLeadLabel=" + floorLeadLabel + " start=" + start + " end=" + end + " hasTerms=" + hasTerms + " hasSubBlocks=" + hasSubBlocks);
|
||||
|
||||
// Write block header:
|
||||
int numEntries = end - start;
|
||||
int code = numEntries << 1;
|
||||
|
@ -666,31 +815,34 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
// We optimize the leaf block case (block has only terms), writing a more
|
||||
// compact format in this case:
|
||||
boolean isLeafBlock = hasSubBlocks == false;
|
||||
boolean isLeafBlock = hasSubBlocks == false && hasPrefixTerms == false;
|
||||
|
||||
//System.out.println(" isLeaf=" + isLeafBlock);
|
||||
|
||||
final List<FST<BytesRef>> subIndices;
|
||||
|
||||
boolean absolute = true;
|
||||
|
||||
if (isLeafBlock) {
|
||||
// Only terms:
|
||||
// Block contains only ordinary terms:
|
||||
subIndices = null;
|
||||
for (int i=start;i<end;i++) {
|
||||
PendingEntry ent = pending.get(i);
|
||||
assert ent.isTerm: "i=" + i;
|
||||
|
||||
PendingTerm term = (PendingTerm) ent;
|
||||
assert term.prefixTerm == null;
|
||||
|
||||
assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
|
||||
BlockTermState state = term.state;
|
||||
final int suffix = term.termBytes.length - prefixLength;
|
||||
/*
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
}
|
||||
*/
|
||||
//if (DEBUG2) {
|
||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
//}
|
||||
|
||||
// For leaf block we write suffix straight
|
||||
suffixWriter.writeVInt(suffix);
|
||||
suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
|
||||
|
@ -714,27 +866,51 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
absolute = false;
|
||||
}
|
||||
} else {
|
||||
// Mixed terms and sub-blocks:
|
||||
// Block has at least one prefix term or a sub block:
|
||||
subIndices = new ArrayList<>();
|
||||
boolean sawAutoPrefixTerm = false;
|
||||
for (int i=start;i<end;i++) {
|
||||
PendingEntry ent = pending.get(i);
|
||||
if (ent.isTerm) {
|
||||
PendingTerm term = (PendingTerm) ent;
|
||||
|
||||
assert StringHelper.startsWith(term.termBytes, prefix): "term.term=" + term.termBytes + " prefix=" + prefix;
|
||||
BlockTermState state = term.state;
|
||||
final int suffix = term.termBytes.length - prefixLength;
|
||||
/*
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
}
|
||||
*/
|
||||
//if (DEBUG2) {
|
||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(term.termBytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write term suffix=" + brToString(suffixBytes));
|
||||
// if (term.prefixTerm != null) {
|
||||
// System.out.println(" ** auto-prefix term: " + term.prefixTerm);
|
||||
// }
|
||||
//}
|
||||
|
||||
// For non-leaf block we borrow 1 bit to record
|
||||
// if entry is term or sub-block
|
||||
suffixWriter.writeVInt(suffix<<1);
|
||||
// if entry is term or sub-block, and 1 bit to record if
|
||||
// it's a prefix term. Terms cannot be larger than ~32 KB
|
||||
// so we won't run out of bits:
|
||||
code = suffix<<2;
|
||||
int floorLeadEnd = -1;
|
||||
if (term.prefixTerm != null) {
|
||||
sawAutoPrefixTerm = true;
|
||||
PrefixTerm prefixTerm = term.prefixTerm;
|
||||
floorLeadEnd = prefixTerm.floorLeadEnd;
|
||||
assert floorLeadEnd != -1;
|
||||
|
||||
if (prefixTerm.floorLeadStart == -2) {
|
||||
// Starts with empty string
|
||||
code |= 2;
|
||||
} else {
|
||||
code |= 3;
|
||||
}
|
||||
}
|
||||
suffixWriter.writeVInt(code);
|
||||
suffixWriter.writeBytes(term.termBytes, prefixLength, suffix);
|
||||
if (floorLeadEnd != -1) {
|
||||
suffixWriter.writeByte((byte) floorLeadEnd);
|
||||
}
|
||||
assert floorLeadLabel == -1 || (term.termBytes[prefixLength] & 0xff) >= floorLeadLabel;
|
||||
|
||||
// Write term stats, to separate byte[] blob:
|
||||
|
@ -765,33 +941,32 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
PendingBlock block = (PendingBlock) ent;
|
||||
assert StringHelper.startsWith(block.prefix, prefix);
|
||||
final int suffix = block.prefix.length - prefixLength;
|
||||
assert StringHelper.startsWith(block.prefix, prefix);
|
||||
|
||||
assert suffix > 0;
|
||||
|
||||
// For non-leaf block we borrow 1 bit to record
|
||||
// if entry is term or sub-block
|
||||
suffixWriter.writeVInt((suffix<<1)|1);
|
||||
// if entry is term or sub-block, and 1 bit (unset here) to
|
||||
// record if it's a prefix term:
|
||||
suffixWriter.writeVInt((suffix<<2)|1);
|
||||
suffixWriter.writeBytes(block.prefix.bytes, prefixLength, suffix);
|
||||
|
||||
assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel;
|
||||
//if (DEBUG2) {
|
||||
// BytesRef suffixBytes = new BytesRef(suffix);
|
||||
// System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
// suffixBytes.length = suffix;
|
||||
// System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
//}
|
||||
|
||||
assert floorLeadLabel == -1 || (block.prefix.bytes[prefixLength] & 0xff) >= floorLeadLabel: "floorLeadLabel=" + floorLeadLabel + " suffixLead=" + (block.prefix.bytes[prefixLength] & 0xff);
|
||||
assert block.fp < startFP;
|
||||
|
||||
/*
|
||||
if (DEBUG) {
|
||||
BytesRef suffixBytes = new BytesRef(suffix);
|
||||
System.arraycopy(block.prefix.bytes, prefixLength, suffixBytes.bytes, 0, suffix);
|
||||
suffixBytes.length = suffix;
|
||||
System.out.println(" write sub-block suffix=" + brToString(suffixBytes) + " subFP=" + block.fp + " subCode=" + (startFP-block.fp) + " floor=" + block.isFloor);
|
||||
}
|
||||
*/
|
||||
|
||||
suffixWriter.writeVLong(startFP - block.fp);
|
||||
subIndices.add(block.index);
|
||||
}
|
||||
}
|
||||
|
||||
assert subIndices.size() != 0;
|
||||
assert subIndices.size() != 0 || sawAutoPrefixTerm;
|
||||
}
|
||||
|
||||
// TODO: we could block-write the term suffix pointers;
|
||||
|
@ -835,7 +1010,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
}
|
||||
|
||||
/** Writes one term's worth of postings. */
|
||||
public void write(BytesRef text, TermsEnum termsEnum) throws IOException {
|
||||
public void write(BytesRef text, TermsEnum termsEnum, PrefixTerm prefixTerm) throws IOException {
|
||||
/*
|
||||
if (DEBUG) {
|
||||
int[] tmp = new int[lastTerm.length];
|
||||
|
@ -846,19 +1021,25 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
|
||||
BlockTermState state = postingsWriter.writeTerm(text, termsEnum, docsSeen);
|
||||
if (state != null) {
|
||||
|
||||
assert state.docFreq != 0;
|
||||
assert fieldInfo.getIndexOptions() == IndexOptions.DOCS || state.totalTermFreq >= state.docFreq: "postingsWriter=" + postingsWriter;
|
||||
sumDocFreq += state.docFreq;
|
||||
sumTotalTermFreq += state.totalTermFreq;
|
||||
pushTerm(text);
|
||||
|
||||
PendingTerm term = new PendingTerm(text, state);
|
||||
PendingTerm term = new PendingTerm(text, state, prefixTerm);
|
||||
pending.add(term);
|
||||
numTerms++;
|
||||
if (firstPendingTerm == null) {
|
||||
firstPendingTerm = term;
|
||||
//if (DEBUG) System.out.println(" add pending term = " + text + " pending.size()=" + pending.size());
|
||||
|
||||
if (prefixTerm == null) {
|
||||
// Only increment stats for real terms:
|
||||
sumDocFreq += state.docFreq;
|
||||
sumTotalTermFreq += state.totalTermFreq;
|
||||
numTerms++;
|
||||
if (firstPendingTerm == null) {
|
||||
firstPendingTerm = term;
|
||||
}
|
||||
lastPendingTerm = term;
|
||||
}
|
||||
lastPendingTerm = term;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -910,6 +1091,7 @@ public final class BlockTreeTermsWriter extends FieldsConsumer {
|
|||
// TODO: if pending.size() is already 1 with a non-zero prefix length
|
||||
// we can save writing a "degenerate" root block, but we have to
|
||||
// fix all the places that assume the root block's prefix is the empty string:
|
||||
pushTerm(new BytesRef());
|
||||
writeBlocks(0, pending.size());
|
||||
|
||||
// We better have one final "root" block:
|
||||
|
|
|
@ -41,6 +41,8 @@ import org.apache.lucene.util.fst.FST;
|
|||
*/
|
||||
public final class FieldReader extends Terms implements Accountable {
|
||||
|
||||
// private final boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
|
||||
private static final long BASE_RAM_BYTES_USED =
|
||||
RamUsageEstimator.shallowSizeOfInstance(FieldReader.class)
|
||||
+ 3 * RamUsageEstimator.shallowSizeOfInstance(BytesRef.class);
|
||||
|
@ -125,6 +127,7 @@ public final class FieldReader extends Terms implements Accountable {
|
|||
/** For debugging -- used by CheckIndex too*/
|
||||
@Override
|
||||
public Stats getStats() throws IOException {
|
||||
// TODO: add auto-prefix terms into stats
|
||||
return new SegmentTermsEnum(this).computeBlockStats();
|
||||
}
|
||||
|
||||
|
@ -175,10 +178,11 @@ public final class FieldReader extends Terms implements Accountable {
|
|||
|
||||
@Override
|
||||
public TermsEnum intersect(CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
|
||||
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
|
||||
}
|
||||
return new IntersectTermsEnum(this, compiled, startTerm);
|
||||
// if (DEBUG) System.out.println(" FieldReader.intersect startTerm=" + BlockTreeTermsWriter.brToString(startTerm));
|
||||
//System.out.println("intersect: " + compiled.type + " a=" + compiled.automaton);
|
||||
// TODO: we could push "it's a range" or "it's a prefix" down into IntersectTermsEnum?
|
||||
// can we optimize knowing that...?
|
||||
return new IntersectTermsEnum(this, compiled.automaton, compiled.runAutomaton, compiled.commonSuffixRef, startTerm, compiled.sinkState);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -21,6 +21,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.PostingsEnum;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
@ -28,23 +29,38 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.automaton.RunAutomaton;
|
||||
import org.apache.lucene.util.automaton.Transition;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Outputs;
|
||||
|
||||
// NOTE: cannot seek!
/** This is used to implement efficient {@link Terms#intersect} for
 *  block-tree.  Note that it cannot seek, except for the initial term on
 *  init.  It just "nexts" through the intersection of the automaton and
 *  the terms.  It does not use the terms index at all: on init, it
 *  loads the root block, and scans its way to the initial term.
 *  Likewise, in next it scans until it finds a term that matches the
 *  current automaton transition.  If the index has auto-prefix terms
 *  (only for DOCS_ONLY fields currently) it will visit these terms
 *  when possible and then skip the real terms that auto-prefix term
 *  matched. */

final class IntersectTermsEnum extends TermsEnum {
|
||||
|
||||
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
|
||||
final IndexInput in;
|
||||
final static Outputs<BytesRef> fstOutputs = ByteSequenceOutputs.getSingleton();
|
||||
|
||||
private IntersectTermsEnumFrame[] stack;
|
||||
IntersectTermsEnumFrame[] stack;
|
||||
|
||||
@SuppressWarnings({"rawtypes","unchecked"}) private FST.Arc<BytesRef>[] arcs = new FST.Arc[5];
|
||||
|
||||
final RunAutomaton runAutomaton;
|
||||
final CompiledAutomaton compiledAutomaton;
|
||||
final Automaton automaton;
|
||||
final BytesRef commonSuffix;
|
||||
|
||||
private IntersectTermsEnumFrame currentFrame;
|
||||
|
||||
|
@ -52,19 +68,34 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
|
||||
private final FST.BytesReader fstReader;
|
||||
|
||||
private final boolean allowAutoPrefixTerms;
|
||||
|
||||
final FieldReader fr;
|
||||
|
||||
/** Which state in the automaton accepts all possible suffixes. */
|
||||
private final int sinkState;
|
||||
|
||||
private BytesRef savedStartTerm;
|
||||
|
||||
/** True if we did return the current auto-prefix term */
|
||||
private boolean useAutoPrefixTerm;
|
||||
|
||||
// TODO: in some cases we can filter by length? eg
|
||||
// regexp foo*bar must be at least length 6 bytes
|
||||
public IntersectTermsEnum(FieldReader fr, CompiledAutomaton compiled, BytesRef startTerm) throws IOException {
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nintEnum.init seg=" + segment + " commonSuffix=" + brToString(compiled.commonSuffixRef));
|
||||
// }
|
||||
public IntersectTermsEnum(FieldReader fr, Automaton automaton, RunAutomaton runAutomaton, BytesRef commonSuffix, BytesRef startTerm, int sinkState) throws IOException {
|
||||
//if (DEBUG) System.out.println("\nintEnum.init seg=" + fr.parent.segment + " commonSuffix=" + commonSuffix);
|
||||
this.fr = fr;
|
||||
runAutomaton = compiled.runAutomaton;
|
||||
compiledAutomaton = compiled;
|
||||
this.sinkState = sinkState;
|
||||
|
||||
assert automaton != null;
|
||||
assert runAutomaton != null;
|
||||
|
||||
//if (DEBUG) System.out.println("sinkState=" + sinkState + " AUTOMATON:\n" + automaton.toDot());
|
||||
this.runAutomaton = runAutomaton;
|
||||
this.allowAutoPrefixTerms = sinkState != -1;
|
||||
this.automaton = automaton;
|
||||
this.commonSuffix = commonSuffix;
|
||||
|
||||
in = fr.parent.termsIn.clone();
|
||||
stack = new IntersectTermsEnumFrame[5];
|
||||
for(int idx=0;idx<stack.length;idx++) {
|
||||
|
@ -152,7 +183,7 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
|
||||
f.fp = f.fpOrig = currentFrame.lastSubFP;
|
||||
f.prefix = currentFrame.prefix + currentFrame.suffix;
|
||||
// if (DEBUG) System.out.println(" pushFrame state=" + state + " prefix=" + f.prefix);
|
||||
//if (DEBUG) System.out.println(" pushFrame state=" + state + " prefix=" + f.prefix);
|
||||
f.setState(state);
|
||||
|
||||
// Walk the arc through the index -- we only
|
||||
|
@ -220,7 +251,7 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
// arbitrary seekExact/Ceil. Note that this is a
|
||||
// seekFloor!
|
||||
private void seekToStartTerm(BytesRef target) throws IOException {
|
||||
//if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString());
|
||||
//if (DEBUG) System.out.println("seek to startTerm=" + target.utf8ToString() + " length=" + target.length);
|
||||
assert currentFrame.ord == 0;
|
||||
if (term.length < target.length) {
|
||||
term.bytes = ArrayUtil.grow(term.bytes, target.length);
|
||||
|
@ -229,23 +260,29 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
assert arc == currentFrame.arc;
|
||||
|
||||
for(int idx=0;idx<=target.length;idx++) {
|
||||
//if (DEBUG) System.out.println("cycle idx=" + idx);
|
||||
|
||||
while (true) {
|
||||
final int savNextEnt = currentFrame.nextEnt;
|
||||
final int savePos = currentFrame.suffixesReader.getPosition();
|
||||
final int saveStartBytePos = currentFrame.startBytePos;
|
||||
final int saveSuffix = currentFrame.suffix;
|
||||
final long saveLastSubFP = currentFrame.lastSubFP;
|
||||
final int saveTermBlockOrd = currentFrame.termState.termBlockOrd;
|
||||
final boolean saveIsAutoPrefixTerm = currentFrame.isAutoPrefixTerm;
|
||||
|
||||
//if (DEBUG) System.out.println(" cycle isAutoPrefix=" + saveIsAutoPrefixTerm + " ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
|
||||
|
||||
final boolean isSubBlock = currentFrame.next();
|
||||
|
||||
//if (DEBUG) System.out.println(" cycle ent=" + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") prefix=" + currentFrame.prefix + " suffix=" + currentFrame.suffix + " isBlock=" + isSubBlock + " firstLabel=" + (currentFrame.suffix == 0 ? "" : (currentFrame.suffixBytes[currentFrame.startBytePos])&0xff));
|
||||
term.length = currentFrame.prefix + currentFrame.suffix;
|
||||
if (term.bytes.length < term.length) {
|
||||
term.bytes = ArrayUtil.grow(term.bytes, term.length);
|
||||
}
|
||||
System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
|
||||
|
||||
//if (DEBUG) System.out.println(" isSubBlock=" + isSubBlock + " term/prefix=" + brToString(term) + " saveIsAutoPrefixTerm=" + saveIsAutoPrefixTerm + " allowAutoPrefixTerms=" + allowAutoPrefixTerms);
|
||||
|
||||
if (isSubBlock && StringHelper.startsWith(target, term)) {
|
||||
// Recurse
|
||||
//if (DEBUG) System.out.println(" recurse!");
|
||||
|
@ -253,9 +290,11 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
break;
|
||||
} else {
|
||||
final int cmp = term.compareTo(target);
|
||||
//if (DEBUG) System.out.println(" cmp=" + cmp);
|
||||
if (cmp < 0) {
|
||||
if (currentFrame.nextEnt == currentFrame.entCount) {
|
||||
if (!currentFrame.isLastInFloor) {
|
||||
// Advance to next floor block
|
||||
//if (DEBUG) System.out.println(" load floorBlock");
|
||||
currentFrame.loadNextFloorBlock();
|
||||
continue;
|
||||
|
@ -266,19 +305,24 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
}
|
||||
continue;
|
||||
} else if (cmp == 0) {
|
||||
if (allowAutoPrefixTerms == false && currentFrame.isAutoPrefixTerm) {
|
||||
continue;
|
||||
}
|
||||
//if (DEBUG) System.out.println(" return term=" + brToString(term));
|
||||
return;
|
||||
} else {
|
||||
} else if (allowAutoPrefixTerms || currentFrame.isAutoPrefixTerm == false) {
|
||||
// Fallback to prior entry: the semantics of
|
||||
// this method is that the first call to
|
||||
// next() will return the term after the
|
||||
// requested term
|
||||
currentFrame.nextEnt--;
|
||||
//if (DEBUG) System.out.println(" fallback prior entry");
|
||||
currentFrame.nextEnt = savNextEnt;
|
||||
currentFrame.lastSubFP = saveLastSubFP;
|
||||
currentFrame.startBytePos = saveStartBytePos;
|
||||
currentFrame.suffix = saveSuffix;
|
||||
currentFrame.suffixesReader.setPosition(savePos);
|
||||
currentFrame.termState.termBlockOrd = saveTermBlockOrd;
|
||||
currentFrame.isAutoPrefixTerm = saveIsAutoPrefixTerm;
|
||||
System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, term.bytes, currentFrame.prefix, currentFrame.suffix);
|
||||
term.length = currentFrame.prefix + currentFrame.suffix;
|
||||
// If the last entry was a block we don't
|
||||
|
@ -297,77 +341,249 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
@Override
|
||||
public BytesRef next() throws IOException {
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nintEnum.next seg=" + segment);
|
||||
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
|
||||
// }
|
||||
//if (DEBUG) {
|
||||
// System.out.println("\nintEnum.next seg=" + fr.parent.segment);
|
||||
// System.out.println(" frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix + " trans: " + currentFrame.transition + " useAutoPrefix=" + useAutoPrefixTerm);
|
||||
//}
|
||||
|
||||
nextTerm:
|
||||
while(true) {
|
||||
// Pop finished frames
|
||||
while (currentFrame.nextEnt == currentFrame.entCount) {
|
||||
if (!currentFrame.isLastInFloor) {
|
||||
//if (DEBUG) System.out.println(" next-floor-block");
|
||||
currentFrame.loadNextFloorBlock();
|
||||
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
|
||||
} else {
|
||||
//if (DEBUG) System.out.println(" pop frame");
|
||||
if (currentFrame.ord == 0) {
|
||||
return null;
|
||||
while (true) {
|
||||
|
||||
boolean isSubBlock;
|
||||
|
||||
if (useAutoPrefixTerm) {
|
||||
|
||||
assert currentFrame.isAutoPrefixTerm;
|
||||
useAutoPrefixTerm = false;
|
||||
currentFrame.termState.isRealTerm = true;
|
||||
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" now scan beyond auto-prefix term=" + brToString(term) + " floorSuffixLeadEnd=" + Integer.toHexString(currentFrame.floorSuffixLeadEnd));
|
||||
//}
|
||||
// If we last returned an auto-prefix term, we must now skip all
|
||||
// actual terms sharing that prefix. At most, that skipping
|
||||
// requires popping one frame, but it can also require simply
|
||||
// scanning ahead within the current frame. This scanning will
|
||||
// skip sub-blocks that contain many terms, which is why the
|
||||
// optimization "works":
|
||||
int floorSuffixLeadEnd = currentFrame.floorSuffixLeadEnd;
|
||||
if (floorSuffixLeadEnd == -1) {
|
||||
// An ordinary prefix, e.g. foo*
|
||||
int prefix = currentFrame.prefix;
|
||||
int suffix = currentFrame.suffix;
|
||||
//if (DEBUG) System.out.println(" prefix=" + prefix + " suffix=" + suffix);
|
||||
if (suffix == 0) {
|
||||
//if (DEBUG) System.out.println(" pop frame & nextTerm");
|
||||
|
||||
// Easy case: the prefix term's suffix is the empty string,
|
||||
// meaning the prefix corresponds to all terms in the
|
||||
// current block, so we just pop this entire block:
|
||||
if (currentFrame.ord == 0) {
|
||||
//if (DEBUG) System.out.println(" return null");
|
||||
return null;
|
||||
}
|
||||
currentFrame = stack[currentFrame.ord-1];
|
||||
continue nextTerm;
|
||||
} else {
|
||||
|
||||
// Just next() until we hit an entry that doesn't share this
|
||||
// prefix. The first next should be a sub-block sharing the
|
||||
// same prefix, because if there are enough terms matching a
|
||||
// given prefix to warrant an auto-prefix term, then there
|
||||
// must also be enough to make a sub-block (assuming
|
||||
// minItemsInPrefix > minItemsInBlock):
|
||||
scanPrefix:
|
||||
while (true) {
|
||||
//if (DEBUG) System.out.println(" scan next");
|
||||
if (currentFrame.nextEnt == currentFrame.entCount) {
|
||||
if (currentFrame.isLastInFloor == false) {
|
||||
currentFrame.loadNextFloorBlock();
|
||||
} else if (currentFrame.ord == 0) {
|
||||
//if (DEBUG) System.out.println(" return null0");
|
||||
return null;
|
||||
} else {
|
||||
// Pop frame, which also means we've moved beyond this
|
||||
// auto-prefix term:
|
||||
//if (DEBUG) System.out.println(" pop; nextTerm");
|
||||
currentFrame = stack[currentFrame.ord-1];
|
||||
continue nextTerm;
|
||||
}
|
||||
}
|
||||
isSubBlock = currentFrame.next();
|
||||
//if (DEBUG) {
|
||||
// BytesRef suffixBytes = new BytesRef(currentFrame.suffix);
|
||||
// System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, suffixBytes.bytes, 0, currentFrame.suffix);
|
||||
// suffixBytes.length = currentFrame.suffix;
|
||||
// System.out.println(" currentFrame.suffix=" + brToString(suffixBytes));
|
||||
//}
|
||||
for(int i=0;i<suffix;i++) {
|
||||
if (term.bytes[prefix+i] != currentFrame.suffixBytes[currentFrame.startBytePos+i]) {
|
||||
//if (DEBUG) System.out.println(" done; now stop scan");
|
||||
break scanPrefix;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Floor'd auto-prefix term; in this case we must skip all
|
||||
// terms e.g. matching foo[a-m]*. We are currently "on" fooa,
|
||||
// which the automaton accepted (fooa* through foom*), and
|
||||
// floorSuffixLeadEnd is m, so we must now scan to foon:
|
||||
int prefix = currentFrame.prefix;
|
||||
int suffix = currentFrame.suffix;
|
||||
|
||||
if (currentFrame.floorSuffixLeadStart == -1) {
|
||||
suffix++;
|
||||
}
|
||||
|
||||
//if (DEBUG) System.out.println(" prefix=" + prefix + " suffix=" + suffix);
|
||||
|
||||
if (suffix == 0) {
|
||||
|
||||
//if (DEBUG) System.out.println(" pop frame");
|
||||
|
||||
// This means current frame is fooa*, so we have to first
|
||||
// pop the current frame, then scan in parent frame:
|
||||
if (currentFrame.ord == 0) {
|
||||
//if (DEBUG) System.out.println(" return null");
|
||||
return null;
|
||||
}
|
||||
currentFrame = stack[currentFrame.ord-1];
|
||||
|
||||
// Current (parent) frame is now foo*, so now we just scan
|
||||
// until the lead suffix byte is > floorSuffixLeadEnd
|
||||
//assert currentFrame.prefix == prefix-1;
|
||||
//prefix = currentFrame.prefix;
|
||||
|
||||
// In case when we pop, and the parent block is not just prefix-1, e.g. in block 417* on
|
||||
// its first term = floor prefix term 41[7-9], popping to block 4*:
|
||||
prefix = currentFrame.prefix;
|
||||
|
||||
suffix = term.length - currentFrame.prefix;
|
||||
} else {
|
||||
// No need to pop; just scan in currentFrame:
|
||||
}
|
||||
|
||||
//if (DEBUG) System.out.println(" start scan: prefix=" + prefix + " suffix=" + suffix);
|
||||
|
||||
// Now we scan until the lead suffix byte is > floorSuffixLeadEnd
|
||||
scanFloor:
|
||||
while (true) {
|
||||
//if (DEBUG) System.out.println(" scan next");
|
||||
if (currentFrame.nextEnt == currentFrame.entCount) {
|
||||
if (currentFrame.isLastInFloor == false) {
|
||||
//if (DEBUG) System.out.println(" next floor block");
|
||||
currentFrame.loadNextFloorBlock();
|
||||
} else if (currentFrame.ord == 0) {
|
||||
//if (DEBUG) System.out.println(" return null");
|
||||
return null;
|
||||
} else {
|
||||
// Pop frame, which also means we've moved beyond this
|
||||
// auto-prefix term:
|
||||
currentFrame = stack[currentFrame.ord-1];
|
||||
//if (DEBUG) System.out.println(" pop, now curFrame.prefix=" + currentFrame.prefix);
|
||||
continue nextTerm;
|
||||
}
|
||||
}
|
||||
isSubBlock = currentFrame.next();
|
||||
//if (DEBUG) {
|
||||
// BytesRef suffixBytes = new BytesRef(currentFrame.suffix);
|
||||
// System.arraycopy(currentFrame.suffixBytes, currentFrame.startBytePos, suffixBytes.bytes, 0, currentFrame.suffix);
|
||||
// suffixBytes.length = currentFrame.suffix;
|
||||
// System.out.println(" currentFrame.suffix=" + brToString(suffixBytes));
|
||||
//}
|
||||
for(int i=0;i<suffix-1;i++) {
|
||||
if (term.bytes[prefix+i] != currentFrame.suffixBytes[currentFrame.startBytePos+i]) {
|
||||
//if (DEBUG) System.out.println(" done; now stop scan");
|
||||
break scanFloor;
|
||||
}
|
||||
}
|
||||
//if (DEBUG) {
|
||||
// if (currentFrame.suffix >= suffix) {
|
||||
// System.out.println(" cmp label=" + Integer.toHexString(currentFrame.suffixBytes[currentFrame.startBytePos+suffix-1]) + " vs " + floorSuffixLeadEnd);
|
||||
// }
|
||||
//}
|
||||
if (currentFrame.suffix >= suffix && (currentFrame.suffixBytes[currentFrame.startBytePos+suffix-1]&0xff) > floorSuffixLeadEnd) {
|
||||
// Done scanning: we are now on the first term after all
|
||||
// terms matched by this auto-prefix term
|
||||
//if (DEBUG) System.out.println(" done; now stop scan");
|
||||
break;
|
||||
}
|
||||
}
|
||||
final long lastFP = currentFrame.fpOrig;
|
||||
currentFrame = stack[currentFrame.ord-1];
|
||||
assert currentFrame.lastSubFP == lastFP;
|
||||
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
|
||||
}
|
||||
} else {
|
||||
// Pop finished frames
|
||||
while (currentFrame.nextEnt == currentFrame.entCount) {
|
||||
if (!currentFrame.isLastInFloor) {
|
||||
//if (DEBUG) System.out.println(" next-floor-block: trans: " + currentFrame.transition);
|
||||
// Advance to next floor block
|
||||
currentFrame.loadNextFloorBlock();
|
||||
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix);
|
||||
break;
|
||||
} else {
|
||||
//if (DEBUG) System.out.println(" pop frame");
|
||||
if (currentFrame.ord == 0) {
|
||||
//if (DEBUG) System.out.println(" return null");
|
||||
return null;
|
||||
}
|
||||
final long lastFP = currentFrame.fpOrig;
|
||||
currentFrame = stack[currentFrame.ord-1];
|
||||
assert currentFrame.lastSubFP == lastFP;
|
||||
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " outputPrefix=" + currentFrame.outputPrefix);
|
||||
}
|
||||
}
|
||||
|
||||
isSubBlock = currentFrame.next();
|
||||
}
|
||||
|
||||
final boolean isSubBlock = currentFrame.next();
|
||||
// if (DEBUG) {
|
||||
// final BytesRef suffixRef = new BytesRef();
|
||||
// suffixRef.bytes = currentFrame.suffixBytes;
|
||||
// suffixRef.offset = currentFrame.startBytePos;
|
||||
// suffixRef.length = currentFrame.suffix;
|
||||
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
|
||||
// }
|
||||
//if (DEBUG) {
|
||||
// final BytesRef suffixRef = new BytesRef();
|
||||
// suffixRef.bytes = currentFrame.suffixBytes;
|
||||
// suffixRef.offset = currentFrame.startBytePos;
|
||||
// suffixRef.length = currentFrame.suffix;
|
||||
// System.out.println(" " + (isSubBlock ? "sub-block" : "term") + " " + currentFrame.nextEnt + " (of " + currentFrame.entCount + ") suffix=" + brToString(suffixRef));
|
||||
//}
|
||||
|
||||
if (currentFrame.suffix != 0) {
|
||||
// Advance where we are in the automaton to match what terms
|
||||
// dict next'd to:
|
||||
final int label = currentFrame.suffixBytes[currentFrame.startBytePos] & 0xff;
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" move automaton to label=" + label + " vs curMax=" + currentFrame.curTransitionMax);
|
||||
// }
|
||||
while (label > currentFrame.curTransitionMax) {
|
||||
if (currentFrame.transitionIndex >= currentFrame.transitionCount-1) {
|
||||
// Stop processing this frame -- no further
|
||||
// matches are possible because we've moved
|
||||
// beyond what the max transition will allow
|
||||
//if (DEBUG) System.out.println(" break: trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]));
|
||||
|
||||
// sneaky! forces a pop above
|
||||
currentFrame.isLastInFloor = true;
|
||||
currentFrame.nextEnt = currentFrame.entCount;
|
||||
// Pop this frame: no further matches are possible because
|
||||
// we've moved beyond what the max transition will allow
|
||||
//if (DEBUG) System.out.println(" break: trans");
|
||||
if (currentFrame.ord == 0) {
|
||||
//if (DEBUG) System.out.println(" return null");
|
||||
return null;
|
||||
}
|
||||
currentFrame = stack[currentFrame.ord-1];
|
||||
continue nextTerm;
|
||||
}
|
||||
currentFrame.transitionIndex++;
|
||||
compiledAutomaton.automaton.getNextTransition(currentFrame.transition);
|
||||
automaton.getNextTransition(currentFrame.transition);
|
||||
currentFrame.curTransitionMax = currentFrame.transition.max;
|
||||
//if (DEBUG) System.out.println(" next trans=" + currentFrame.transitions[currentFrame.transitionIndex]);
|
||||
//if (DEBUG) System.out.println(" next trans");
|
||||
}
|
||||
}
|
||||
|
||||
// First test the common suffix, if set:
|
||||
if (compiledAutomaton.commonSuffixRef != null && !isSubBlock) {
|
||||
if (commonSuffix != null && !isSubBlock) {
|
||||
final int termLen = currentFrame.prefix + currentFrame.suffix;
|
||||
if (termLen < compiledAutomaton.commonSuffixRef.length) {
|
||||
if (termLen < commonSuffix.length) {
|
||||
// No match
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" skip: common suffix length");
|
||||
// }
|
||||
//if (DEBUG) System.out.println(" skip: common suffix length");
|
||||
continue nextTerm;
|
||||
}
|
||||
|
||||
final byte[] suffixBytes = currentFrame.suffixBytes;
|
||||
final byte[] commonSuffixBytes = compiledAutomaton.commonSuffixRef.bytes;
|
||||
final byte[] commonSuffixBytes = commonSuffix.bytes;
|
||||
|
||||
final int lenInPrefix = compiledAutomaton.commonSuffixRef.length - currentFrame.suffix;
|
||||
assert compiledAutomaton.commonSuffixRef.offset == 0;
|
||||
final int lenInPrefix = commonSuffix.length - currentFrame.suffix;
|
||||
assert commonSuffix.offset == 0;
|
||||
int suffixBytesPos;
|
||||
int commonSuffixBytesPos = 0;
|
||||
|
||||
|
@ -381,24 +597,20 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
final int termBytesPosEnd = currentFrame.prefix;
|
||||
while (termBytesPos < termBytesPosEnd) {
|
||||
if (termBytes[termBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" skip: common suffix mismatch (in prefix)");
|
||||
// }
|
||||
//if (DEBUG) System.out.println(" skip: common suffix mismatch (in prefix)");
|
||||
continue nextTerm;
|
||||
}
|
||||
}
|
||||
suffixBytesPos = currentFrame.startBytePos;
|
||||
} else {
|
||||
suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - compiledAutomaton.commonSuffixRef.length;
|
||||
suffixBytesPos = currentFrame.startBytePos + currentFrame.suffix - commonSuffix.length;
|
||||
}
|
||||
|
||||
// Test overlapping suffix part:
|
||||
final int commonSuffixBytesPosEnd = compiledAutomaton.commonSuffixRef.length;
|
||||
final int commonSuffixBytesPosEnd = commonSuffix.length;
|
||||
while (commonSuffixBytesPos < commonSuffixBytesPosEnd) {
|
||||
if (suffixBytes[suffixBytesPos++] != commonSuffixBytes[commonSuffixBytesPos++]) {
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" skip: common suffix mismatch");
|
||||
// }
|
||||
//if (DEBUG) System.out.println(" skip: common suffix mismatch");
|
||||
continue nextTerm;
|
||||
}
|
||||
}
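// The common-suffix test above is a cheap pre-filter: commonSuffix (the
// automaton's commonSuffixRef) holds bytes that every accepted term must end
// with, e.g. every match of a pattern like *foo ends in "foo", so an entry
// whose tail bytes differ can be rejected with plain byte compares before the
// automaton is stepped at all.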
|
||||
|
@ -410,10 +622,19 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
// "temporarily" accepted, we just blindly .next()
|
||||
// until the limit
|
||||
|
||||
// See if the term prefix matches the automaton:
|
||||
// TODO: for first iter of this loop can't we just use the current trans? we already advanced it and confirmed it matches lead
|
||||
// byte of the suffix
|
||||
|
||||
// See if the term suffix matches the automaton:
|
||||
int state = currentFrame.state;
|
||||
int lastState = currentFrame.lastState;
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" a state=" + state + " curFrame.suffix.len=" + currentFrame.suffix + " curFrame.prefix=" + currentFrame.prefix);
|
||||
// }
|
||||
for (int idx=0;idx<currentFrame.suffix;idx++) {
|
||||
state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
|
||||
lastState = state;
|
||||
//if (DEBUG) System.out.println(" step label=" + (char) (currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff));
|
||||
state = runAutomaton.step(state, currentFrame.suffixBytes[currentFrame.startBytePos+idx] & 0xff);
|
||||
if (state == -1) {
|
||||
// No match
|
||||
//System.out.println(" no s=" + state);
|
||||
|
@ -423,16 +644,59 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
//if (DEBUG) System.out.println(" after suffix: state=" + state + " lastState=" + lastState);
|
||||
|
||||
if (isSubBlock) {
|
||||
// Match! Recurse:
|
||||
//if (DEBUG) System.out.println(" sub-block match to state=" + state + "; recurse fp=" + currentFrame.lastSubFP);
|
||||
copyTerm();
|
||||
currentFrame = pushFrame(state);
|
||||
//if (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
|
||||
currentFrame.lastState = lastState;
|
||||
//xif (DEBUG) System.out.println("\n frame ord=" + currentFrame.ord + " prefix=" + brToString(new BytesRef(term.bytes, term.offset, currentFrame.prefix)) + " state=" + currentFrame.state + " lastInFloor?=" + currentFrame.isLastInFloor + " fp=" + currentFrame.fp + " trans=" + (currentFrame.transitions.length == 0 ? "n/a" : currentFrame.transitions[currentFrame.transitionIndex]) + " outputPrefix=" + currentFrame.outputPrefix);
|
||||
} else if (currentFrame.isAutoPrefixTerm) {
|
||||
// We are on an auto-prefix term, meaning this term was compiled
|
||||
// at indexing time, matching all terms sharing this prefix (or,
|
||||
// a floor'd subset of them if that count was too high). A
|
||||
// prefix term represents a range of terms, so we now need to
|
||||
// test whether, from the current state in the automaton, it
|
||||
// accepts all terms in that range. As long as it does, we can
|
||||
// use this term and then later skip ahead past all terms in
|
||||
// this range:
|
||||
if (allowAutoPrefixTerms) {
|
||||
|
||||
if (currentFrame.floorSuffixLeadEnd == -1) {
|
||||
// Simple prefix case
|
||||
useAutoPrefixTerm = state == sinkState;
|
||||
} else {
|
||||
if (currentFrame.floorSuffixLeadStart == -1) {
|
||||
// Must also accept the empty string in this case
|
||||
if (automaton.isAccept(state)) {
|
||||
//if (DEBUG) System.out.println(" state is accept");
|
||||
useAutoPrefixTerm = acceptsSuffixRange(state, 0, currentFrame.floorSuffixLeadEnd);
|
||||
}
|
||||
} else {
|
||||
useAutoPrefixTerm = acceptsSuffixRange(lastState, currentFrame.floorSuffixLeadStart, currentFrame.floorSuffixLeadEnd);
|
||||
}
|
||||
}
|
||||
|
||||
//if (DEBUG) System.out.println(" useAutoPrefixTerm=" + useAutoPrefixTerm);
|
||||
|
||||
if (useAutoPrefixTerm) {
|
||||
copyTerm();
|
||||
currentFrame.termState.isRealTerm = false;
|
||||
//if (DEBUG) System.out.println(" return auto prefix term: " + brToString(term));
|
||||
return term;
|
||||
} else {
|
||||
// We move onto the next term
|
||||
}
|
||||
} else {
|
||||
// We are not allowed to use auto-prefix terms, so we just skip it
|
||||
}
|
||||
} else if (runAutomaton.isAccept(state)) {
|
||||
copyTerm();
|
||||
//if (DEBUG) System.out.println(" term match to state=" + state + "; return term=" + brToString(term));
|
||||
//if (DEBUG) System.out.println(" term match to state=" + state);
|
||||
assert savedStartTerm == null || term.compareTo(savedStartTerm) > 0: "saveStartTerm=" + savedStartTerm.utf8ToString() + " term=" + term.utf8ToString();
|
||||
//if (DEBUG) System.out.println(" return term=" + brToString(term));
|
||||
return term;
|
||||
} else {
|
||||
//System.out.println(" no s=" + state);
|
||||
|
@ -440,6 +704,41 @@ final class IntersectTermsEnum extends TermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
private final Transition transition = new Transition();
|
||||
|
||||
/** Returns true if, from this state, the automaton accepts any suffix
|
||||
* starting with a label between start and end, inclusive. We just
|
||||
* look for a transition, matching this range, to the sink state. */
|
||||
private boolean acceptsSuffixRange(int state, int start, int end) {
|
||||
|
||||
//xif (DEBUG) System.out.println(" acceptsSuffixRange state=" + state + " start=" + start + " end=" + end);
|
||||
|
||||
int count = automaton.initTransition(state, transition);
|
||||
//xif (DEBUG) System.out.println(" transCount=" + count);
|
||||
//xif (DEBUG) System.out.println(" trans=" + transition);
|
||||
for(int i=0;i<count;i++) {
|
||||
automaton.getNextTransition(transition);
|
||||
if (start >= transition.min && end <= transition.max && transition.dest == sinkState) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
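// A condensed sketch (hypothetical helper, shown only to illustrate how the
// checks above combine; it is not a member of this class) of the auto-prefix
// acceptance logic in the term-matching loop: a plain prefix term is usable only
// when the automaton already sits in the sink state, while a floor'd prefix term
// needs its whole lead-byte range accepted via acceptsSuffixRange:
//
//   private boolean acceptsAutoPrefixTerm(IntersectTermsEnumFrame frame, int state, int lastState) {
//     if (frame.floorSuffixLeadEnd == -1) {
//       // Simple prefix term: every completion must be accepted
//       return state == sinkState;
//     } else if (frame.floorSuffixLeadStart == -1) {
//       // Floor'd prefix term whose suffix range starts at the empty string
//       return automaton.isAccept(state) && acceptsSuffixRange(state, 0, frame.floorSuffixLeadEnd);
//     } else {
//       // Floor'd prefix term whose suffix range starts at a real lead byte
//       return acceptsSuffixRange(lastState, frame.floorSuffixLeadStart, frame.floorSuffixLeadEnd);
//     }
//   }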
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
|
||||
private void copyTerm() {
|
||||
//System.out.println(" copyTerm cur.prefix=" + currentFrame.prefix + " cur.suffix=" + currentFrame.suffix + " first=" + (char) currentFrame.suffixBytes[currentFrame.startBytePos]);
|
||||
final int len = currentFrame.prefix + currentFrame.suffix;
|
||||
|
|
|
@ -35,9 +35,14 @@ final class IntersectTermsEnumFrame {
|
|||
long fpEnd;
|
||||
long lastSubFP;
|
||||
|
||||
// private static boolean DEBUG = IntersectTermsEnum.DEBUG;
|
||||
|
||||
// State in automaton
|
||||
int state;
|
||||
|
||||
// State just before the last label
|
||||
int lastState;
|
||||
|
||||
int metaDataUpto;
|
||||
|
||||
byte[] suffixBytes = new byte[128];
|
||||
|
@ -73,6 +78,8 @@ final class IntersectTermsEnumFrame {
|
|||
int transitionIndex;
|
||||
int transitionCount;
|
||||
|
||||
final boolean versionAutoPrefix;
|
||||
|
||||
FST.Arc<BytesRef> arc;
|
||||
|
||||
final BlockTermState termState;
|
||||
|
@ -89,6 +96,17 @@ final class IntersectTermsEnumFrame {
|
|||
int startBytePos;
|
||||
int suffix;
|
||||
|
||||
// When we are on an auto-prefix term this is the starting lead byte
|
||||
// of the suffix (e.g. 'a' for the foo[a-m]* case):
|
||||
int floorSuffixLeadStart;
|
||||
|
||||
// When we are on an auto-prefix term this is the ending lead byte
|
||||
// of the suffix (e.g. 'm' for the foo[a-m]* case):
|
||||
int floorSuffixLeadEnd;
|
||||
|
||||
// True if the term we are currently on is an auto-prefix term:
|
||||
boolean isAutoPrefixTerm;
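// Illustrative example (hypothetical values): if the writer floor'd the prefix
// "foo" into foo[a-m]* and foo[n-z]*, then while positioned on the first of
// those auto-prefix terms this frame would have floorSuffixLeadStart == 'a',
// floorSuffixLeadEnd == 'm', and isAutoPrefixTerm == true.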
|
||||
|
||||
private final IntersectTermsEnum ite;
|
||||
|
||||
public IntersectTermsEnumFrame(IntersectTermsEnum ite, int ord) throws IOException {
|
||||
|
@ -97,35 +115,39 @@ final class IntersectTermsEnumFrame {
|
|||
this.termState = ite.fr.parent.postingsReader.newTermState();
|
||||
this.termState.totalTermFreq = -1;
|
||||
this.longs = new long[ite.fr.longsSize];
|
||||
this.versionAutoPrefix = ite.fr.parent.version >= BlockTreeTermsReader.VERSION_AUTO_PREFIX_TERMS;
|
||||
}
|
||||
|
||||
void loadNextFloorBlock() throws IOException {
|
||||
assert numFollowFloorBlocks > 0;
|
||||
//if (DEBUG) System.out.println(" loadNextFoorBlock trans=" + transitions[transitionIndex]);
|
||||
//if (DEBUG) System.out.println(" loadNextFloorBlock transition.min=" + transition.min);
|
||||
|
||||
do {
|
||||
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
|
||||
numFollowFloorBlocks--;
|
||||
// if (DEBUG) System.out.println(" skip floor block2! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[transitionIndex].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
//if (DEBUG) System.out.println(" skip floor block2! nextFloorLabel=" + (char) nextFloorLabel + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
if (numFollowFloorBlocks != 0) {
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
} else {
|
||||
nextFloorLabel = 256;
|
||||
}
|
||||
// if (DEBUG) System.out.println(" nextFloorLabel=" + (char) nextFloorLabel);
|
||||
//if (DEBUG) System.out.println(" nextFloorLabel=" + (char) nextFloorLabel);
|
||||
} while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min);
|
||||
|
||||
//if (DEBUG) System.out.println(" done loadNextFloorBlock");
|
||||
|
||||
load(null);
|
||||
}
|
||||
|
||||
public void setState(int state) {
|
||||
this.state = state;
|
||||
transitionIndex = 0;
|
||||
transitionCount = ite.compiledAutomaton.automaton.getNumTransitions(state);
|
||||
transitionCount = ite.automaton.getNumTransitions(state);
|
||||
if (transitionCount != 0) {
|
||||
ite.compiledAutomaton.automaton.initTransition(state, transition);
|
||||
ite.compiledAutomaton.automaton.getNextTransition(transition);
|
||||
ite.automaton.initTransition(state, transition);
|
||||
ite.automaton.getNextTransition(transition);
|
||||
curTransitionMax = transition.max;
|
||||
//if (DEBUG) System.out.println(" after setState state=" + state + " trans: " + transition + " transCount=" + transitionCount);
|
||||
} else {
|
||||
curTransitionMax = -1;
|
||||
}
|
||||
|
@ -133,7 +155,7 @@ final class IntersectTermsEnumFrame {
|
|||
|
||||
void load(BytesRef frameIndexData) throws IOException {
|
||||
|
||||
// if (DEBUG) System.out.println(" load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state));
|
||||
//xif (DEBUG) System.out.println(" load fp=" + fp + " fpOrig=" + fpOrig + " frameIndexData=" + frameIndexData + " trans=" + (transitions.length != 0 ? transitions[0] : "n/a" + " state=" + state));
|
||||
|
||||
if (frameIndexData != null && transitionCount != 0) {
|
||||
// Floor frame
|
||||
|
@ -148,7 +170,7 @@ final class IntersectTermsEnumFrame {
|
|||
if ((code & BlockTreeTermsReader.OUTPUT_FLAG_IS_FLOOR) != 0) {
|
||||
numFollowFloorBlocks = floorDataReader.readVInt();
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
// if (DEBUG) System.out.println(" numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel);
|
||||
//if (DEBUG) System.out.println(" numFollowFloorBlocks=" + numFollowFloorBlocks + " nextFloorLabel=" + nextFloorLabel);
|
||||
|
||||
// If current state is accept, we must process
|
||||
// first block in case it has empty suffix:
|
||||
|
@ -158,7 +180,7 @@ final class IntersectTermsEnumFrame {
|
|||
while (numFollowFloorBlocks != 0 && nextFloorLabel <= transition.min) {
|
||||
fp = fpOrig + (floorDataReader.readVLong() >>> 1);
|
||||
numFollowFloorBlocks--;
|
||||
// if (DEBUG) System.out.println(" skip floor block! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
//xif (DEBUG) System.out.println(" skip floor block! nextFloorLabel=" + (char) nextFloorLabel + " vs target=" + (char) transitions[0].getMin() + " newFP=" + fp + " numFollowFloorBlocks=" + numFollowFloorBlocks);
|
||||
if (numFollowFloorBlocks != 0) {
|
||||
nextFloorLabel = floorDataReader.readByte() & 0xff;
|
||||
} else {
|
||||
|
@ -179,7 +201,7 @@ final class IntersectTermsEnumFrame {
|
|||
code = ite.in.readVInt();
|
||||
isLeafBlock = (code & 1) != 0;
|
||||
int numBytes = code >>> 1;
|
||||
// if (DEBUG) System.out.println(" entCount=" + entCount + " lastInFloor?=" + isLastInFloor + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
|
||||
//if (DEBUG) System.out.println(" entCount=" + entCount + " lastInFloor?=" + isLastInFloor + " leafBlock?=" + isLeafBlock + " numSuffixBytes=" + numBytes);
|
||||
if (suffixBytes.length < numBytes) {
|
||||
suffixBytes = new byte[ArrayUtil.oversize(numBytes, 1)];
|
||||
}
|
||||
|
@ -214,41 +236,106 @@ final class IntersectTermsEnumFrame {
|
|||
// written one after another -- tail recurse:
|
||||
fpEnd = ite.in.getFilePointer();
|
||||
}
|
||||
|
||||
// Necessary in case this ord previously was an auto-prefix
|
||||
// term but now we recurse to a new leaf block
|
||||
isAutoPrefixTerm = false;
|
||||
}
|
||||
|
||||
// TODO: maybe add scanToLabel; should give perf boost
|
||||
|
||||
// Decodes next entry; returns true if it's a sub-block
|
||||
public boolean next() {
|
||||
return isLeafBlock ? nextLeaf() : nextNonLeaf();
|
||||
if (isLeafBlock) {
|
||||
nextLeaf();
|
||||
return false;
|
||||
} else {
|
||||
return nextNonLeaf();
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes next entry; returns true if it's a sub-block
|
||||
public boolean nextLeaf() {
|
||||
//if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
|
||||
public void nextLeaf() {
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" frame.nextLeaf ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
|
||||
//}
|
||||
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
suffix = suffixesReader.readVInt();
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
suffixesReader.skipBytes(suffix);
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean nextNonLeaf() {
|
||||
//if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
|
||||
//if (DEBUG) {
|
||||
// System.out.println(" frame.nextNonLeaf ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount + " versionAutoPrefix=" + versionAutoPrefix + " fp=" + suffixesReader.getPosition());
|
||||
// }
|
||||
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
final int code = suffixesReader.readVInt();
|
||||
suffix = code >>> 1;
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
suffixesReader.skipBytes(suffix);
|
||||
if ((code & 1) == 0) {
|
||||
// A normal term
|
||||
termState.termBlockOrd++;
|
||||
return false;
|
||||
if (versionAutoPrefix == false) {
|
||||
suffix = code >>> 1;
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
suffixesReader.skipBytes(suffix);
|
||||
if ((code & 1) == 0) {
|
||||
// A normal term
|
||||
termState.termBlockOrd++;
|
||||
return false;
|
||||
} else {
|
||||
// A sub-block; make sub-FP absolute:
|
||||
lastSubFP = fp - suffixesReader.readVLong();
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
// A sub-block; make sub-FP absolute:
|
||||
lastSubFP = fp - suffixesReader.readVLong();
|
||||
return true;
|
||||
suffix = code >>> 2;
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
suffixesReader.skipBytes(suffix);
|
||||
switch (code & 3) {
|
||||
case 0:
|
||||
// A normal term
|
||||
//if (DEBUG) System.out.println(" ret: term");
|
||||
isAutoPrefixTerm = false;
|
||||
termState.termBlockOrd++;
|
||||
return false;
|
||||
case 1:
|
||||
// A sub-block; make sub-FP absolute:
|
||||
isAutoPrefixTerm = false;
|
||||
lastSubFP = fp - suffixesReader.readVLong();
|
||||
//if (DEBUG) System.out.println(" ret: sub-block");
|
||||
return true;
|
||||
case 2:
|
||||
// A normal prefix term, suffix leads with empty string
|
||||
floorSuffixLeadStart = -1;
|
||||
termState.termBlockOrd++;
|
||||
floorSuffixLeadEnd = suffixesReader.readByte() & 0xff;
|
||||
if (floorSuffixLeadEnd == 0xff) {
|
||||
floorSuffixLeadEnd = -1;
|
||||
//System.out.println(" fill in -1");
|
||||
}
|
||||
//if (DEBUG) System.out.println(" ret: floor prefix term: start=-1 end=" + floorSuffixLeadEnd);
|
||||
isAutoPrefixTerm = true;
|
||||
return false;
|
||||
case 3:
|
||||
// A floor'd prefix term, suffix leads with real byte
|
||||
if (suffix == 0) {
|
||||
// TODO: this is messy, but necessary because we are an auto-prefix term, but our suffix is the empty string here, so we have to
|
||||
// look at the parent block to get the lead suffix byte:
|
||||
assert ord > 0;
|
||||
IntersectTermsEnumFrame parent = ite.stack[ord-1];
|
||||
floorSuffixLeadStart = parent.suffixBytes[parent.startBytePos+parent.suffix-1] & 0xff;
|
||||
//if (DEBUG) System.out.println(" peek-parent: suffix=" + floorSuffixLeadStart);
|
||||
} else {
|
||||
floorSuffixLeadStart = suffixBytes[startBytePos+suffix-1] & 0xff;
|
||||
}
|
||||
termState.termBlockOrd++;
|
||||
isAutoPrefixTerm = true;
|
||||
floorSuffixLeadEnd = suffixesReader.readByte() & 0xff;
|
||||
//if (DEBUG) System.out.println(" ret: floor prefix term start=" + floorSuffixLeadStart + " end=" + floorSuffixLeadEnd);
|
||||
return false;
|
||||
default:
|
||||
// Silly javac:
|
||||
assert false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
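// Rough summary of the suffix-entry encoding decoded above, assuming the
// auto-prefix aware format (versionAutoPrefix == true): the low two bits of the
// vInt choose the entry type and the remaining bits give the suffix length
// (illustration only, mirroring the switch above):
//
//   int code = suffixesReader.readVInt();
//   int suffixLength = code >>> 2;
//   // (code & 3) == 0: normal term
//   // (code & 3) == 1: sub-block, followed by a vLong file-pointer delta
//   // (code & 3) == 2: auto-prefix term whose suffix range starts at the empty
//   //                  string, followed by one byte holding the range's end lead byte
//   // (code & 3) == 3: auto-prefix term whose suffix range starts at a real lead
//   //                  byte, followed by one byte holding the range's end lead byte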
|
||||
|
||||
|
|
|
@ -34,7 +34,9 @@ import org.apache.lucene.util.RamUsageEstimator;
|
|||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
/** Iterates through terms in this field */
|
||||
/** Iterates through terms in this field. This implementation skips
|
||||
* any auto-prefix terms it encounters. */
|
||||
|
||||
final class SegmentTermsEnum extends TermsEnum {
|
||||
|
||||
// Lazy init:
|
||||
|
@ -48,7 +50,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
private int targetBeforeCurrentLength;
|
||||
|
||||
// static boolean DEBUG = false;
|
||||
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
|
||||
private final ByteArrayDataInput scratchReader = new ByteArrayDataInput();
|
||||
|
||||
|
@ -119,6 +121,8 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
* computing aggregate statistics. */
|
||||
public Stats computeBlockStats() throws IOException {
|
||||
|
||||
// TODO: add total auto-prefix term count
|
||||
|
||||
Stats stats = new Stats(fr.parent.segment, fr.fieldInfo.name);
|
||||
if (fr.index != null) {
|
||||
stats.indexNodeCount = fr.index.getNodeCount();
|
||||
|
@ -152,8 +156,10 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
while (currentFrame.nextEnt == currentFrame.entCount) {
|
||||
stats.endBlock(currentFrame);
|
||||
if (!currentFrame.isLastInFloor) {
|
||||
// Advance to next floor block
|
||||
currentFrame.loadNextFloorBlock();
|
||||
stats.startBlock(currentFrame, true);
|
||||
break;
|
||||
} else {
|
||||
if (currentFrame.ord == 0) {
|
||||
break allTerms;
|
||||
|
@ -175,8 +181,6 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
// This is a "next" frame -- even if it's
|
||||
// floor'd we must pretend it isn't so we don't
|
||||
// try to scan to the right floor frame:
|
||||
currentFrame.isFloor = false;
|
||||
//currentFrame.hasTerms = true;
|
||||
currentFrame.loadBlock();
|
||||
stats.startBlock(currentFrame, !currentFrame.isLastInFloor);
|
||||
} else {
|
||||
|
@ -294,6 +298,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
return true;
|
||||
}
|
||||
|
||||
/*
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
|
@ -307,8 +312,15 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
}
|
||||
|
||||
// for debugging
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRefBuilder b) {
|
||||
return brToString(b.get());
|
||||
}
|
||||
*/
|
||||
|
||||
@Override
|
||||
public boolean seekExact(final BytesRef target) throws IOException {
|
||||
public boolean seekExact(BytesRef target) throws IOException {
|
||||
|
||||
if (fr.index == null) {
|
||||
throw new IllegalStateException("terms index was not loaded");
|
||||
|
@ -565,7 +577,8 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public SeekStatus seekCeil(final BytesRef target) throws IOException {
|
||||
public SeekStatus seekCeil(BytesRef target) throws IOException {
|
||||
|
||||
if (fr.index == null) {
|
||||
throw new IllegalStateException("terms index was not loaded");
|
||||
}
|
||||
|
@ -575,7 +588,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
assert clearEOF();
|
||||
|
||||
// if (DEBUG) {
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + target.utf8ToString() + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// System.out.println("\nBTTR.seekCeil seg=" + fr.parent.segment + " target=" + fr.fieldInfo.name + ":" + brToString(target) + " " + target + " current=" + brToString(term) + " (exists?=" + termExists + ") validIndexPrefix= " + validIndexPrefix);
|
||||
// printSeekState(System.out);
|
||||
// }
|
||||
|
||||
|
@ -617,7 +630,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
while (targetUpto < targetLimit) {
|
||||
cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
//System.out.println(" cycle targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")" + " arc.output=" + arc.output + " output=" + output);
|
||||
//}
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
|
@ -647,7 +660,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
while (targetUpto < targetLimit2) {
|
||||
cmp = (term.byteAt(targetUpto)&0xFF) - (target.bytes[target.offset + targetUpto]&0xFF);
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.bytes[targetUpto]) + ")");
|
||||
//System.out.println(" cycle2 targetUpto=" + targetUpto + " (vs limit=" + targetLimit + ") cmp=" + cmp + " (targetLabel=" + (char) (target.bytes[target.offset + targetUpto]) + " vs termLabel=" + (char) (term.byteAt(targetUpto)) + ")");
|
||||
//}
|
||||
if (cmp != 0) {
|
||||
break;
|
||||
|
@ -733,7 +746,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
// Index is exhausted
|
||||
// if (DEBUG) {
|
||||
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + toHex(targetLabel));
|
||||
// System.out.println(" index: index exhausted label=" + ((char) targetLabel) + " " + targetLabel);
|
||||
// }
|
||||
|
||||
validIndexPrefix = currentFrame.prefix;
|
||||
|
@ -743,6 +756,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
currentFrame.loadBlock();
|
||||
|
||||
//if (DEBUG) System.out.println(" now scanToTerm");
|
||||
final SeekStatus result = currentFrame.scanToTerm(target, false);
|
||||
if (result == SeekStatus.END) {
|
||||
term.copyBytes(target);
|
||||
|
@ -750,7 +764,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
|
||||
if (next() != null) {
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" return NOT_FOUND term=" + brToString(term) + " " + term);
|
||||
//System.out.println(" return NOT_FOUND term=" + brToString(term));
|
||||
//}
|
||||
return SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
|
@ -761,7 +775,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
} else {
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" return " + result + " term=" + brToString(term) + " " + term);
|
||||
//System.out.println(" return " + result + " term=" + brToString(term));
|
||||
//}
|
||||
return result;
|
||||
}
|
||||
|
@ -776,7 +790,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" index: follow label=" + toHex(target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
|
||||
//System.out.println(" index: follow label=" + (target.bytes[target.offset + targetUpto]&0xff) + " arc.output=" + arc.output + " arc.nfo=" + arc.nextFinalOutput);
|
||||
//}
|
||||
targetUpto++;
|
||||
|
||||
|
@ -802,7 +816,7 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
termExists = false;
|
||||
if (next() != null) {
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" return NOT_FOUND term=" + term.utf8ToString() + " " + term);
|
||||
//System.out.println(" return NOT_FOUND term=" + term.get().utf8ToString() + " " + term);
|
||||
//}
|
||||
return SeekStatus.NOT_FOUND;
|
||||
} else {
|
||||
|
@ -906,7 +920,9 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
// Pop finished blocks
|
||||
while (currentFrame.nextEnt == currentFrame.entCount) {
|
||||
if (!currentFrame.isLastInFloor) {
|
||||
// Advance to next floor block
|
||||
currentFrame.loadNextFloorBlock();
|
||||
break;
|
||||
} else {
|
||||
//if (DEBUG) System.out.println(" pop frame");
|
||||
if (currentFrame.ord == 0) {
|
||||
|
@ -946,11 +962,9 @@ final class SegmentTermsEnum extends TermsEnum {
|
|||
// This is a "next" frame -- even if it's
|
||||
// floor'd we must pretend it isn't so we don't
|
||||
// try to scan to the right floor frame:
|
||||
currentFrame.isFloor = false;
|
||||
//currentFrame.hasTerms = true;
|
||||
currentFrame.loadBlock();
|
||||
} else {
|
||||
//if (DEBUG) System.out.println(" return term=" + term.utf8ToString() + " " + term + " currentFrame.ord=" + currentFrame.ord);
|
||||
//if (DEBUG) System.out.println(" return term=" + brToString(term) + " currentFrame.ord=" + currentFrame.ord);
|
||||
return term.get();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -37,6 +37,10 @@ final class SegmentTermsEnumFrame {
|
|||
|
||||
FST.Arc<BytesRef> arc;
|
||||
|
||||
final boolean versionAutoPrefix;
|
||||
|
||||
//static boolean DEBUG = BlockTreeTermsWriter.DEBUG;
|
||||
|
||||
// File pointer where this block was loaded from
|
||||
long fp;
|
||||
long fpOrig;
|
||||
|
@ -96,6 +100,7 @@ final class SegmentTermsEnumFrame {
|
|||
this.state = ste.fr.parent.postingsReader.newTermState();
|
||||
this.state.totalTermFreq = -1;
|
||||
this.longs = new long[ste.fr.longsSize];
|
||||
this.versionAutoPrefix = ste.fr.parent.version >= BlockTreeTermsReader.VERSION_AUTO_PREFIX_TERMS;
|
||||
}
|
||||
|
||||
public void setFloorData(ByteArrayDataInput in, BytesRef source) {
|
||||
|
@ -262,12 +267,17 @@ final class SegmentTermsEnumFrame {
|
|||
*/
|
||||
}
|
||||
|
||||
public boolean next() {
|
||||
return isLeafBlock ? nextLeaf() : nextNonLeaf();
|
||||
// Decodes next entry; returns true if it's a sub-block
|
||||
public boolean next() throws IOException {
|
||||
if (isLeafBlock) {
|
||||
nextLeaf();
|
||||
return false;
|
||||
} else {
|
||||
return nextNonLeaf();
|
||||
}
|
||||
}
|
||||
|
||||
// Decodes next entry; returns true if it's a sub-block
|
||||
public boolean nextLeaf() {
|
||||
public void nextLeaf() {
|
||||
//if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
|
@ -276,36 +286,78 @@ final class SegmentTermsEnumFrame {
|
|||
ste.term.setLength(prefix + suffix);
|
||||
ste.term.grow(ste.term.length());
|
||||
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
|
||||
// A normal term
|
||||
ste.termExists = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public boolean nextNonLeaf() {
|
||||
//if (DEBUG) System.out.println(" frame.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount);
|
||||
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
final int code = suffixesReader.readVInt();
|
||||
suffix = code >>> 1;
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
ste.term.setLength(prefix + suffix);
|
||||
ste.term.grow(ste.term.length());
|
||||
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
|
||||
if ((code & 1) == 0) {
|
||||
// A normal term
|
||||
ste.termExists = true;
|
||||
subCode = 0;
|
||||
state.termBlockOrd++;
|
||||
return false;
|
||||
} else {
|
||||
// A sub-block; make sub-FP absolute:
|
||||
ste.termExists = false;
|
||||
subCode = suffixesReader.readVLong();
|
||||
lastSubFP = fp - subCode;
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" lastSubFP=" + lastSubFP);
|
||||
//}
|
||||
return true;
|
||||
public boolean nextNonLeaf() throws IOException {
|
||||
//if (DEBUG) System.out.println(" stef.next ord=" + ord + " nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + suffixesReader.getPosition());
|
||||
while (true) {
|
||||
if (nextEnt == entCount) {
|
||||
assert arc == null || (isFloor && isLastInFloor == false): "isFloor=" + isFloor + " isLastInFloor=" + isLastInFloor;
|
||||
loadNextFloorBlock();
|
||||
if (isLeafBlock) {
|
||||
nextLeaf();
|
||||
return false;
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
assert nextEnt != -1 && nextEnt < entCount: "nextEnt=" + nextEnt + " entCount=" + entCount + " fp=" + fp;
|
||||
nextEnt++;
|
||||
final int code = suffixesReader.readVInt();
|
||||
if (versionAutoPrefix == false) {
|
||||
suffix = code >>> 1;
|
||||
} else {
|
||||
suffix = code >>> 2;
|
||||
}
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
ste.term.setLength(prefix + suffix);
|
||||
ste.term.grow(ste.term.length());
|
||||
suffixesReader.readBytes(ste.term.bytes(), prefix, suffix);
|
||||
if (versionAutoPrefix == false) {
|
||||
if ((code & 1) == 0) {
|
||||
// A normal term
|
||||
ste.termExists = true;
|
||||
subCode = 0;
|
||||
state.termBlockOrd++;
|
||||
return false;
|
||||
} else {
|
||||
// A sub-block; make sub-FP absolute:
|
||||
ste.termExists = false;
|
||||
subCode = suffixesReader.readVLong();
|
||||
lastSubFP = fp - subCode;
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" lastSubFP=" + lastSubFP);
|
||||
//}
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
|
||||
switch(code & 3) {
|
||||
case 0:
|
||||
// A normal term
|
||||
ste.termExists = true;
|
||||
subCode = 0;
|
||||
state.termBlockOrd++;
|
||||
return false;
|
||||
case 1:
|
||||
// A sub-block; make sub-FP absolute:
|
||||
ste.termExists = false;
|
||||
subCode = suffixesReader.readVLong();
|
||||
lastSubFP = fp - subCode;
|
||||
//if (DEBUG) {
|
||||
//System.out.println(" lastSubFP=" + lastSubFP);
|
||||
//}
|
||||
return true;
|
||||
case 2:
|
||||
case 3:
|
||||
// A prefix term: skip it
|
||||
state.termBlockOrd++;
|
||||
suffixesReader.readByte();
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
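// Note the contrast with IntersectTermsEnumFrame.nextNonLeaf: this frame backs
// ordinary term iteration (SegmentTermsEnum), so entry types 2 and 3, the
// auto-prefix terms, are consumed and skipped here instead of being surfaced to
// the caller.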
|
||||
|
||||
|
@ -448,18 +500,38 @@ final class SegmentTermsEnumFrame {
|
|||
assert nextEnt < entCount;
|
||||
nextEnt++;
|
||||
final int code = suffixesReader.readVInt();
|
||||
suffixesReader.skipBytes(isLeafBlock ? code : code >>> 1);
|
||||
//if (DEBUG) System.out.println(" " + nextEnt + " (of " + entCount + ") ent isSubBlock=" + ((code&1)==1));
|
||||
if ((code & 1) != 0) {
|
||||
final long subCode = suffixesReader.readVLong();
|
||||
//if (DEBUG) System.out.println(" subCode=" + subCode);
|
||||
if (targetSubCode == subCode) {
|
||||
//if (DEBUG) System.out.println(" match!");
|
||||
lastSubFP = subFP;
|
||||
return;
|
||||
if (versionAutoPrefix == false) {
|
||||
suffixesReader.skipBytes(code >>> 1);
|
||||
if ((code & 1) != 0) {
|
||||
final long subCode = suffixesReader.readVLong();
|
||||
if (targetSubCode == subCode) {
|
||||
//if (DEBUG) System.out.println(" match!");
|
||||
lastSubFP = subFP;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
state.termBlockOrd++;
|
||||
}
|
||||
} else {
|
||||
state.termBlockOrd++;
|
||||
int flag = code & 3;
|
||||
suffixesReader.skipBytes(code >>> 2);
|
||||
//if (DEBUG) System.out.println(" " + nextEnt + " (of " + entCount + ") ent isSubBlock=" + ((code&1)==1));
|
||||
if (flag == 1) {
|
||||
// Sub-block
|
||||
final long subCode = suffixesReader.readVLong();
|
||||
//if (DEBUG) System.out.println(" subCode=" + subCode);
|
||||
if (targetSubCode == subCode) {
|
||||
//if (DEBUG) System.out.println(" match!");
|
||||
lastSubFP = subFP;
|
||||
return;
|
||||
}
|
||||
} else {
|
||||
state.termBlockOrd++;
|
||||
if (flag == 2 || flag == 3) {
|
||||
// Floor'd prefix term
|
||||
suffixesReader.readByte();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -473,6 +545,21 @@ final class SegmentTermsEnumFrame {
|
|||
private int suffix;
|
||||
private long subCode;
|
||||
|
||||
// for debugging
|
||||
/*
|
||||
@SuppressWarnings("unused")
|
||||
static String brToString(BytesRef b) {
|
||||
try {
|
||||
return b.utf8ToString() + " " + b;
|
||||
} catch (Throwable t) {
|
||||
// If BytesRef isn't actually UTF8, or it's eg a
|
||||
// prefix of UTF8 that ends mid-unicode-char, we
|
||||
// fallback to hex:
|
||||
return b.toString();
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
// Target's prefix matches this block's prefix; we
|
||||
// scan the entries to check if the suffix matches.
|
||||
public SeekStatus scanToTermLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
@ -535,9 +622,6 @@ final class SegmentTermsEnumFrame {
|
|||
// keep scanning
|
||||
|
||||
if (nextEnt == entCount) {
|
||||
if (exactOnly) {
|
||||
fillTerm();
|
||||
}
|
||||
// We are done scanning this block
|
||||
break nextTerm;
|
||||
} else {
|
||||
|
@ -590,7 +674,7 @@ final class SegmentTermsEnumFrame {
|
|||
// scan the entries to check if the suffix matches.
|
||||
public SeekStatus scanToTermNonLeaf(BytesRef target, boolean exactOnly) throws IOException {
|
||||
|
||||
//if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(term));
|
||||
//if (DEBUG) System.out.println(" scanToTermNonLeaf: block fp=" + fp + " prefix=" + prefix + " nextEnt=" + nextEnt + " (of " + entCount + ") target=" + brToString(target) + " term=" + brToString(target));
|
||||
|
||||
assert nextEnt != -1;
|
||||
|
||||
|
@ -605,30 +689,60 @@ final class SegmentTermsEnumFrame {
|
|||
assert prefixMatches(target);
|
||||
|
||||
// Loop over each entry (term or sub-block) in this block:
|
||||
//nextTerm: while(nextEnt < entCount) {
|
||||
nextTerm: while (true) {
|
||||
nextTerm: while(nextEnt < entCount) {
|
||||
|
||||
nextEnt++;
|
||||
|
||||
final int code = suffixesReader.readVInt();
|
||||
suffix = code >>> 1;
|
||||
// if (DEBUG) {
|
||||
// BytesRef suffixBytesRef = new BytesRef();
|
||||
// suffixBytesRef.bytes = suffixBytes;
|
||||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
// }
|
||||
if (versionAutoPrefix == false) {
|
||||
suffix = code >>> 1;
|
||||
} else {
|
||||
suffix = code >>> 2;
|
||||
}
|
||||
|
||||
//if (DEBUG) {
|
||||
// BytesRef suffixBytesRef = new BytesRef();
|
||||
// suffixBytesRef.bytes = suffixBytes;
|
||||
// suffixBytesRef.offset = suffixesReader.getPosition();
|
||||
// suffixBytesRef.length = suffix;
|
||||
// System.out.println(" cycle: " + ((code&1)==1 ? "sub-block" : "term") + " " + (nextEnt-1) + " (of " + entCount + ") suffix=" + brToString(suffixBytesRef));
|
||||
//}
|
||||
|
||||
ste.termExists = (code & 1) == 0;
|
||||
final int termLen = prefix + suffix;
|
||||
startBytePos = suffixesReader.getPosition();
|
||||
suffixesReader.skipBytes(suffix);
|
||||
if (ste.termExists) {
|
||||
state.termBlockOrd++;
|
||||
subCode = 0;
|
||||
if (versionAutoPrefix == false) {
|
||||
ste.termExists = (code & 1) == 0;
|
||||
if (ste.termExists) {
|
||||
state.termBlockOrd++;
|
||||
subCode = 0;
|
||||
} else {
|
||||
subCode = suffixesReader.readVLong();
|
||||
lastSubFP = fp - subCode;
|
||||
}
|
||||
} else {
|
||||
subCode = suffixesReader.readVLong();
|
||||
lastSubFP = fp - subCode;
|
||||
switch (code & 3) {
|
||||
case 0:
|
||||
// Normal term
|
||||
ste.termExists = true;
|
||||
state.termBlockOrd++;
|
||||
subCode = 0;
|
||||
break;
|
||||
case 1:
|
||||
// Sub-block
|
||||
ste.termExists = false;
|
||||
subCode = suffixesReader.readVLong();
|
||||
lastSubFP = fp - subCode;
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
// Floor prefix term: skip it
|
||||
//if (DEBUG) System.out.println(" skip floor prefix term");
|
||||
suffixesReader.readByte();
|
||||
ste.termExists = false;
|
||||
state.termBlockOrd++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
final int targetLimit = target.offset + (target.length < termLen ? target.length : termLen);
|
||||
|
@ -637,7 +751,7 @@ final class SegmentTermsEnumFrame {
|
|||
// Loop over bytes in the suffix, comparing to
|
||||
// the target
|
||||
int bytePos = startBytePos;
|
||||
while(true) {
|
||||
while (true) {
|
||||
final int cmp;
|
||||
final boolean stop;
|
||||
if (targetPos < targetLimit) {
|
||||
|
@ -652,24 +766,18 @@ final class SegmentTermsEnumFrame {
|
|||
if (cmp < 0) {
|
||||
// Current entry is still before the target;
|
||||
// keep scanning
|
||||
|
||||
if (nextEnt == entCount) {
|
||||
if (exactOnly) {
|
||||
fillTerm();
|
||||
//termExists = true;
|
||||
}
|
||||
// We are done scanning this block
|
||||
break nextTerm;
|
||||
} else {
|
||||
continue nextTerm;
|
||||
}
|
||||
continue nextTerm;
|
||||
} else if (cmp > 0) {
|
||||
|
||||
// Done! Current entry is after target --
|
||||
// return NOT_FOUND:
|
||||
fillTerm();
|
||||
|
||||
//if (DEBUG) System.out.println(" maybe done exactOnly=" + exactOnly + " ste.termExists=" + ste.termExists);
|
||||
|
||||
if (!exactOnly && !ste.termExists) {
|
||||
//System.out.println(" now pushFrame");
|
||||
// TODO this
|
||||
// We are on a sub-block, and caller wants
|
||||
// us to position to the next term after
|
||||
// the target, so we must recurse into the
|
||||
|
|
|
@ -48,6 +48,8 @@ public class Stats {
|
|||
/** Total number of bytes (sum of term lengths) across all terms in the field. */
|
||||
public long totalTermBytes;
|
||||
|
||||
// TODO: add total auto-prefix term count
|
||||
|
||||
/** The number of normal (non-floor) blocks in the terms file. */
|
||||
public int nonFloorBlockCount;
|
||||
|
||||
|
|
|
@ -43,9 +43,9 @@ import org.apache.lucene.util.automaton.Transition;
|
|||
* completely accepted. This is not possible when the language accepted by the
|
||||
* FSM is not finite (i.e. * operator).
|
||||
* </p>
|
||||
* @lucene.experimental
|
||||
* @lucene.internal
|
||||
*/
|
||||
class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||
public class AutomatonTermsEnum extends FilteredTermsEnum {
|
||||
// a tableized array-based form of the DFA
|
||||
private final ByteRunAutomaton runAutomaton;
|
||||
// common suffix of the automaton
|
||||
|
@ -70,9 +70,8 @@ class AutomatonTermsEnum extends FilteredTermsEnum {
|
|||
/**
|
||||
* Construct an enumerator based upon an automaton, enumerating the specified
|
||||
* field, working on a supplied TermsEnum
|
||||
* <p>
|
||||
*
|
||||
* @lucene.experimental
|
||||
* <p>
|
||||
* @param compiled CompiledAutomaton
|
||||
*/
|
||||
public AutomatonTermsEnum(TermsEnum tenum, CompiledAutomaton compiled) {
|
||||
|
|
|
@ -25,7 +25,9 @@ import java.nio.file.Paths;
|
|||
import java.text.NumberFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Deque;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
@ -56,6 +58,8 @@ import org.apache.lucene.util.IOUtils;
|
|||
import org.apache.lucene.util.LongBitSet;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
|
||||
/**
|
||||
* Basic tool and API to check the health of an index and
|
||||
|
@ -902,6 +906,180 @@ public class CheckIndex implements Closeable {
|
|||
return status;
|
||||
}
|
||||
|
||||
/** Visits all terms in the range minTerm (inclusive) to maxTerm (exclusive), marking all doc IDs encountered into docsSeen, and
|
||||
* returning the total number of terms visited. */
|
||||
private static long getDocsFromTermRange(String field, int maxDoc, TermsEnum termsEnum, FixedBitSet docsSeen, BytesRef minTerm, BytesRef maxTerm, boolean isIntersect) throws IOException {
|
||||
docsSeen.clear(0, docsSeen.length());
|
||||
|
||||
long termCount = 0;
|
||||
PostingsEnum postingsEnum = null;
|
||||
BytesRefBuilder lastTerm = null;
|
||||
while (true) {
|
||||
BytesRef term;
|
||||
|
||||
// Kinda messy: for intersect, we must first next(), but for "normal", we are already on our first term:
|
||||
if (isIntersect || termCount != 0) {
|
||||
term = termsEnum.next();
|
||||
} else {
|
||||
term = termsEnum.term();
|
||||
}
|
||||
|
||||
if (term == null) {
|
||||
if (isIntersect == false) {
|
||||
throw new RuntimeException("didn't see max term field=" + field + " term=" + maxTerm);
|
||||
}
|
||||
return termCount;
|
||||
}
|
||||
|
||||
assert term.isValid();
|
||||
|
||||
if (lastTerm == null) {
|
||||
lastTerm = new BytesRefBuilder();
|
||||
lastTerm.copyBytes(term);
|
||||
} else {
|
||||
if (lastTerm.get().compareTo(term) >= 0) {
|
||||
throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
|
||||
}
|
||||
lastTerm.copyBytes(term);
|
||||
}
|
||||
|
||||
//System.out.println(" term=" + term);
|
||||
|
||||
// Caller already ensured terms enum positioned >= minTerm:
|
||||
if (term.compareTo(minTerm) < 0) {
|
||||
throw new RuntimeException("saw term before min term field=" + field + " term=" + minTerm);
|
||||
}
|
||||
|
||||
if (isIntersect == false) {
|
||||
int cmp = term.compareTo(maxTerm);
|
||||
if (cmp == 0) {
|
||||
// Done!
|
||||
return termCount;
|
||||
} else if (cmp > 0) {
|
||||
throw new RuntimeException("didn't see end term field=" + field + " term=" + maxTerm);
|
||||
}
|
||||
}
|
||||
|
||||
postingsEnum = termsEnum.postings(null, postingsEnum, 0);
|
||||
|
||||
int lastDoc = -1;
|
||||
while (true) {
|
||||
int doc = postingsEnum.nextDoc();
|
||||
if (doc == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
break;
|
||||
}
|
||||
if (doc <= lastDoc) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc);
|
||||
}
|
||||
if (doc >= maxDoc) {
|
||||
throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc);
|
||||
}
|
||||
|
||||
//System.out.println(" doc=" + doc);
|
||||
docsSeen.set(doc);
|
||||
|
||||
lastDoc = doc;
|
||||
}
|
||||
|
||||
termCount++;
|
||||
}
|
||||
}
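// Positioning note: a TermsEnum returned by Terms.intersect starts out
// unpositioned and must first be advanced with next(), while the caller of this
// method has already seekCeil'd the regular enum onto minTerm; that is why the
// first iteration above calls term() only in the non-intersect case.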
|
||||
|
||||
/** Tests Terms.intersect on this range, and validates that it returns the same doc ids as using a non-intersect TermsEnum. Returns true if
|
||||
* any fake terms were seen. */
|
||||
private static boolean checkSingleTermRange(String field, int maxDoc, Terms terms, BytesRef minTerm, BytesRef maxTerm, FixedBitSet normalDocs, FixedBitSet intersectDocs) throws IOException {
|
||||
// System.out.println(" check minTerm=" + minTerm + " maxTerm=" + maxTerm);
|
||||
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
TermsEnum.SeekStatus status = termsEnum.seekCeil(minTerm);
|
||||
if (status != TermsEnum.SeekStatus.FOUND) {
|
||||
throw new RuntimeException("failed to seek to existing term field=" + field + " term=" + minTerm);
|
||||
}
|
||||
|
||||
// Do "dumb" iteration to visit all terms in the range:
|
||||
long normalTermCount = getDocsFromTermRange(field, maxDoc, termsEnum, normalDocs, minTerm, maxTerm, false);
|
||||
|
||||
// Now do the same operation using intersect:
|
||||
long intersectTermCount = getDocsFromTermRange(field, maxDoc, terms.intersect(new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, true, maxTerm, false), true, false, Integer.MAX_VALUE, true), null), intersectDocs, minTerm, maxTerm, true);
|
||||
|
||||
if (intersectTermCount > normalTermCount) {
|
||||
throw new RuntimeException("intersect returned too many terms: field=" + field + " intersectTermCount=" + intersectTermCount + " normalTermCount=" + normalTermCount);
|
||||
}
|
||||
|
||||
if (normalDocs.equals(intersectDocs) == false) {
|
||||
throw new RuntimeException("intersect visited different docs than straight terms enum: " + normalDocs.cardinality() + " for straight enum, vs " + intersectDocs.cardinality() + " for intersect, minTerm=" + minTerm + " maxTerm=" + maxTerm);
|
||||
}
|
||||
//System.out.println(" " + intersectTermCount + " vs " + normalTermCount);
|
||||
return intersectTermCount != normalTermCount;
|
||||
}
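// A minimal standalone sketch of the intersect half of the check above (the
// terms, minTerm and maxTerm variables are assumed to be set up as in
// checkSingleTermRange): visit every term in [minTerm, maxTerm) through
// Terms.intersect, the path that can also surface auto-prefix terms:
//
//   CompiledAutomaton ca = new CompiledAutomaton(
//       Automata.makeBinaryInterval(minTerm, true, maxTerm, false),
//       true, false, Integer.MAX_VALUE, true);
//   TermsEnum te = terms.intersect(ca, null);
//   for (BytesRef t = te.next(); t != null; t = te.next()) {
//     // each visited term's postings get checked against the straight enumeration
//   }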
|
||||
|
||||
/** Make an effort to visit "fake" (e.g. auto-prefix) terms. We do this by running term range intersections across an initially wide
|
||||
* interval of terms, at different boundaries, and then gradually decreasing the interval. This is not guaranteed to hit all non-real
|
||||
* terms (doing that in general is non-trivial), but it should hit many of them, and validate their postings against the postings for the
|
||||
* real terms. */
|
||||
private static void checkTermRanges(String field, int maxDoc, Terms terms, long numTerms) throws IOException {
|
||||
|
||||
// We'll target this many terms in our interval for the current level:
|
||||
double currentInterval = numTerms;
|
||||
|
||||
FixedBitSet normalDocs = new FixedBitSet(maxDoc);
|
||||
FixedBitSet intersectDocs = new FixedBitSet(maxDoc);
|
||||
|
||||
TermsEnum termsEnum = null;
|
||||
//System.out.println("CI.checkTermRanges field=" + field + " numTerms=" + numTerms);
|
||||
|
||||
while (currentInterval >= 10.0) {
|
||||
//System.out.println(" cycle interval=" + currentInterval);
|
||||
|
||||
// We iterate this terms enum to locate min/max term for each sliding/overlapping interval we test at the current level:
|
||||
termsEnum = terms.iterator(termsEnum);
|
||||
|
||||
long termCount = 0;
|
||||
|
||||
Deque<BytesRef> termBounds = new LinkedList<>();
|
||||
|
||||
long lastTermAdded = Long.MIN_VALUE;
|
||||
|
||||
BytesRefBuilder lastTerm = null;
|
||||
|
||||
while (true) {
|
||||
BytesRef term = termsEnum.next();
|
||||
if (term == null) {
|
||||
break;
|
||||
}
|
||||
//System.out.println(" top: term=" + term.utf8ToString());
|
||||
if (termCount >= lastTermAdded + currentInterval/4) {
|
||||
termBounds.add(BytesRef.deepCopyOf(term));
|
||||
lastTermAdded = termCount;
|
||||
if (termBounds.size() == 5) {
|
||||
BytesRef minTerm = termBounds.removeFirst();
|
||||
BytesRef maxTerm = termBounds.getLast();
|
||||
checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
|
||||
}
|
||||
}
|
||||
termCount++;
|
||||
|
||||
if (lastTerm == null) {
|
||||
lastTerm = new BytesRefBuilder();
|
||||
lastTerm.copyBytes(term);
|
||||
} else {
|
||||
if (lastTerm.get().compareTo(term) >= 0) {
|
||||
throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term);
|
||||
}
|
||||
lastTerm.copyBytes(term);
|
||||
}
|
||||
}
|
||||
|
||||
if (lastTerm != null && termBounds.isEmpty() == false) {
|
||||
BytesRef minTerm = termBounds.removeFirst();
|
||||
BytesRef maxTerm = lastTerm.get();
|
||||
checkSingleTermRange(field, maxDoc, terms, minTerm, maxTerm, normalDocs, intersectDocs);
|
||||
}
|
||||
|
||||
currentInterval *= .75;
|
||||
}
|
||||
}
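// In the loop above each level queues a boundary roughly every
// currentInterval/4 terms and runs a check once five boundaries are queued, so
// each tested range spans about one interval; the interval then shrinks by a
// factor of 0.75 per level, exercising successively narrower, overlapping
// ranges.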
|
||||
|
||||
/**
|
||||
* checks Fields api is consistent with itself.
|
||||
* searcher is optional, to verify with queries. Can be null.
|
||||
|
@ -922,6 +1100,7 @@ public class CheckIndex implements Closeable {
|
|||
|
||||
String lastField = null;
|
||||
for (String field : fields) {
|
||||
|
||||
// MultiFieldsEnum relies upon this order...
|
||||
if (lastField != null && field.compareTo(lastField) <= 0) {
|
||||
throw new RuntimeException("fields out of order: lastField=" + lastField + " field=" + field);
|
||||
|
@ -1031,7 +1210,8 @@ public class CheckIndex implements Closeable {
|
|||
if (term == null) {
|
||||
break;
|
||||
}
|
||||
|
||||
// System.out.println("CI: field=" + field + " check term=" + term + " docFreq=" + termsEnum.docFreq());
|
||||
|
||||
assert term.isValid();
|
||||
|
||||
// make sure terms arrive in order according to
|
||||
|
@ -1323,13 +1503,21 @@ public class CheckIndex implements Closeable {
|
|||
// docs got deleted and then merged away):
|
||||
|
||||
} else {
|
||||
|
||||
long fieldTermCount = (status.delTermCount+status.termCount)-termCountStart;
|
||||
|
||||
if (hasFreqs == false) {
|
||||
// For DOCS_ONLY fields we recursively test term ranges:
|
||||
checkTermRanges(field, maxDoc, fieldTerms, fieldTermCount);
|
||||
}
|
||||
|
||||
final Object stats = fieldTerms.getStats();
|
||||
assert stats != null;
|
||||
if (status.blockTreeStats == null) {
|
||||
status.blockTreeStats = new HashMap<>();
|
||||
}
|
||||
status.blockTreeStats.put(field, stats);
|
||||
|
||||
|
||||
if (sumTotalTermFreq != 0) {
|
||||
final long v = fields.terms(field).getSumTotalTermFreq();
|
||||
if (v != -1 && sumTotalTermFreq != v) {
|
||||
|
@ -1344,11 +1532,9 @@ public class CheckIndex implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
if (fieldTerms != null) {
|
||||
final int v = fieldTerms.getDocCount();
|
||||
if (v != -1 && visitedDocs.cardinality() != v) {
|
||||
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
|
||||
}
|
||||
final int v = fieldTerms.getDocCount();
|
||||
if (v != -1 && visitedDocs.cardinality() != v) {
|
||||
throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality());
|
||||
}
|
||||
|
||||
// Test seek to last term:
|
||||
|
@ -1356,6 +1542,9 @@ public class CheckIndex implements Closeable {
|
|||
if (termsEnum.seekCeil(lastTerm.get()) != TermsEnum.SeekStatus.FOUND) {
|
||||
throw new RuntimeException("seek to last term " + lastTerm + " failed");
|
||||
}
|
||||
if (termsEnum.term().equals(lastTerm.get()) == false) {
|
||||
throw new RuntimeException("seek to last term " + lastTerm.get() + " returned FOUND but seeked to the wrong term " + termsEnum.term());
|
||||
}
|
||||
|
||||
int expectedDocFreq = termsEnum.docFreq();
|
||||
PostingsEnum d = termsEnum.postings(null, null, PostingsEnum.NONE);
|
||||
|
@ -1364,21 +1553,21 @@ public class CheckIndex implements Closeable {
|
|||
docFreq++;
|
||||
}
|
||||
if (docFreq != expectedDocFreq) {
|
||||
throw new RuntimeException("docFreq for last term " + lastTerm + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq);
|
||||
throw new RuntimeException("docFreq for last term " + lastTerm.toBytesRef() + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq);
|
||||
}
|
||||
}
|
||||
|
||||
// check unique term count
|
||||
long termCount = -1;
|
||||
|
||||
if ((status.delTermCount+status.termCount)-termCountStart > 0) {
|
||||
if (fieldTermCount > 0) {
|
||||
termCount = fields.terms(field).size();
|
||||
|
||||
if (termCount != -1 && termCount != status.delTermCount + status.termCount - termCountStart) {
|
||||
throw new RuntimeException("termCount mismatch " + (status.delTermCount + termCount) + " vs " + (status.termCount - termCountStart));
|
||||
if (termCount != -1 && termCount != fieldTermCount) {
|
||||
throw new RuntimeException("termCount mismatch " + termCount + " vs " + fieldTermCount);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Test seeking by ord
|
||||
if (hasOrd && status.termCount-termCountStart > 0) {
|
||||
int seekCount = (int) Math.min(10000L, termCount);
|
||||
|
@ -1398,6 +1587,9 @@ public class CheckIndex implements Closeable {
|
|||
if (termsEnum.seekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) {
|
||||
throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed");
|
||||
}
|
||||
if (termsEnum.term().equals(seekTerms[i]) == false) {
|
||||
throw new RuntimeException("seek to existing term " + seekTerms[i] + " returned FOUND but seeked to the wrong term " + termsEnum.term());
|
||||
}
|
||||
|
||||
postings = termsEnum.postings(liveDocs, postings, PostingsEnum.NONE);
|
||||
if (postings == null) {
|
||||
|
|
|
@ -151,7 +151,6 @@ class FreqProxFields extends Fields {
|
|||
}
|
||||
|
||||
public SeekStatus seekCeil(BytesRef text) {
|
||||
|
||||
// TODO: we could instead keep the BytesRefHash
|
||||
// intact so this is a hash lookup
|
||||
|
||||
|
@ -170,17 +169,19 @@ class FreqProxFields extends Fields {
|
|||
} else {
|
||||
// found:
|
||||
ord = mid;
|
||||
assert term().compareTo(text) == 0;
|
||||
return SeekStatus.FOUND;
|
||||
}
|
||||
}
|
||||
|
||||
// not found:
|
||||
ord = lo + 1;
|
||||
ord = lo;
|
||||
if (ord >= numTerms) {
|
||||
return SeekStatus.END;
|
||||
} else {
|
||||
int textStart = postingsArray.textStarts[sortedTermIDs[ord]];
|
||||
terms.bytePool.setBytesRef(scratch, textStart);
|
||||
assert term().compareTo(text) > 0;
|
||||
return SeekStatus.NOT_FOUND;
|
||||
}
|
||||
}
|
||||
|
@ -309,7 +310,7 @@ class FreqProxFields extends Fields {
|
|||
final FreqProxPostingsArray postingsArray;
|
||||
final ByteSliceReader reader = new ByteSliceReader();
|
||||
final boolean readTermFreq;
|
||||
int docID;
|
||||
int docID = -1;
|
||||
int freq;
|
||||
boolean ended;
|
||||
int termID;
|
||||
|
@ -324,7 +325,7 @@ class FreqProxFields extends Fields {
|
|||
this.termID = termID;
|
||||
terms.initReader(reader, termID, 0);
|
||||
ended = false;
|
||||
docID = 0;
|
||||
docID = -1;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -365,6 +366,9 @@ class FreqProxFields extends Fields {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (docID == -1) {
|
||||
docID = 0;
|
||||
}
|
||||
if (reader.eof()) {
|
||||
if (ended) {
|
||||
return NO_MORE_DOCS;
|
||||
|
@ -412,7 +416,7 @@ class FreqProxFields extends Fields {
|
|||
final ByteSliceReader reader = new ByteSliceReader();
|
||||
final ByteSliceReader posReader = new ByteSliceReader();
|
||||
final boolean readOffsets;
|
||||
int docID;
|
||||
int docID = -1;
|
||||
int freq;
|
||||
int pos;
|
||||
int startOffset;
|
||||
|
@ -436,7 +440,7 @@ class FreqProxFields extends Fields {
|
|||
terms.initReader(reader, termID, 0);
|
||||
terms.initReader(posReader, termID, 1);
|
||||
ended = false;
|
||||
docID = 0;
|
||||
docID = -1;
|
||||
posLeft = 0;
|
||||
}
|
||||
|
||||
|
@ -452,6 +456,9 @@ class FreqProxFields extends Fields {
|
|||
|
||||
@Override
|
||||
public int nextDoc() throws IOException {
|
||||
if (docID == -1) {
|
||||
docID = 0;
|
||||
}
|
||||
while (posLeft != 0) {
|
||||
nextPosition();
|
||||
}
|
||||
|
|
|
@ -49,6 +49,7 @@ final class MappingMultiPostingsEnum extends PostingsEnum {
|
|||
this.numSubs = postingsEnum.getNumSubs();
|
||||
this.subs = postingsEnum.getSubs();
|
||||
upto = -1;
|
||||
doc = -1;
|
||||
current = null;
|
||||
this.multiDocsAndPositionsEnum = postingsEnum;
|
||||
return this;
|
||||
|
|
|
@ -17,6 +17,7 @@ package org.apache.lucene.index;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.codecs.BlockTermState;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
import java.io.IOException;
|
||||
|
@ -165,4 +166,30 @@ public final class TermContext {
|
|||
public void setDocFreq(int docFreq) {
|
||||
this.docFreq = docFreq;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns true if all terms stored here are real (i.e., not auto-prefix terms).
|
||||
*
|
||||
* @lucene.internal */
|
||||
public boolean hasOnlyRealTerms() {
|
||||
for(TermState termState : states) {
|
||||
if (termState instanceof BlockTermState && ((BlockTermState) termState).isRealTerm == false) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
sb.append("TermContext\n");
|
||||
for(TermState termState : states) {
|
||||
sb.append(" state=");
|
||||
sb.append(termState.toString());
|
||||
sb.append('\n');
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
||||
|
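A brief usage sketch for the new hasOnlyRealTerms check (the termStates, term, and reader variables are assumed context from a MultiTermQuery rewrite): auto-prefix terms carry aggregated statistics, so exact docFreq assertions should only fire when every collected state came from a real term.

// Sketch, names assumed: guard exact docFreq checks during rewrite.
if (termStates.hasOnlyRealTerms()) {
  assert reader.docFreq(term) == termStates.docFreq();
}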
|
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.codecs.blocktree.BlockTreeTermsWriter;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.apache.lucene.util.automaton.CompiledAutomaton;
|
||||
|
@ -42,17 +43,23 @@ public abstract class Terms {
|
|||
* implementation can do so. */
|
||||
public abstract TermsEnum iterator(TermsEnum reuse) throws IOException;
|
||||
|
||||
/** Returns a TermsEnum that iterates over all terms that
|
||||
* are accepted by the provided {@link
|
||||
/** Returns a TermsEnum that iterates over all terms and
|
||||
* documents that are accepted by the provided {@link
|
||||
* CompiledAutomaton}. If the <code>startTerm</code> is
|
||||
* provided then the returned enum will only accept terms
|
||||
* provided then the returned enum will only return terms
|
||||
* {@code > startTerm}, but you still must call
|
||||
* next() first to get to the first term. Note that the
|
||||
* provided <code>startTerm</code> must be accepted by
|
||||
* the automaton.
|
||||
*
|
||||
* <p><b>NOTE</b>: the returned TermsEnum cannot
|
||||
* seek</p>. */
|
||||
* seek</p>.
|
||||
*
|
||||
* <p><b>NOTE</b>: the terms dictionary is free to
|
||||
* return arbitrary terms as long as the resulting set of
|
||||
* visited docs is the same. E.g., {@link BlockTreeTermsWriter}
|
||||
* creates auto-prefix terms during indexing to reduce the
|
||||
* number of terms visited. */
|
||||
public TermsEnum intersect(CompiledAutomaton compiled, final BytesRef startTerm) throws IOException {
|
||||
|
||||
// TODO: could we factor out a common interface b/w
|
||||
|
@ -64,13 +71,17 @@ public abstract class Terms {
|
|||
// TODO: eventually we could support seekCeil/Exact on
|
||||
// the returned enum, instead of only being able to seek
|
||||
// at the start
|
||||
|
||||
TermsEnum termsEnum = iterator(null);
|
||||
|
||||
if (compiled.type != CompiledAutomaton.AUTOMATON_TYPE.NORMAL) {
|
||||
throw new IllegalArgumentException("please use CompiledAutomaton.getTermsEnum instead");
|
||||
}
|
||||
|
||||
if (startTerm == null) {
|
||||
return new AutomatonTermsEnum(iterator(null), compiled);
|
||||
return new AutomatonTermsEnum(termsEnum, compiled);
|
||||
} else {
|
||||
return new AutomatonTermsEnum(iterator(null), compiled) {
|
||||
return new AutomatonTermsEnum(termsEnum, compiled) {
|
||||
@Override
|
||||
protected BytesRef nextSeekTerm(BytesRef term) throws IOException {
|
||||
if (term == null) {
|
||||
|
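A small usage sketch of intersect() under the clarified contract (reader, field name, and prefix are arbitrary): the enum may hand back auto-prefix terms instead of every matching term, but iterating the postings of whatever it returns visits the same documents.

// Sketch: enumerate whatever terms the dictionary chooses to return for foo*;
// with auto-prefix terms enabled this can be far fewer than the real terms.
Terms terms = MultiFields.getTerms(reader, "body");
Automaton prefix = Operations.concatenate(Automata.makeString("foo"), Automata.makeAnyString());
CompiledAutomaton ca = new CompiledAutomaton(prefix, null, true, Integer.MAX_VALUE, false);
TermsEnum te = terms.intersect(ca, null);
while (te.next() != null) {
  // each entry is either a real term or an auto-prefix term covering many terms
}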
|
|
@ -99,6 +99,7 @@ public class AutomatonQuery extends MultiTermQuery {
|
|||
super(term.field());
|
||||
this.term = term;
|
||||
this.automaton = automaton;
|
||||
// TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
|
||||
this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
|
||||
}
|
||||
|
||||
|
|
|
@ -17,12 +17,7 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
@ -33,6 +28,7 @@ import org.apache.lucene.util.automaton.Automaton;
|
|||
* <p>This query uses the {@link
|
||||
* MultiTermQuery#CONSTANT_SCORE_REWRITE}
|
||||
* rewrite method. */
|
||||
|
||||
public class PrefixQuery extends AutomatonQuery {
|
||||
|
||||
/** Constructs a query for terms starting with <code>prefix</code>. */
|
||||
|
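From the caller's side nothing changes; a minimal sketch (index, field, and searcher are assumed) showing the query still constructs the same way, now rewritten through the AutomatonQuery machinery:

// Sketch: same public API, now backed by a prefix automaton.
Query q = new PrefixQuery(new Term("category", "/Computers"));
TopDocs hits = searcher.search(q, 10);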
|
|
@ -18,19 +18,19 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermContext;
|
||||
import org.apache.lucene.index.TermState;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.search.MultiTermQuery.RewriteMethod;
|
||||
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.ByteBlockPool;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.RamUsageEstimator;
|
||||
import org.apache.lucene.util.BytesRefHash.DirectBytesStartArray;
|
||||
|
||||
/**
|
||||
* Base rewrite method that translates each term into a query, and keeps
|
||||
|
@ -112,7 +112,7 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewr
|
|||
for (int i = 0; i < size; i++) {
|
||||
final int pos = sort[i];
|
||||
final Term term = new Term(query.getField(), col.terms.get(pos, new BytesRef()));
|
||||
assert reader.docFreq(term) == termStates[pos].docFreq();
|
||||
assert termStates[pos].hasOnlyRealTerms() == false || reader.docFreq(term) == termStates[pos].docFreq();
|
||||
addClause(result, term, termStates[pos].docFreq(), query.getBoost() * boost[pos], termStates[pos]);
|
||||
}
|
||||
}
|
||||
|
@ -137,7 +137,7 @@ public abstract class ScoringRewrite<Q extends Query> extends TermCollectingRewr
|
|||
final int e = terms.add(bytes);
|
||||
final TermState state = termsEnum.termState();
|
||||
assert state != null;
|
||||
if (e < 0 ) {
|
||||
if (e < 0) {
|
||||
// duplicate term: update docFreq
|
||||
final int pos = (-e)-1;
|
||||
array.termState[pos].register(state, readerContext.ord, termsEnum.docFreq(), termsEnum.totalTermFreq());
|
||||
|
|
|
@ -17,22 +17,17 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.ToStringUtils;
|
||||
import org.apache.lucene.util.automaton.Automata;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
|
||||
/**
|
||||
* A Query that matches documents within an range of terms.
|
||||
*
|
||||
* <p>This query matches the documents looking for terms that fall into the
|
||||
* supplied range according to {@link
|
||||
* Byte#compareTo(Byte)}. It is not intended
|
||||
* for numerical ranges; use {@link NumericRangeQuery} instead.
|
||||
* supplied range according to {@link BytesRef#compareTo(BytesRef)}.
|
||||
*
|
||||
* <p>This query uses the {@link
|
||||
* MultiTermQuery#CONSTANT_SCORE_REWRITE}
|
||||
|
@ -40,12 +35,11 @@ import org.apache.lucene.util.ToStringUtils;
|
|||
* @since 2.9
|
||||
*/
|
||||
|
||||
public class TermRangeQuery extends MultiTermQuery {
|
||||
private BytesRef lowerTerm;
|
||||
private BytesRef upperTerm;
|
||||
private boolean includeLower;
|
||||
private boolean includeUpper;
|
||||
|
||||
public class TermRangeQuery extends AutomatonQuery {
|
||||
private final BytesRef lowerTerm;
|
||||
private final BytesRef upperTerm;
|
||||
private final boolean includeLower;
|
||||
private final boolean includeUpper;
|
||||
|
||||
/**
|
||||
* Constructs a query selecting all terms greater/equal than <code>lowerTerm</code>
|
||||
|
@ -70,13 +64,28 @@ public class TermRangeQuery extends MultiTermQuery {
|
|||
* included in the range.
|
||||
*/
|
||||
public TermRangeQuery(String field, BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
|
||||
super(field);
|
||||
super(new Term(field, lowerTerm), toAutomaton(lowerTerm, upperTerm, includeLower, includeUpper), Integer.MAX_VALUE, true);
|
||||
this.lowerTerm = lowerTerm;
|
||||
this.upperTerm = upperTerm;
|
||||
this.includeLower = includeLower;
|
||||
this.includeUpper = includeUpper;
|
||||
}
|
||||
|
||||
public static Automaton toAutomaton(BytesRef lowerTerm, BytesRef upperTerm, boolean includeLower, boolean includeUpper) {
|
||||
|
||||
if (lowerTerm == null) {
|
||||
// makeBinaryInterval is more picky than we are:
|
||||
includeLower = true;
|
||||
}
|
||||
|
||||
if (upperTerm == null) {
|
||||
// makeBinaryInterval is more picky than we are:
|
||||
includeUpper = true;
|
||||
}
|
||||
|
||||
return Automata.makeBinaryInterval(lowerTerm, includeLower, upperTerm, includeUpper);
|
||||
}
|
||||
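A short sketch of the new construction path (field name and endpoints are arbitrary): a term range is expressed as a binary-interval automaton, and the same automaton the query uses internally can be built directly through toAutomaton.

// Sketch: the range ["B", "D") over field "content", as a query and as a raw automaton.
TermRangeQuery q = TermRangeQuery.newStringRange("content", "B", "D", true, false);
Automaton a = TermRangeQuery.toAutomaton(new BytesRef("B"), new BytesRef("D"), true, false);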
|
||||
/**
|
||||
* Factory that creates a new TermRangeQuery using Strings for term text.
|
||||
*/
|
||||
|
@ -98,37 +107,22 @@ public class TermRangeQuery extends MultiTermQuery {
|
|||
/** Returns <code>true</code> if the upper endpoint is inclusive */
|
||||
public boolean includesUpper() { return includeUpper; }
|
||||
|
||||
@Override
|
||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||
if (lowerTerm != null && upperTerm != null && lowerTerm.compareTo(upperTerm) > 0) {
|
||||
return TermsEnum.EMPTY;
|
||||
}
|
||||
|
||||
TermsEnum tenum = terms.iterator(null);
|
||||
|
||||
if ((lowerTerm == null || (includeLower && lowerTerm.length == 0)) && upperTerm == null) {
|
||||
return tenum;
|
||||
}
|
||||
return new TermRangeTermsEnum(tenum,
|
||||
lowerTerm, upperTerm, includeLower, includeUpper);
|
||||
}
|
||||
|
||||
/** Prints a user-readable version of this query. */
|
||||
@Override
|
||||
public String toString(String field) {
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
if (!getField().equals(field)) {
|
||||
buffer.append(getField());
|
||||
buffer.append(":");
|
||||
}
|
||||
buffer.append(includeLower ? '[' : '{');
|
||||
// TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8!
|
||||
buffer.append(lowerTerm != null ? ("*".equals(Term.toString(lowerTerm)) ? "\\*" : Term.toString(lowerTerm)) : "*");
|
||||
buffer.append(" TO ");
|
||||
buffer.append(upperTerm != null ? ("*".equals(Term.toString(upperTerm)) ? "\\*" : Term.toString(upperTerm)) : "*");
|
||||
buffer.append(includeUpper ? ']' : '}');
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
StringBuilder buffer = new StringBuilder();
|
||||
if (!getField().equals(field)) {
|
||||
buffer.append(getField());
|
||||
buffer.append(":");
|
||||
}
|
||||
buffer.append(includeLower ? '[' : '{');
|
||||
// TODO: all these toStrings for queries should just output the bytes, it might not be UTF-8!
|
||||
buffer.append(lowerTerm != null ? ("*".equals(Term.toString(lowerTerm)) ? "\\*" : Term.toString(lowerTerm)) : "*");
|
||||
buffer.append(" TO ");
|
||||
buffer.append(upperTerm != null ? ("*".equals(Term.toString(upperTerm)) ? "\\*" : Term.toString(upperTerm)) : "*");
|
||||
buffer.append(includeUpper ? ']' : '}');
|
||||
buffer.append(ToStringUtils.boost(getBoost()));
|
||||
return buffer.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -167,5 +161,4 @@ public class TermRangeQuery extends MultiTermQuery {
|
|||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,101 +0,0 @@
|
|||
package org.apache.lucene.search;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.FilteredTermsEnum;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* Subclass of FilteredTermEnum for enumerating all terms that match the
|
||||
* specified range parameters. Each term in the enumeration is
|
||||
* greater than all that precede it.
|
||||
*/
|
||||
public class TermRangeTermsEnum extends FilteredTermsEnum {
|
||||
|
||||
final private boolean includeLower;
|
||||
final private boolean includeUpper;
|
||||
final private BytesRef lowerBytesRef;
|
||||
final private BytesRef upperBytesRef;
|
||||
|
||||
/**
|
||||
* Enumerates all terms greater/equal than <code>lowerTerm</code>
|
||||
* but less/equal than <code>upperTerm</code>.
|
||||
*
|
||||
* If an endpoint is null, it is said to be "open". Either or both
|
||||
* endpoints may be open. Open endpoints may not be exclusive
|
||||
* (you can't select all but the first or last term without
|
||||
* explicitly specifying the term to exclude.)
|
||||
*
|
||||
* @param tenum
|
||||
* TermsEnum to filter
|
||||
* @param lowerTerm
|
||||
* The term text at the lower end of the range
|
||||
* @param upperTerm
|
||||
* The term text at the upper end of the range
|
||||
* @param includeLower
|
||||
* If true, the <code>lowerTerm</code> is included in the range.
|
||||
* @param includeUpper
|
||||
* If true, the <code>upperTerm</code> is included in the range.
|
||||
*/
|
||||
public TermRangeTermsEnum(TermsEnum tenum, BytesRef lowerTerm, BytesRef upperTerm,
|
||||
boolean includeLower, boolean includeUpper) {
|
||||
super(tenum);
|
||||
|
||||
// do a little bit of normalization...
|
||||
// open ended range queries should always be inclusive.
|
||||
if (lowerTerm == null) {
|
||||
this.lowerBytesRef = new BytesRef();
|
||||
this.includeLower = true;
|
||||
} else {
|
||||
this.lowerBytesRef = lowerTerm;
|
||||
this.includeLower = includeLower;
|
||||
}
|
||||
|
||||
if (upperTerm == null) {
|
||||
this.includeUpper = true;
|
||||
upperBytesRef = null;
|
||||
} else {
|
||||
this.includeUpper = includeUpper;
|
||||
upperBytesRef = upperTerm;
|
||||
}
|
||||
|
||||
setInitialSeekTerm(lowerBytesRef);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
if (!this.includeLower && term.equals(lowerBytesRef))
|
||||
return AcceptStatus.NO;
|
||||
|
||||
// Use this field's default sort ordering
|
||||
if (upperBytesRef != null) {
|
||||
final int cmp = upperBytesRef.compareTo(term);
|
||||
/*
|
||||
* if beyond the upper term, or is exclusive and this is equal to
|
||||
* the upper term, break out
|
||||
*/
|
||||
if ((cmp < 0) ||
|
||||
(!includeUpper && cmp==0)) {
|
||||
return AcceptStatus.END;
|
||||
}
|
||||
}
|
||||
|
||||
return AcceptStatus.YES;
|
||||
}
|
||||
}
|
|
@ -72,6 +72,18 @@ final public class Automata {
|
|||
a.finishState();
|
||||
return a;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new (deterministic) automaton that accepts all binary terms.
|
||||
*/
|
||||
public static Automaton makeAnyBinary() {
|
||||
Automaton a = new Automaton();
|
||||
int s = a.createState();
|
||||
a.setAccept(s, true);
|
||||
a.addTransition(s, s, 0, 255);
|
||||
a.finishState();
|
||||
return a;
|
||||
}
|
||||
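A quick membership sketch for makeAnyBinary (the ByteRunAutomaton wrapper is only used here to test acceptance): every byte sequence is accepted, including the empty term.

// Sketch: any binary term, of any length, is accepted.
ByteRunAutomaton run = new ByteRunAutomaton(Automata.makeAnyBinary(), true, Integer.MAX_VALUE);
assert run.run(new byte[0], 0, 0);                    // empty term
assert run.run(new byte[] {(byte) 0xff, 0x00}, 0, 2); // arbitrary bytes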
|
||||
/**
|
||||
* Returns a new (deterministic) automaton that accepts any single codepoint.
|
||||
|
@ -204,8 +216,172 @@ final public class Automata {
|
|||
return s;
|
||||
}
|
||||
|
||||
/** Creates a new deterministic, minimal automaton accepting
|
||||
* all binary terms in the specified interval. Note that unlike
|
||||
* {@link #makeDecimalInterval}, the returned automaton is infinite,
|
||||
* because terms compare like floating point numbers with a leading
|
||||
* decimal point. However, in the special case where min == max,
|
||||
* and both are inclusive, the automaton will be finite and accept
|
||||
* exactly one term. */
|
||||
public static Automaton makeBinaryInterval(BytesRef min, boolean minInclusive, BytesRef max, boolean maxInclusive) {
|
||||
|
||||
if (min == null && minInclusive == false) {
|
||||
throw new IllegalArgumentException("minInclusive must be true when min is null (open ended)");
|
||||
}
|
||||
|
||||
if (max == null && maxInclusive == false) {
|
||||
throw new IllegalArgumentException("maxInclusive must be true when max is null (open ended)");
|
||||
}
|
||||
|
||||
if (min != null && min.length == 0 && minInclusive == true) {
|
||||
// Silly empty string corner case:
|
||||
min = null;
|
||||
}
|
||||
|
||||
if (min == null) {
|
||||
if (max == null) {
|
||||
// Accepts all terms:
|
||||
return makeAnyBinary();
|
||||
}
|
||||
min = new BytesRef();
|
||||
minInclusive = true;
|
||||
}
|
||||
int cmp;
|
||||
if (max != null) {
|
||||
cmp = min.compareTo(max);
|
||||
} else {
|
||||
cmp = -1;
|
||||
}
|
||||
if (cmp == 0) {
|
||||
if (minInclusive == false || maxInclusive == false) {
|
||||
return makeEmpty();
|
||||
} else {
|
||||
return makeBinary(min);
|
||||
}
|
||||
} else if (cmp > 0) {
|
||||
// min > max: the interval is empty
|
||||
return makeEmpty();
|
||||
}
|
||||
|
||||
Automaton a = new Automaton();
|
||||
int startState = a.createState();
|
||||
int sinkState = a.createState();
|
||||
a.setAccept(sinkState, true);
|
||||
|
||||
// This state accepts all suffixes:
|
||||
a.addTransition(sinkState, sinkState, 0, 255);
|
||||
|
||||
boolean equalPrefix = true;
|
||||
int lastState = startState;
|
||||
int firstMaxState = -1;
|
||||
int sharedPrefixLength = 0;
|
||||
for(int i=0;i<min.length;i++) {
|
||||
int minLabel = min.bytes[min.offset+i] & 0xff;
|
||||
|
||||
int maxLabel;
|
||||
if (max != null && equalPrefix && i < max.length) {
|
||||
maxLabel = max.bytes[max.offset+i] & 0xff;
|
||||
} else {
|
||||
maxLabel = -1;
|
||||
}
|
||||
|
||||
int nextState;
|
||||
if (minInclusive && i == min.length-1 && (equalPrefix == false || minLabel != maxLabel)) {
|
||||
nextState = sinkState;
|
||||
} else {
|
||||
nextState = a.createState();
|
||||
}
|
||||
|
||||
if (equalPrefix) {
|
||||
|
||||
if (minLabel == maxLabel) {
|
||||
// Still in shared prefix
|
||||
a.addTransition(lastState, nextState, minLabel);
|
||||
} else if (max == null) {
|
||||
equalPrefix = false;
|
||||
sharedPrefixLength = 0;
|
||||
a.addTransition(lastState, sinkState, minLabel+1, 0xff);
|
||||
a.addTransition(lastState, nextState, minLabel);
|
||||
} else {
|
||||
// This is the first point where min & max diverge:
|
||||
assert maxLabel > minLabel;
|
||||
|
||||
a.addTransition(lastState, nextState, minLabel);
|
||||
|
||||
if (maxLabel > minLabel + 1) {
|
||||
a.addTransition(lastState, sinkState, minLabel+1, maxLabel-1);
|
||||
}
|
||||
|
||||
// Now fork off path for max:
|
||||
if (maxInclusive || i < max.length-1) {
|
||||
firstMaxState = a.createState();
|
||||
if (i < max.length-1) {
|
||||
a.setAccept(firstMaxState, true);
|
||||
}
|
||||
a.addTransition(lastState, firstMaxState, maxLabel);
|
||||
}
|
||||
equalPrefix = false;
|
||||
sharedPrefixLength = i;
|
||||
}
|
||||
} else {
|
||||
// OK, already diverged:
|
||||
a.addTransition(lastState, nextState, minLabel);
|
||||
if (minLabel < 255) {
|
||||
a.addTransition(lastState, sinkState, minLabel+1, 255);
|
||||
}
|
||||
}
|
||||
lastState = nextState;
|
||||
}
|
||||
|
||||
// Accept any suffix appended to the min term:
|
||||
if (equalPrefix == false && lastState != sinkState && lastState != startState) {
|
||||
a.addTransition(lastState, sinkState, 0, 255);
|
||||
}
|
||||
|
||||
if (minInclusive) {
|
||||
// Accept exactly the min term:
|
||||
a.setAccept(lastState, true);
|
||||
}
|
||||
|
||||
if (max != null) {
|
||||
|
||||
// Now do max:
|
||||
if (firstMaxState == -1) {
|
||||
// Min was a full prefix of max
|
||||
sharedPrefixLength = min.length;
|
||||
} else {
|
||||
lastState = firstMaxState;
|
||||
sharedPrefixLength++;
|
||||
}
|
||||
for(int i=sharedPrefixLength;i<max.length;i++) {
|
||||
int maxLabel = max.bytes[max.offset+i]&0xff;
|
||||
if (maxLabel > 0) {
|
||||
a.addTransition(lastState, sinkState, 0, maxLabel-1);
|
||||
}
|
||||
if (maxInclusive || i < max.length-1) {
|
||||
int nextState = a.createState();
|
||||
if (i < max.length-1) {
|
||||
a.setAccept(nextState, true);
|
||||
}
|
||||
a.addTransition(lastState, nextState, maxLabel);
|
||||
lastState = nextState;
|
||||
}
|
||||
}
|
||||
|
||||
if (maxInclusive) {
|
||||
a.setAccept(lastState, true);
|
||||
}
|
||||
}
|
||||
|
||||
a.finishState();
|
||||
|
||||
assert a.isDeterministic(): a.toDot();
|
||||
|
||||
return a;
|
||||
}
|
||||
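A small worked sketch of the interval automaton (the terms are arbitrary): with min "bar" inclusive and max "foo" exclusive, any term that sorts inside the range is accepted, including terms longer than min, which is exactly the infinite behavior the javadoc describes.

// Sketch: membership checks against the ["bar", "foo") interval.
Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("foo"), false);
ByteRunAutomaton run = new ByteRunAutomaton(a, true, Integer.MAX_VALUE);
BytesRef t = new BytesRef("baz");
assert run.run(t.bytes, t.offset, t.length);           // "bar" <= "baz" < "foo"
t = new BytesRef("foo");
assert run.run(t.bytes, t.offset, t.length) == false;  // max is exclusive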
|
||||
/**
|
||||
* Returns a new automaton that accepts strings representing decimal
|
||||
* Returns a new automaton that accepts strings representing decimal (base 10)
|
||||
* non-negative integers in the given interval.
|
||||
*
|
||||
* @param min minimal value of interval
|
||||
|
@ -218,7 +394,7 @@ final public class Automata {
|
|||
* interval cannot be expressed with the given fixed number of
|
||||
* digits
|
||||
*/
|
||||
public static Automaton makeInterval(int min, int max, int digits)
|
||||
public static Automaton makeDecimalInterval(int min, int max, int digits)
|
||||
throws IllegalArgumentException {
|
||||
String x = Integer.toString(min);
|
||||
String y = Integer.toString(max);
|
||||
|
@ -275,7 +451,30 @@ final public class Automata {
|
|||
for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {
|
||||
int state = a.createState();
|
||||
cp = s.codePointAt(i);
|
||||
a.addTransition(lastState, state, cp, cp);
|
||||
a.addTransition(lastState, state, cp);
|
||||
lastState = state;
|
||||
}
|
||||
|
||||
a.setAccept(lastState, true);
|
||||
a.finishState();
|
||||
|
||||
assert a.isDeterministic();
|
||||
assert Operations.hasDeadStates(a) == false;
|
||||
|
||||
return a;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a new (deterministic) automaton that accepts the single given
|
||||
* binary term.
|
||||
*/
|
||||
public static Automaton makeBinary(BytesRef term) {
|
||||
Automaton a = new Automaton();
|
||||
int lastState = a.createState();
|
||||
for (int i=0;i<term.length;i++) {
|
||||
int state = a.createState();
|
||||
int label = term.bytes[term.offset+i] & 0xff;
|
||||
a.addTransition(lastState, state, label);
|
||||
lastState = state;
|
||||
}
|
||||
|
||||
|
|
|
@ -491,11 +491,50 @@ public class Automaton implements Accountable {
|
|||
public void getNextTransition(Transition t) {
|
||||
// Make sure there is still a transition left:
|
||||
assert (t.transitionUpto+3 - states[2*t.source]) <= 3*states[2*t.source+1];
|
||||
|
||||
// Make sure transitions are in fact sorted:
|
||||
assert transitionSorted(t);
|
||||
|
||||
t.dest = transitions[t.transitionUpto++];
|
||||
t.min = transitions[t.transitionUpto++];
|
||||
t.max = transitions[t.transitionUpto++];
|
||||
}
|
||||
|
||||
private boolean transitionSorted(Transition t) {
|
||||
|
||||
int upto = t.transitionUpto;
|
||||
if (upto == states[2*t.source]) {
|
||||
// Transition isn't initialized yet (this is the first transition); don't check:
|
||||
return true;
|
||||
}
|
||||
|
||||
int nextDest = transitions[upto];
|
||||
int nextMin = transitions[upto+1];
|
||||
int nextMax = transitions[upto+2];
|
||||
if (nextMin > t.min) {
|
||||
return true;
|
||||
} else if (nextMin < t.min) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Min is equal, now test max:
|
||||
if (nextMax > t.max) {
|
||||
return true;
|
||||
} else if (nextMax < t.max) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Max is also equal, now test dest:
|
||||
if (nextDest > t.dest) {
|
||||
return true;
|
||||
} else if (nextDest < t.dest) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// We should never see fully equal transitions here:
|
||||
return false;
|
||||
}
|
||||
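For reference, the iteration pattern that transitionSorted guards (the automaton a and the state id are assumed to exist): initTransition positions the cursor and each getNextTransition call must yield transitions ordered by min, then max, then dest.

// Sketch: walk all transitions leaving state, relying on the sorted order.
Transition t = new Transition();
int count = a.initTransition(state, t);
for (int i = 0; i < count; i++) {
  a.getNextTransition(t);
  // here t.min <= t.max, and (min, max, dest) strictly increases across calls
}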
|
||||
/** Fill the provided {@link Transition} with the index'th
|
||||
* transition leaving the specified state. */
|
||||
public void getTransition(int state, int index, Transition t) {
|
||||
|
@ -565,7 +604,7 @@ public class Automaton implements Accountable {
|
|||
//System.out.println("toDot: state " + state + " has " + numTransitions + " transitions; t.nextTrans=" + t.transitionUpto);
|
||||
for(int i=0;i<numTransitions;i++) {
|
||||
getNextTransition(t);
|
||||
//System.out.println(" t.nextTrans=" + t.transitionUpto);
|
||||
//System.out.println(" t.nextTrans=" + t.transitionUpto + " t=" + t);
|
||||
assert t.max >= t.min;
|
||||
b.append(" ");
|
||||
b.append(state);
|
||||
|
|
|
@ -28,8 +28,8 @@ public class ByteRunAutomaton extends RunAutomaton {
|
|||
}
|
||||
|
||||
/** expert: if utf8 is true, the input is already byte-based */
|
||||
public ByteRunAutomaton(Automaton a, boolean utf8, int maxDeterminizedStates) {
|
||||
super(utf8 ? a : new UTF32ToUTF8().convert(a), 256, true, maxDeterminizedStates);
|
||||
public ByteRunAutomaton(Automaton a, boolean isBinary, int maxDeterminizedStates) {
|
||||
super(isBinary ? a : new UTF32ToUTF8().convert(a), 256, true, maxDeterminizedStates);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -90,12 +90,41 @@ public class CompiledAutomaton {
|
|||
*/
|
||||
public final Boolean finite;
|
||||
|
||||
/** Which state, if any, accepts all suffixes, else -1. */
|
||||
public final int sinkState;
|
||||
|
||||
/** Create this, passing simplify=true and finite=null, so that we try
|
||||
* to simplify the automaton and determine if it is finite. */
|
||||
public CompiledAutomaton(Automaton automaton) {
|
||||
this(automaton, null, true);
|
||||
}
|
||||
|
||||
/** Returns sink state, if present, else -1. */
|
||||
private static int findSinkState(Automaton automaton) {
|
||||
int numStates = automaton.getNumStates();
|
||||
Transition t = new Transition();
|
||||
int foundState = -1;
|
||||
for (int s=0;s<numStates;s++) {
|
||||
if (automaton.isAccept(s)) {
|
||||
int count = automaton.initTransition(s, t);
|
||||
boolean isSinkState = false;
|
||||
for(int i=0;i<count;i++) {
|
||||
automaton.getNextTransition(t);
|
||||
if (t.dest == s && t.min == 0 && t.max == 0xff) {
|
||||
isSinkState = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (isSinkState) {
|
||||
foundState = s;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return foundState;
|
||||
}
|
||||
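A hedged sketch of when a sink state exists (the five-argument constructor call mirrors the one used elsewhere in this change): an open-ended binary interval ends in a state that loops on every byte, which findSinkState reports through sinkState.

// Sketch: open-ended range => a sink state is found.
CompiledAutomaton open = new CompiledAutomaton(
    Automata.makeBinaryInterval(new BytesRef("m"), true, null, true),
    null, true, Integer.MAX_VALUE, true);
assert open.sinkState != -1;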
|
||||
/** Create this. If finite is null, we use {@link Operations#isFinite}
|
||||
* to determine whether it is finite. If simplify is true, we run
|
||||
* possibly expensive operations to determine if the automaton is one
|
||||
|
@ -134,6 +163,7 @@ public class CompiledAutomaton {
|
|||
runAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
sinkState = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -154,6 +184,7 @@ public class CompiledAutomaton {
|
|||
runAutomaton = null;
|
||||
this.automaton = null;
|
||||
this.finite = null;
|
||||
sinkState = -1;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -174,7 +205,7 @@ public class CompiledAutomaton {
|
|||
} else {
|
||||
term = new BytesRef(UnicodeUtil.newString(singleton.ints, singleton.offset, singleton.length));
|
||||
}
|
||||
|
||||
sinkState = -1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
@ -202,7 +233,8 @@ public class CompiledAutomaton {
|
|||
if (this.finite) {
|
||||
commonSuffixRef = null;
|
||||
} else {
|
||||
// NOTE: this is a very costly operation! We should test if it's really warranted in practice...
|
||||
// NOTE: this is a very costly operation! We should test if it's really warranted in practice... we could do a fast match
|
||||
// by looking for a sink state (which means it has no common suffix). Or maybe we shouldn't do it when simplify is false?:
|
||||
BytesRef suffix = Operations.getCommonSuffixBytesRef(binary, maxDeterminizedStates);
|
||||
if (suffix.length == 0) {
|
||||
commonSuffixRef = null;
|
||||
|
@ -215,6 +247,10 @@ public class CompiledAutomaton {
|
|||
runAutomaton = new ByteRunAutomaton(binary, true, maxDeterminizedStates);
|
||||
|
||||
this.automaton = runAutomaton.automaton;
|
||||
|
||||
// TODO: this is a bit fragile because if the automaton is not minimized there could be more than 1 sink state but auto-prefix will fail
|
||||
// to run for those:
|
||||
sinkState = findSinkState(this.automaton);
|
||||
}
|
||||
|
||||
private Transition transition = new Transition();
|
||||
|
|
|
@ -599,7 +599,7 @@ public class RegExp {
|
|||
a = aa;
|
||||
break;
|
||||
case REGEXP_INTERVAL:
|
||||
a = Automata.makeInterval(min, max, digits);
|
||||
a = Automata.makeDecimalInterval(min, max, digits);
|
||||
break;
|
||||
}
|
||||
return a;
|
||||
|
|
|
@ -117,8 +117,8 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
assertAutomatonHits(2, Automata.makeString("doc"));
|
||||
assertAutomatonHits(1, Automata.makeChar('a'));
|
||||
assertAutomatonHits(2, Automata.makeCharRange('a', 'b'));
|
||||
assertAutomatonHits(2, Automata.makeInterval(1233, 2346, 0));
|
||||
assertAutomatonHits(1, Automata.makeInterval(0, 2000, 0));
|
||||
assertAutomatonHits(2, Automata.makeDecimalInterval(1233, 2346, 0));
|
||||
assertAutomatonHits(1, Automata.makeDecimalInterval(0, 2000, 0));
|
||||
assertAutomatonHits(2, Operations.union(Automata.makeChar('a'),
|
||||
Automata.makeChar('b')));
|
||||
assertAutomatonHits(0, Operations.intersection(Automata
|
||||
|
@ -194,7 +194,6 @@ public class TestAutomatonQuery extends LuceneTestCase {
|
|||
Automaton pfx = Automata.makeString("do");
|
||||
Automaton prefixAutomaton = Operations.concatenate(pfx, Automata.makeAnyString());
|
||||
AutomatonQuery aq = new AutomatonQuery(newTerm("bogus"), prefixAutomaton);
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), FN);
|
||||
assertEquals(3, automatonQueryNrHits(aq));
|
||||
}
|
||||
|
||||
|
|
|
@ -17,16 +17,19 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.FilteredTermsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -34,8 +37,6 @@ import org.apache.lucene.util.LuceneTestCase;
|
|||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
public class TestMultiTermQueryRewrites extends LuceneTestCase {
|
||||
|
||||
static Directory dir, sdir1, sdir2;
|
||||
|
@ -152,14 +153,27 @@ public class TestMultiTermQueryRewrites extends LuceneTestCase {
|
|||
final MultiTermQuery mtq = new MultiTermQuery("data") {
|
||||
@Override
|
||||
protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException {
|
||||
return new TermRangeTermsEnum(terms.iterator(null), new BytesRef("2"), new BytesRef("7"), true, true) {
|
||||
return new FilteredTermsEnum(terms.iterator(null)) {
|
||||
|
||||
final BoostAttribute boostAtt =
|
||||
attributes().addAttribute(BoostAttribute.class);
|
||||
|
||||
@Override
|
||||
protected AcceptStatus accept(BytesRef term) {
|
||||
boostAtt.setBoost(Float.parseFloat(term.utf8ToString()));
|
||||
return super.accept(term);
|
||||
if (term.length == 0) {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
char c = (char) (term.bytes[term.offset] & 0xff);
|
||||
if (c >= '2') {
|
||||
if (c <= '7') {
|
||||
return AcceptStatus.YES;
|
||||
} else {
|
||||
return AcceptStatus.END;
|
||||
}
|
||||
} else {
|
||||
return AcceptStatus.NO;
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
|
|
@ -17,25 +17,32 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeImpl;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
|
@ -70,7 +77,6 @@ public class TestPrefixQuery extends LuceneTestCase {
|
|||
assertEquals("One in /Computers/Mac", 1, hits.length);
|
||||
|
||||
query = new PrefixQuery(new Term("category", ""));
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "category");
|
||||
hits = searcher.search(query, 1000).scoreDocs;
|
||||
assertEquals("everything", 3, hits.length);
|
||||
writer.close();
|
||||
|
@ -78,6 +84,92 @@ public class TestPrefixQuery extends LuceneTestCase {
|
|||
directory.close();
|
||||
}
|
||||
|
||||
/** Make sure auto prefix terms are used with PrefixQuery. */
|
||||
public void testAutoPrefixTermsKickIn() throws Exception {
|
||||
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
for(int i=1;i<5;i++) {
|
||||
char[] chars = new char[i];
|
||||
Arrays.fill(chars, 'a');
|
||||
prefixes.add(new String(chars));
|
||||
}
|
||||
|
||||
Set<String> randomTerms = new HashSet<>();
|
||||
int numTerms = atLeast(10000);
|
||||
while (randomTerms.size() < numTerms) {
|
||||
for(String prefix : prefixes) {
|
||||
randomTerms.add(prefix + TestUtil.randomRealisticUnicodeString(random()));
|
||||
}
|
||||
}
|
||||
|
||||
int actualCount = 0;
|
||||
for(String term : randomTerms) {
|
||||
if (term.startsWith("aa")) {
|
||||
actualCount++;
|
||||
}
|
||||
}
|
||||
|
||||
//System.out.println("actual count " + actualCount);
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
|
||||
|
||||
// As long as this is never > actualCount, aa should always see at least one auto-prefix term:
|
||||
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, actualCount);
|
||||
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
|
||||
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
|
||||
minTermsAutoPrefix, maxTermsAutoPrefix)));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
for (String term : randomTerms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
}
|
||||
|
||||
w.forceMerge(1);
|
||||
IndexReader r = w.getReader();
|
||||
final Terms terms = MultiFields.getTerms(r, "field");
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
final int finalActualCount = actualCount;
|
||||
PrefixQuery q = new PrefixQuery(new Term("field", "aa")) {
|
||||
public PrefixQuery checkTerms() throws IOException {
|
||||
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
|
||||
int count = 0;
|
||||
while (termsEnum.next() != null) {
|
||||
//System.out.println("got term: " + termsEnum.term().utf8ToString());
|
||||
count++;
|
||||
}
|
||||
|
||||
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
|
||||
assertTrue(count < finalActualCount);
|
||||
|
||||
return this;
|
||||
}
|
||||
}.checkTerms();
|
||||
|
||||
int x = BooleanQuery.getMaxClauseCount();
|
||||
try {
|
||||
BooleanQuery.setMaxClauseCount(randomTerms.size());
|
||||
if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||
} else if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
}
|
||||
|
||||
assertEquals(actualCount, s.search(q, 1).totalHits);
|
||||
} finally {
|
||||
BooleanQuery.setMaxClauseCount(x);
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMatchAll() throws Exception {
|
||||
Directory directory = newDirectory();
|
||||
|
||||
|
@ -92,8 +184,6 @@ public class TestPrefixQuery extends LuceneTestCase {
|
|||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
assertEquals(1, searcher.search(query, 1000).totalHits);
|
||||
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
|
||||
writer.close();
|
||||
reader.close();
|
||||
directory.close();
|
||||
|
|
|
@ -18,20 +18,32 @@ package org.apache.lucene.search;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.analysis.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.codecs.autoprefix.AutoPrefixPostingsFormat;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.AttributeSource;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
|
||||
|
||||
public class TestTermRangeQuery extends LuceneTestCase {
|
||||
|
@ -104,19 +116,18 @@ public class TestTermRangeQuery extends LuceneTestCase {
|
|||
initializeIndex(new String[]{"A", "B", "C", "D"});
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
IndexSearcher searcher = newSearcher(reader);
|
||||
|
||||
TermRangeQuery query = new TermRangeQuery("content", null, null, true, true);
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "content");
|
||||
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
|
||||
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
|
||||
query = new TermRangeQuery("content", null, null, false, false);
|
||||
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
|
||||
|
||||
query = TermRangeQuery.newStringRange("content", "", null, true, true);
|
||||
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
|
||||
|
||||
query = TermRangeQuery.newStringRange("content", "", null, true, false);
|
||||
assertFalse(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
|
||||
assertEquals(4, searcher.search(query, 1000).scoreDocs.length);
|
||||
// and now anothe one
|
||||
query = TermRangeQuery.newStringRange("content", "B", null, true, false);
|
||||
assertTrue(query.getTermsEnum(terms) instanceof TermRangeTermsEnum);
|
||||
|
||||
// and now another one
|
||||
query = TermRangeQuery.newStringRange("content", "B", null, true, true);
|
||||
assertEquals(3, searcher.search(query, 1000).scoreDocs.length);
|
||||
reader.close();
|
||||
}
|
||||
|
@ -336,4 +347,127 @@ public class TestTermRangeQuery extends LuceneTestCase {
|
|||
//assertEquals("C added => A,B,<empty string>,C in range", 3, hits.length());
|
||||
reader.close();
|
||||
}
|
||||
|
||||
/** Make sure auto prefix terms are used with TermRangeQuery */
|
||||
public void testAutoPrefixTermsKickIn() throws Exception {
|
||||
|
||||
List<String> prefixes = new ArrayList<>();
|
||||
for(int i=1;i<5;i++) {
|
||||
char[] chars = new char[i];
|
||||
Arrays.fill(chars, 'a');
|
||||
prefixes.add(new String(chars));
|
||||
}
|
||||
|
||||
Set<String> randomTerms = new HashSet<>();
|
||||
int numTerms = atLeast(10000);
|
||||
while (randomTerms.size() < numTerms) {
|
||||
for(String prefix : prefixes) {
|
||||
randomTerms.add(prefix + TestUtil.randomSimpleString(random()));
|
||||
}
|
||||
}
|
||||
|
||||
// We make term range aa<start> - aa<end>
|
||||
char start;
|
||||
char end;
|
||||
|
||||
int actualCount;
|
||||
boolean startInclusive = random().nextBoolean();
|
||||
boolean endInclusive = random().nextBoolean();
|
||||
String startTerm;
|
||||
String endTerm;
|
||||
|
||||
while (true) {
|
||||
start = (char) TestUtil.nextInt(random(), 'a', 'm');
|
||||
end = (char) TestUtil.nextInt(random(), start+1, 'z');
|
||||
|
||||
actualCount = 0;
|
||||
|
||||
startTerm = "aa" + start;
|
||||
endTerm = "aa" + end;
|
||||
|
||||
for(String term : randomTerms) {
|
||||
int cmpStart = startTerm.compareTo(term);
|
||||
int cmpEnd = endTerm.compareTo(term);
|
||||
if ((cmpStart < 0 || (startInclusive && cmpStart == 0)) &&
|
||||
(cmpEnd > 0 || (endInclusive && cmpEnd == 0))) {
|
||||
actualCount++;
|
||||
}
|
||||
}
|
||||
|
||||
if (actualCount > 2000) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
//System.out.println("start " + startTerm + " inclusive? " + startInclusive);
|
||||
//System.out.println("end " + endTerm + " inclusive? " + endInclusive);
|
||||
//System.out.println("actual count " + actualCount);
|
||||
|
||||
Directory dir = newDirectory();
|
||||
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
|
||||
int minTermsInBlock = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsInBlock = Math.max(2, (minTermsInBlock-1)*2 + random().nextInt(100));
|
||||
|
||||
int minTermsAutoPrefix = TestUtil.nextInt(random(), 2, 100);
|
||||
int maxTermsAutoPrefix = random().nextBoolean() ? Math.max(2, (minTermsAutoPrefix-1)*2 + random().nextInt(100)) : Integer.MAX_VALUE;
|
||||
|
||||
//System.out.println("minTermsAutoPrefix " + minTermsAutoPrefix);
|
||||
//System.out.println("maxTermsAutoPrefix " + maxTermsAutoPrefix);
|
||||
|
||||
iwc.setCodec(TestUtil.alwaysPostingsFormat(new AutoPrefixPostingsFormat(minTermsInBlock, maxTermsInBlock,
|
||||
minTermsAutoPrefix, maxTermsAutoPrefix)));
|
||||
RandomIndexWriter w = new RandomIndexWriter(random(), dir, iwc);
|
||||
|
||||
//System.out.println("TEST: index terms");
|
||||
for (String term : randomTerms) {
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField("field", term, Field.Store.NO));
|
||||
w.addDocument(doc);
|
||||
//System.out.println(" " + term);
|
||||
}
|
||||
|
||||
//System.out.println("TEST: now force merge");
|
||||
w.forceMerge(1);
|
||||
IndexReader r = w.getReader();
|
||||
final Terms terms = MultiFields.getTerms(r, "field");
|
||||
IndexSearcher s = new IndexSearcher(r);
|
||||
final int finalActualCount = actualCount;
|
||||
//System.out.println("start=" + startTerm + " end=" + endTerm + " startIncl=" + startInclusive + " endIncl=" + endInclusive);
|
||||
TermRangeQuery q = new TermRangeQuery("field", new BytesRef(startTerm), new BytesRef(endTerm), startInclusive, endInclusive) {
|
||||
public TermRangeQuery checkTerms() throws IOException {
|
||||
TermsEnum termsEnum = getTermsEnum(terms, new AttributeSource());
|
||||
int count = 0;
|
||||
while (termsEnum.next() != null) {
|
||||
//System.out.println("got term: " + termsEnum.term().utf8ToString());
|
||||
count++;
|
||||
}
|
||||
//System.out.println("count " + count + " vs finalActualCount=" + finalActualCount);
|
||||
|
||||
// Auto-prefix term(s) should have kicked in, so we should have visited fewer than the total number of aa* terms:
|
||||
assertTrue(count < finalActualCount);
|
||||
|
||||
return this;
|
||||
}
|
||||
}.checkTerms();
|
||||
|
||||
if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_REWRITE);
|
||||
} else if (random().nextBoolean()) {
|
||||
q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE);
|
||||
}
|
||||
|
||||
assertEquals(actualCount, s.search(q, 1).totalHits);
|
||||
|
||||
// Test when min == max:
|
||||
List<String> randomTermsList = new ArrayList<>(randomTerms);
|
||||
for(int iter=0;iter<100*RANDOM_MULTIPLIER;iter++) {
|
||||
String term = randomTermsList.get(random().nextInt(randomTermsList.size()));
|
||||
q = new TermRangeQuery("field", new BytesRef(term), new BytesRef(term), true, true);
|
||||
assertEquals(1, s.search(q, 1).totalHits);
|
||||
}
|
||||
|
||||
r.close();
|
||||
w.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -34,14 +34,8 @@ import java.io.IOException;
|
|||
/**
|
||||
* TestWildcard tests the '*' and '?' wildcard characters.
|
||||
*/
|
||||
public class TestWildcard
|
||||
extends LuceneTestCase {
|
||||
public class TestWildcard extends LuceneTestCase {
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
}
|
||||
|
||||
public void testEquals() {
|
||||
WildcardQuery wq1 = new WildcardQuery(new Term("field", "b*a"));
|
||||
WildcardQuery wq2 = new WildcardQuery(new Term("field", "b*a"));
|
||||
|
@ -126,10 +120,10 @@ public class TestWildcard
|
|||
|
||||
MultiTermQuery wq = new WildcardQuery(new Term("field", "prefix*"));
|
||||
assertMatches(searcher, wq, 2);
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
|
||||
|
||||
wq = new WildcardQuery(new Term("field", "*"));
|
||||
assertMatches(searcher, wq, 2);
|
||||
Terms terms = MultiFields.getTerms(searcher.getIndexReader(), "field");
|
||||
assertFalse(wq.getTermsEnum(terms).getClass().getSimpleName().contains("AutomatonTermsEnum"));
|
||||
reader.close();
|
||||
indexStore.close();
|
||||
|
|
|
@ -232,7 +232,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testInterval() throws Exception {
|
||||
Automaton a = Operations.determinize(Automata.makeInterval(17, 100, 3),
|
||||
Automaton a = Operations.determinize(Automata.makeDecimalInterval(17, 100, 3),
|
||||
DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
assertFalse(Operations.run(a, ""));
|
||||
assertTrue(Operations.run(a, "017"));
|
||||
|
@ -431,7 +431,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testOneInterval() throws Exception {
|
||||
Automaton a = Automata.makeInterval(999, 1032, 0);
|
||||
Automaton a = Automata.makeDecimalInterval(999, 1032, 0);
|
||||
a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
assertTrue(Operations.run(a, "0999"));
|
||||
assertTrue(Operations.run(a, "00999"));
|
||||
|
@ -439,7 +439,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
}
|
||||
|
||||
public void testAnotherInterval() throws Exception {
|
||||
Automaton a = Automata.makeInterval(1, 2, 0);
|
||||
Automaton a = Automata.makeDecimalInterval(1, 2, 0);
|
||||
a = Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
assertTrue(Operations.run(a, "01"));
|
||||
}
|
||||
|
@ -462,7 +462,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
}
|
||||
String prefix = b.toString();
|
||||
|
||||
Automaton a = Operations.determinize(Automata.makeInterval(min, max, digits),
|
||||
Automaton a = Operations.determinize(Automata.makeDecimalInterval(min, max, digits),
|
||||
DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
if (random().nextBoolean()) {
|
||||
a = MinimizationOperations.minimize(a, DEFAULT_MAX_DETERMINIZED_STATES);
|
||||
|
@ -942,7 +942,7 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
if (VERBOSE) {
|
||||
System.out.println(" op=union interval min=" + min + " max=" + max + " digits=" + digits);
|
||||
}
|
||||
a = Operations.union(a, Automata.makeInterval(min, max, digits));
|
||||
a = Operations.union(a, Automata.makeDecimalInterval(min, max, digits));
|
||||
StringBuilder b = new StringBuilder();
|
||||
for(int i=0;i<digits;i++) {
|
||||
b.append('0');
|
||||
|
@ -1105,6 +1105,138 @@ public class TestAutomaton extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testMakeBinaryIntervalRandom() throws Exception {
|
||||
int iters = atLeast(100);
|
||||
for(int iter=0;iter<iters;iter++) {
|
||||
BytesRef minTerm = TestUtil.randomBinaryTerm(random());
|
||||
boolean minInclusive = random().nextBoolean();
|
||||
BytesRef maxTerm = TestUtil.randomBinaryTerm(random());
|
||||
boolean maxInclusive = random().nextBoolean();
|
||||
|
||||
if (VERBOSE) {
|
||||
System.out.println("TEST: iter=" + iter + " minTerm=" + minTerm + " minInclusive=" + minInclusive + " maxTerm=" + maxTerm + " maxInclusive=" + maxInclusive);
|
||||
}
|
||||
|
||||
Automaton a = Automata.makeBinaryInterval(minTerm, minInclusive, maxTerm, maxInclusive);
|
||||
|
||||
Automaton minA = MinimizationOperations.minimize(a, Integer.MAX_VALUE);
|
||||
if (minA.getNumStates() != a.getNumStates()) {
|
||||
assertTrue(minA.getNumStates() < a.getNumStates());
|
||||
System.out.println("Original was not minimal:");
|
||||
System.out.println("Original:\n" + a.toDot());
|
||||
System.out.println("Minimized:\n" + minA.toDot());
|
||||
fail("auotmaton was not minimal");
|
||||
}
|
||||

      if (VERBOSE) {
        System.out.println(a.toDot());
      }

      for(int iter2=0;iter2<500;iter2++) {
        BytesRef term = TestUtil.randomBinaryTerm(random());
        int minCmp = minTerm.compareTo(term);
        int maxCmp = maxTerm.compareTo(term);

        boolean expected;
        if (minCmp > 0 || maxCmp < 0) {
          expected = false;
        } else if (minCmp == 0 && maxCmp == 0) {
          expected = minInclusive && maxInclusive;
        } else if (minCmp == 0) {
          expected = minInclusive;
        } else if (maxCmp == 0) {
          expected = maxInclusive;
        } else {
          expected = true;
        }

        if (VERBOSE) {
          System.out.println("  check term=" + term + " expected=" + expected);
        }
        IntsRefBuilder intsBuilder = new IntsRefBuilder();
        Util.toIntsRef(term, intsBuilder);
        assertEquals(expected, Operations.run(a, intsBuilder.toIntsRef()));
      }
    }
  }

  private static IntsRef intsRef(String s) {
    IntsRefBuilder intsBuilder = new IntsRefBuilder();
    Util.toIntsRef(new BytesRef(s), intsBuilder);
    return intsBuilder.toIntsRef();
  }

  public void testMakeBinaryIntervalBasic() throws Exception {
    Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("foo"), true);
    assertTrue(Operations.run(a, intsRef("bar")));
    assertTrue(Operations.run(a, intsRef("foo")));
    assertTrue(Operations.run(a, intsRef("beep")));
    assertFalse(Operations.run(a, intsRef("baq")));
    assertTrue(Operations.run(a, intsRef("bara")));
  }

  public void testMakeBinaryIntervalEqual() throws Exception {
    Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("bar"), true);
    assertTrue(Operations.run(a, intsRef("bar")));
    assertTrue(Operations.isFinite(a));
    assertEquals(1, Operations.getFiniteStrings(a, 10).size());
  }

  public void testMakeBinaryIntervalCommonPrefix() throws Exception {
    Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("barfoo"), true);
    assertFalse(Operations.run(a, intsRef("bam")));
    assertTrue(Operations.run(a, intsRef("bar")));
    assertTrue(Operations.run(a, intsRef("bara")));
    assertTrue(Operations.run(a, intsRef("barf")));
    assertTrue(Operations.run(a, intsRef("barfo")));
    assertTrue(Operations.run(a, intsRef("barfoo")));
    assertTrue(Operations.run(a, intsRef("barfonz")));
    assertFalse(Operations.run(a, intsRef("barfop")));
    assertFalse(Operations.run(a, intsRef("barfoop")));
  }

  public void testMakeBinaryIntervalOpenMax() throws Exception {
    Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, null, true);
    assertFalse(Operations.run(a, intsRef("bam")));
    assertTrue(Operations.run(a, intsRef("bar")));
    assertTrue(Operations.run(a, intsRef("bara")));
    assertTrue(Operations.run(a, intsRef("barf")));
    assertTrue(Operations.run(a, intsRef("barfo")));
    assertTrue(Operations.run(a, intsRef("barfoo")));
    assertTrue(Operations.run(a, intsRef("barfonz")));
    assertTrue(Operations.run(a, intsRef("barfop")));
    assertTrue(Operations.run(a, intsRef("barfoop")));
    assertTrue(Operations.run(a, intsRef("zzz")));
  }

  public void testMakeBinaryIntervalOpenMin() throws Exception {
    Automaton a = Automata.makeBinaryInterval(null, true, new BytesRef("foo"), true);
    assertFalse(Operations.run(a, intsRef("foz")));
    assertFalse(Operations.run(a, intsRef("zzz")));
    assertTrue(Operations.run(a, intsRef("foo")));
    assertTrue(Operations.run(a, intsRef("")));
    assertTrue(Operations.run(a, intsRef("a")));
    assertTrue(Operations.run(a, intsRef("aaa")));
    assertTrue(Operations.run(a, intsRef("bz")));
  }

  public void testMakeBinaryIntervalOpenBoth() throws Exception {
    Automaton a = Automata.makeBinaryInterval(null, true, null, true);
    assertTrue(Operations.run(a, intsRef("foz")));
    assertTrue(Operations.run(a, intsRef("zzz")));
    assertTrue(Operations.run(a, intsRef("foo")));
    assertTrue(Operations.run(a, intsRef("")));
    assertTrue(Operations.run(a, intsRef("a")));
    assertTrue(Operations.run(a, intsRef("aaa")));
    assertTrue(Operations.run(a, intsRef("bz")));
  }

  public void testAcceptAllEmptyStringMin() throws Exception {
    Automaton a = Automata.makeBinaryInterval(new BytesRef(), true, null, true);
    System.out.println("HERE: " + a.toDot());
    assertTrue(Operations.sameLanguage(Automata.makeAnyBinary(), a));
  }

  private static IntsRef toIntsRef(String s) {
    IntsRefBuilder b = new IntsRefBuilder();
    for (int i = 0, cp = 0; i < s.length(); i += Character.charCount(cp)) {

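A minimal standalone sketch of the calls the new tests exercise, assuming the Lucene 5.x automaton APIs referenced above (Automata, Operations, and the FST Util helper); the class name and term values are illustrative only:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.IntsRefBuilder;
    import org.apache.lucene.util.automaton.Automata;
    import org.apache.lucene.util.automaton.Automaton;
    import org.apache.lucene.util.automaton.Operations;
    import org.apache.lucene.util.fst.Util;

    public class BinaryIntervalSketch {
      public static void main(String[] args) {
        // Accepts every binary term t with "bar" <= t <= "foo", both endpoints inclusive.
        Automaton a = Automata.makeBinaryInterval(new BytesRef("bar"), true, new BytesRef("foo"), true);

        // Terms are fed to the automaton as ints, exactly as the tests above do.
        IntsRefBuilder scratch = new IntsRefBuilder();
        Util.toIntsRef(new BytesRef("baz"), scratch);
        System.out.println(Operations.run(a, scratch.toIntsRef())); // true: "bar" <= "baz" <= "foo"
      }
    }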
@@ -257,14 +257,14 @@ public class AssertingLeafReader extends FilterLeafReader {
    public TermState termState() throws IOException {
      assertThread("Terms enums", creationThread);
      assert state == State.POSITIONED : "termState() called on unpositioned TermsEnum";
      return super.termState();
      return in.termState();
    }

    @Override
    public void seekExact(BytesRef term, TermState state) throws IOException {
      assertThread("Terms enums", creationThread);
      assert term.isValid();
      super.seekExact(term, state);
      in.seekExact(term, state);
      this.state = State.POSITIONED;
    }
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -831,6 +831,14 @@ public final class TestUtil {
    return new String(buffer, 0, i);
  }

  /** Returns a random binary term. */
  public static BytesRef randomBinaryTerm(Random r) {
    int length = r.nextInt(15);
    BytesRef b = new BytesRef(length);
    r.nextBytes(b.bytes);
    b.length = length;
    return b;
  }

  /** Return a Codec that can read any of the
   * default codecs and formats, but always writes in the specified
@@ -858,7 +866,7 @@ public final class TestUtil {
    // (and maybe their params, too) to infostream on flush and merge.
    // otherwise in a real debugging situation we won't know whats going on!
    if (LuceneTestCase.VERBOSE) {
      System.out.println("forcing docvalues format to:" + format);
      System.out.println("TestUtil: forcing docvalues format to:" + format);
    }
    return new AssertingCodec() {
      @Override
@@ -1282,6 +1290,24 @@ public final class TestUtil {
      return sb.toString();
    }
  }

  /** For debugging: tries to include br.utf8ToString(), but if that
   * fails (because it's not valid utf8, which is fine!), just
   * use ordinary toString. */
  public static String bytesRefToString(BytesRef br) {
    if (br == null) {
      return "(null)";
    } else {
      try {
        return br.utf8ToString() + " " + br.toString();
      } catch (IllegalArgumentException t) {
        // If BytesRef isn't actually UTF8, or it's eg a
        // prefix of UTF8 that ends mid-unicode-char, we
        // fallback to hex:
        return br.toString();
      }
    }
  }

  /** Returns a copy of directory, entirely in RAM */
  public static RAMDirectory ramCopyOf(Directory dir) throws IOException {
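A tiny usage sketch for the new debugging helper, assuming a caller that already has a BytesRef in hand; the term value is illustrative:

    BytesRef term = new BytesRef("barfoo");
    // For valid UTF-8 this prints the decoded string followed by the raw byte form,
    // roughly "barfoo [62 61 72 66 6f 6f]"; for non-UTF-8 bytes only the raw form is printed.
    System.out.println("term=" + TestUtil.bytesRefToString(term));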
@ -17,6 +17,12 @@
|
|||
|
||||
package org.apache.solr.client.solrj.embedded;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
|
@ -42,11 +48,7 @@ import org.apache.solr.response.ResultContext;
|
|||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.servlet.SolrRequestParsers;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.nio.file.Path;
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
/**
|
||||
* SolrClient that connects directly to a CoreContainer.
|
||||
|
@@ -170,7 +172,7 @@ public class EmbeddedSolrServer extends SolrClient {
      }

      req = _parser.buildRequestFrom(core, params, request.getContentStreams());
      req.getContext().put("path", path);
      req.getContext().put(PATH, path);
      SolrQueryResponse rsp = new SolrQueryResponse();
      SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
@ -17,6 +17,22 @@ package org.apache.solr.cloud;
|
|||
* the License.
|
||||
*/
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.solr.client.solrj.SolrResponse;
|
||||
import org.apache.solr.cloud.overseer.ClusterStateMutator;
|
||||
|
@ -47,25 +63,10 @@ import org.apache.zookeeper.KeeperException;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.ListIterator;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.apache.solr.cloud.OverseerCollectionProcessor.ONLY_ACTIVE_NODES;
|
||||
import static org.apache.solr.cloud.OverseerCollectionProcessor.SHARD_UNIQUE;
|
||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.BALANCESHARDUNIQUE;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* Cluster leader. Responsible for processing state updates, node assignments, creating/deleting
|
||||
|
@@ -397,7 +398,7 @@ public class Overseer implements Closeable {
  }

  private void handleProp(ZkNodeProps message) {
    String name = message.getStr("name");
    String name = message.getStr(NAME);
    String val = message.getStr("val");
    Map m = reader.getClusterProps();
    if(val ==null) m.remove(name);
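The change above is representative of most of the Solr hunks in this commit: string literals such as "name" are swapped for the shared CommonParams constants. A small sketch of the resulting call style, assuming the static import of CommonParams.NAME shown earlier in this file; the collection name is illustrative:

    import static org.apache.solr.common.params.CommonParams.NAME;

    // Producer and consumer of an Overseer message now share one constant,
    // so the key spelling can no longer drift between them.
    ZkNodeProps msg = new ZkNodeProps(ZkNodeProps.makeMap(NAME, "collection1"));
    String collectionName = msg.getStr(NAME); // "collection1"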
@ -17,8 +17,27 @@ package org.apache.solr.cloud;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.SynchronousQueue;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.solr.client.solrj.SolrResponse;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
|
@ -44,7 +63,6 @@ import org.apache.solr.common.cloud.PlainIdRouter;
|
|||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.RoutingRule;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.Slice.State;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkConfigManager;
|
||||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||
|
@ -76,26 +94,6 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
import org.slf4j.MDC;
|
||||
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.SynchronousQueue;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.apache.solr.cloud.Assign.getNodesForNewShard;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.BASE_URL_PROP;
|
||||
import static org.apache.solr.common.cloud.ZkStateReader.COLLECTION_PROP;
|
||||
|
@ -118,6 +116,7 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.DE
|
|||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETEREPLICAPROP;
|
||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.DELETESHARD;
|
||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
|
||||
public class OverseerCollectionProcessor implements Runnable, Closeable {
|
||||
|
@ -326,7 +325,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
for (QueueEvent head : heads) {
|
||||
final ZkNodeProps message = ZkNodeProps.load(head.getBytes());
|
||||
String collectionName = message.containsKey(COLLECTION_PROP) ?
|
||||
message.getStr(COLLECTION_PROP) : message.getStr("name");
|
||||
message.getStr(COLLECTION_PROP) : message.getStr(NAME);
|
||||
String asyncId = message.getStr(ASYNC);
|
||||
if (hasLeftOverItems) {
|
||||
if (head.getId().equals(oldestItemInWorkQueue))
|
||||
|
@ -381,7 +380,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
|
||||
private boolean checkExclusivity(ZkNodeProps message, String id) throws KeeperException, InterruptedException {
|
||||
String collectionName = message.containsKey(COLLECTION_PROP) ?
|
||||
message.getStr(COLLECTION_PROP) : message.getStr("name");
|
||||
message.getStr(COLLECTION_PROP) : message.getStr(NAME);
|
||||
|
||||
if(collectionName == null)
|
||||
return true;
|
||||
|
@ -639,7 +638,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
}
|
||||
} catch (Exception e) {
|
||||
String collName = message.getStr("collection");
|
||||
if (collName == null) collName = message.getStr("name");
|
||||
if (collName == null) collName = message.getStr(NAME);
|
||||
|
||||
if (collName == null) {
|
||||
SolrException.log(log, "Operation " + operation + " failed", e);
|
||||
|
@ -1127,7 +1126,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
|
||||
private void deleteCollection(ZkNodeProps message, NamedList results)
|
||||
throws KeeperException, InterruptedException {
|
||||
final String collection = message.getStr("name");
|
||||
final String collection = message.getStr(NAME);
|
||||
try {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set(CoreAdminParams.ACTION, CoreAdminAction.UNLOAD.toString());
|
||||
|
@ -1137,7 +1136,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
null);
|
||||
|
||||
ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
|
||||
DELETE.toLower(), "name", collection);
|
||||
DELETE.toLower(), NAME, collection);
|
||||
Overseer.getInQueue(zkStateReader.getZkClient()).offer(
|
||||
ZkStateReader.toJSON(m));
|
||||
|
||||
|
@ -1179,7 +1178,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
}
|
||||
|
||||
private void createAlias(Aliases aliases, ZkNodeProps message) {
|
||||
String aliasName = message.getStr("name");
|
||||
String aliasName = message.getStr(NAME);
|
||||
String collections = message.getStr("collections");
|
||||
|
||||
Map previousMDCContext = MDC.getCopyOfContextMap();
|
||||
|
@ -1258,7 +1257,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
}
|
||||
|
||||
private void deleteAlias(Aliases aliases, ZkNodeProps message) {
|
||||
String aliasName = message.getStr("name");
|
||||
String aliasName = message.getStr(NAME);
|
||||
Map previousMDCContext = MDC.getCopyOfContextMap();
|
||||
MDCUtils.setCollection(aliasName);
|
||||
|
||||
|
@ -1321,7 +1320,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
if (created) break;
|
||||
}
|
||||
if (!created)
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create shard: " + message.getStr("name"));
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create shard: " + message.getStr(NAME));
|
||||
|
||||
|
||||
String configName = message.getStr(COLL_CONF);
|
||||
|
@ -1996,7 +1995,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
log.info("Deleting temporary collection: " + tempSourceCollectionName);
|
||||
Map<String, Object> props = ZkNodeProps.makeMap(
|
||||
Overseer.QUEUE_OPERATION, DELETE.toLower(),
|
||||
"name", tempSourceCollectionName);
|
||||
NAME, tempSourceCollectionName);
|
||||
|
||||
try {
|
||||
deleteCollection(new ZkNodeProps(props), results);
|
||||
|
@ -2080,7 +2079,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
String configName = zkStateReader.readConfigName(sourceCollection.getName());
|
||||
Map<String, Object> props = ZkNodeProps.makeMap(
|
||||
Overseer.QUEUE_OPERATION, CREATE.toLower(),
|
||||
"name", tempSourceCollectionName,
|
||||
NAME, tempSourceCollectionName,
|
||||
ZkStateReader.REPLICATION_FACTOR, 1,
|
||||
NUM_SLICES, 1,
|
||||
COLL_CONF, configName,
|
||||
|
@ -2209,7 +2208,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
log.info("Deleting temporary collection: " + tempSourceCollectionName);
|
||||
props = ZkNodeProps.makeMap(
|
||||
Overseer.QUEUE_OPERATION, DELETE.toLower(),
|
||||
"name", tempSourceCollectionName);
|
||||
NAME, tempSourceCollectionName);
|
||||
deleteCollection(new ZkNodeProps(props), results);
|
||||
} catch (Exception e) {
|
||||
log.error("Unable to delete temporary collection: " + tempSourceCollectionName
|
||||
|
@ -2290,7 +2289,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
}
|
||||
|
||||
private void createCollection(ClusterState clusterState, ZkNodeProps message, NamedList results) throws KeeperException, InterruptedException {
|
||||
String collectionName = message.getStr("name");
|
||||
String collectionName = message.getStr(NAME);
|
||||
if (clusterState.hasCollection(collectionName)) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "collection already exists: " + collectionName);
|
||||
}
|
||||
|
@ -2376,11 +2375,11 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
boolean created = false;
|
||||
while (System.nanoTime() < waitUntil) {
|
||||
Thread.sleep(100);
|
||||
created = zkStateReader.getClusterState().getCollections().contains(message.getStr("name"));
|
||||
created = zkStateReader.getClusterState().getCollections().contains(message.getStr(NAME));
|
||||
if(created) break;
|
||||
}
|
||||
if (!created)
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create collection: " + message.getStr("name"));
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not fully create collection: " + message.getStr(NAME));
|
||||
|
||||
// For tracking async calls.
|
||||
HashMap<String, String> requestMap = new HashMap<String, String>();
|
||||
|
@ -2664,7 +2663,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
|
||||
private void collectionCmd(ClusterState clusterState, ZkNodeProps message, ModifiableSolrParams params, NamedList results, String stateMatcher) {
|
||||
log.info("Executing Collection Cmd : " + params);
|
||||
String collectionName = message.getStr("name");
|
||||
String collectionName = message.getStr(NAME);
|
||||
ShardHandler shardHandler = shardHandlerFactory.getShardHandler();
|
||||
|
||||
DocCollection coll = clusterState.getCollection(collectionName);
|
||||
|
@ -2856,7 +2855,7 @@ public class OverseerCollectionProcessor implements Runnable, Closeable {
|
|||
boolean success = false;
|
||||
String asyncId = message.getStr(ASYNC);
|
||||
String collectionName = message.containsKey(COLLECTION_PROP) ?
|
||||
message.getStr(COLLECTION_PROP) : message.getStr("name");
|
||||
message.getStr(COLLECTION_PROP) : message.getStr(NAME);
|
||||
Map previousMDCContext = MDC.getCopyOfContextMap();
|
||||
MDCUtils.setCollection(collectionName);
|
||||
try {
|
||||
|
|
|
@@ -2282,7 +2282,7 @@ public final class ZkController {
          }
        });
      } else {
        throw new SolrException(ErrorCode.SERVER_ERROR, "This conf directory is not valid");
        throw new SolrException(ErrorCode.SERVER_ERROR, "This conf directory is not valid "+ confDir);
      }
    }
  }
|
@ -37,7 +37,7 @@ import org.apache.solr.common.cloud.ZkStateReader;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static java.util.Collections.singletonMap;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
public class ClusterStateMutator {
|
||||
private static Logger log = LoggerFactory.getLogger(ClusterStateMutator.class);
|
||||
|
@@ -49,7 +49,7 @@ public class ClusterStateMutator {
  }

  public ZkWriteCommand createCollection(ClusterState clusterState, ZkNodeProps message) {
    String cName = message.getStr("name");
    String cName = message.getStr(NAME);
    log.info("building a new cName: " + cName);
    if (clusterState.hasCollection(cName)) {
      log.warn("Collection {} already exists. exit", cName);
@ -68,7 +68,7 @@ public class ClusterStateMutator {
|
|||
}
|
||||
|
||||
Map<String, Object> routerSpec = DocRouter.getRouterSpec(message);
|
||||
String routerName = routerSpec.get("name") == null ? DocRouter.DEFAULT_NAME : (String) routerSpec.get("name");
|
||||
String routerName = routerSpec.get(NAME) == null ? DocRouter.DEFAULT_NAME : (String) routerSpec.get(NAME);
|
||||
DocRouter router = DocRouter.getDocRouter(routerName);
|
||||
|
||||
List<DocRouter.Range> ranges = router.partitionRange(shards.size(), router.fullRange());
|
||||
|
@ -110,8 +110,8 @@ public class ClusterStateMutator {
|
|||
}
|
||||
|
||||
public ZkWriteCommand deleteCollection(ClusterState clusterState, ZkNodeProps message) {
|
||||
final String collection = message.getStr("name");
|
||||
if (!CollectionMutator.checkKeyExistence(message, "name")) return ZkStateWriter.NO_OP;
|
||||
final String collection = message.getStr(NAME);
|
||||
if (!CollectionMutator.checkKeyExistence(message, NAME)) return ZkStateWriter.NO_OP;
|
||||
DocCollection coll = clusterState.getCollectionOrNull(collection);
|
||||
if (coll == null) return ZkStateWriter.NO_OP;
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ import org.apache.solr.common.cloud.ZkStateReader;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
public class CollectionMutator {
|
||||
private static Logger log = LoggerFactory.getLogger(CollectionMutator.class);
|
||||
|
||||
|
@ -92,7 +94,7 @@ public class CollectionMutator {
|
|||
slices = new LinkedHashMap<>(1);
|
||||
slices.put(slice.getName(), slice);
|
||||
Map<String, Object> props = new HashMap<>(1);
|
||||
props.put(DocCollection.DOC_ROUTER, ZkNodeProps.makeMap("name", ImplicitDocRouter.NAME));
|
||||
props.put(DocCollection.DOC_ROUTER, ZkNodeProps.makeMap(NAME, ImplicitDocRouter.NAME));
|
||||
newCollection = new DocCollection(collectionName, slices, props, new ImplicitDocRouter());
|
||||
} else {
|
||||
slices = new LinkedHashMap<>(collection.getSlicesMap()); // make a shallow copy
|
||||
|
|
|
@ -17,9 +17,6 @@ package org.apache.solr.cloud.overseer;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import static org.apache.solr.cloud.OverseerCollectionProcessor.*;
|
||||
import static org.apache.solr.cloud.overseer.CollectionMutator.*;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
|
@ -42,6 +39,11 @@ import org.apache.solr.common.cloud.ZkStateReader;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.cloud.OverseerCollectionProcessor.COLL_PROP_PREFIX;
|
||||
import static org.apache.solr.cloud.overseer.CollectionMutator.checkCollectionKeyExistence;
|
||||
import static org.apache.solr.cloud.overseer.CollectionMutator.checkKeyExistence;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
public class ReplicaMutator {
|
||||
private static Logger log = LoggerFactory.getLogger(ReplicaMutator.class);
|
||||
|
||||
|
@ -214,7 +216,7 @@ public class ReplicaMutator {
|
|||
boolean collectionExists = prevState.hasCollection(cName);
|
||||
if (!collectionExists && numShards != null) {
|
||||
ClusterStateMutator.getShardNames(numShards, shardNames);
|
||||
Map<String, Object> createMsg = ZkNodeProps.makeMap("name", cName);
|
||||
Map<String, Object> createMsg = ZkNodeProps.makeMap(NAME, cName);
|
||||
createMsg.putAll(message.getProperties());
|
||||
writeCommand = new ClusterStateMutator(zkStateReader).createCollection(prevState, new ZkNodeProps(createMsg));
|
||||
DocCollection collection = writeCommand.collection;
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
|
||||
import org.apache.solr.cloud.Assign;
|
||||
import org.apache.solr.cloud.Overseer;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
|
@ -40,6 +39,7 @@ import org.slf4j.LoggerFactory;
|
|||
import static org.apache.solr.cloud.OverseerCollectionProcessor.COLL_PROP_PREFIX;
|
||||
import static org.apache.solr.cloud.overseer.CollectionMutator.checkCollectionKeyExistence;
|
||||
import static org.apache.solr.common.cloud.ZkNodeProps.makeMap;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
public class SliceMutator {
|
||||
private static Logger log = LoggerFactory.getLogger(SliceMutator.class);
|
||||
|
@ -123,7 +123,7 @@ public class SliceMutator {
|
|||
// if there are no slices left in the collection, remove it?
|
||||
if (newSlices.size() == 0) {
|
||||
return new ClusterStateMutator(zkStateReader).deleteCollection(clusterState,
|
||||
new ZkNodeProps(ZkNodeProps.makeMap("name", collection)));
|
||||
new ZkNodeProps(ZkNodeProps.makeMap(NAME, collection)));
|
||||
} else {
|
||||
return new ZkWriteCommand(collection, coll.copyWithSlices(newSlices));
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collections;
|
|||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.handler.PingRequestHandler;
|
||||
import org.apache.solr.handler.RealTimeGetHandler;
|
||||
|
@ -43,6 +44,9 @@ import org.apache.solr.request.SolrRequestHandler;
|
|||
|
||||
import static java.util.Collections.singletonMap;
|
||||
import static org.apache.solr.common.cloud.ZkNodeProps.makeMap;
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
import static org.apache.solr.common.params.CommonParams.WT;
|
||||
import static org.apache.solr.core.PluginInfo.DEFAULTS;
|
||||
import static org.apache.solr.core.PluginInfo.INVARIANTS;
|
||||
|
||||
|
@ -66,9 +70,9 @@ public class ImplicitPlugins {
|
|||
|
||||
implicits.add(getReqHandlerInfo("/get", RealTimeGetHandler.class,
|
||||
makeMap(
|
||||
"omitHeader", "true",
|
||||
"wt", "json",
|
||||
"indent", "true")));
|
||||
"omitHeader", "true",
|
||||
WT, JSON,
|
||||
"indent", "true")));
|
||||
//register adminHandlers
|
||||
implicits.add(getReqHandlerInfo("/admin/luke", LukeRequestHandler.class, null));
|
||||
implicits.add(getReqHandlerInfo("/admin/system", SystemInfoHandler.class, null));
|
||||
|
@@ -87,8 +91,7 @@ public class ImplicitPlugins {

  public static PluginInfo getReqHandlerInfo(String name, Class clz, Map defaults){
    if(defaults == null) defaults= Collections.emptyMap();
    Map m = makeMap("name", name, "class", clz.getName());
    Map m = makeMap(NAME, name, "class", clz.getName());
    return new PluginInfo(SolrRequestHandler.TYPE, m, new NamedList<>(singletonMap(DEFAULTS, new NamedList(defaults))),null);
  }
  public static final String IMPLICIT = "implicit";
}
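For context, a sketch of how this factory is invoked when the implicit handlers are registered, mirroring the /admin/luke and /admin/system registrations earlier in this file; the /admin/ping path is illustrative here:

    // NAME and "class" end up as attributes of the resulting PluginInfo; defaults may be null.
    PluginInfo ping = ImplicitPlugins.getReqHandlerInfo("/admin/ping", PingRequestHandler.class, null);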
|
@ -17,15 +17,21 @@ package org.apache.solr.core;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
import static org.apache.solr.core.PluginInfo.APPENDS;
|
||||
import static org.apache.solr.core.PluginInfo.DEFAULTS;
|
||||
import static org.apache.solr.core.PluginInfo.INVARIANTS;
|
||||
|
||||
/**
|
||||
* An Object which represents a {@code <initParams>} tag
|
||||
*/
|
||||
|
@ -36,18 +42,18 @@ public class InitParams {
|
|||
public final NamedList defaults, invariants, appends;
|
||||
|
||||
public InitParams(PluginInfo p) {
|
||||
this.name = p.attributes.get("name");
|
||||
this.name = p.attributes.get(NAME);
|
||||
Set<String> paths = null;
|
||||
String pathStr = p.attributes.get("path");
|
||||
String pathStr = p.attributes.get(PATH);
|
||||
if (pathStr != null) {
|
||||
paths = Collections.unmodifiableSet(new HashSet<>(StrUtils.splitSmart(pathStr, ',')));
|
||||
}
|
||||
this.paths = paths;
|
||||
NamedList nl = (NamedList) p.initArgs.get(PluginInfo.DEFAULTS);
|
||||
NamedList nl = (NamedList) p.initArgs.get(DEFAULTS);
|
||||
defaults = nl == null ? null : nl.getImmutableCopy();
|
||||
nl = (NamedList) p.initArgs.get(PluginInfo.INVARIANTS);
|
||||
nl = (NamedList) p.initArgs.get(INVARIANTS);
|
||||
invariants = nl == null ? null : nl.getImmutableCopy();
|
||||
nl = (NamedList) p.initArgs.get(PluginInfo.APPENDS);
|
||||
nl = (NamedList) p.initArgs.get(APPENDS);
|
||||
appends = nl == null ? null : nl.getImmutableCopy();
|
||||
}
|
||||
|
||||
|
@ -83,13 +89,13 @@ public class InitParams {
|
|||
public void apply(PluginInfo info) {
|
||||
if (!info.isFromSolrConfig()) {
|
||||
//if this is a component implicitly defined in code it should be overridden by initPrams
|
||||
merge(defaults, (NamedList) info.initArgs.get(PluginInfo.DEFAULTS), info.initArgs, PluginInfo.DEFAULTS, false);
|
||||
merge(defaults, (NamedList) info.initArgs.get(DEFAULTS), info.initArgs, DEFAULTS, false);
|
||||
} else {
|
||||
//if the args is initialized from solrconfig.xml inside the requesthHandler it should be taking precedence over initParams
|
||||
merge((NamedList) info.initArgs.get(PluginInfo.DEFAULTS), defaults, info.initArgs, PluginInfo.DEFAULTS, false);
|
||||
merge((NamedList) info.initArgs.get(DEFAULTS), defaults, info.initArgs, DEFAULTS, false);
|
||||
}
|
||||
merge((NamedList) info.initArgs.get(PluginInfo.INVARIANTS), invariants, info.initArgs, PluginInfo.INVARIANTS, false);
|
||||
merge((NamedList) info.initArgs.get(PluginInfo.APPENDS), appends, info.initArgs, PluginInfo.APPENDS, true);
|
||||
merge((NamedList) info.initArgs.get(INVARIANTS), invariants, info.initArgs, INVARIANTS, false);
|
||||
merge((NamedList) info.initArgs.get(APPENDS), appends, info.initArgs, APPENDS, true);
|
||||
}
|
||||
|
||||
private static void merge(NamedList first, NamedList second, NamedList sink, String name, boolean appends) {
|
||||
|
|
|
@ -16,14 +16,21 @@
|
|||
*/
|
||||
package org.apache.solr.core;
|
||||
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrConfig.JmxConfiguration;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.management.*;
|
||||
import javax.management.Attribute;
|
||||
import javax.management.AttributeList;
|
||||
import javax.management.AttributeNotFoundException;
|
||||
import javax.management.DynamicMBean;
|
||||
import javax.management.InvalidAttributeValueException;
|
||||
import javax.management.MBeanAttributeInfo;
|
||||
import javax.management.MBeanException;
|
||||
import javax.management.MBeanInfo;
|
||||
import javax.management.MBeanServer;
|
||||
import javax.management.MBeanServerFactory;
|
||||
import javax.management.MalformedObjectNameException;
|
||||
import javax.management.ObjectName;
|
||||
import javax.management.Query;
|
||||
import javax.management.QueryExp;
|
||||
import javax.management.ReflectionException;
|
||||
import javax.management.openmbean.OpenMBeanAttributeInfoSupport;
|
||||
import javax.management.openmbean.OpenType;
|
||||
import javax.management.openmbean.SimpleType;
|
||||
|
@ -32,9 +39,23 @@ import javax.management.remote.JMXConnectorServerFactory;
|
|||
import javax.management.remote.JMXServiceURL;
|
||||
import java.lang.reflect.Field;
|
||||
import java.lang.reflect.Method;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.Hashtable;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.SolrConfig.JmxConfiguration;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Responsible for finding (or creating) a MBeanServer from given configuration
|
||||
|
@ -239,7 +260,7 @@ public class JmxMonitoredMap<K, V> extends
|
|||
staticStats = new HashSet<>();
|
||||
|
||||
// For which getters are already available in SolrInfoMBean
|
||||
staticStats.add("name");
|
||||
staticStats.add(NAME);
|
||||
staticStats.add("version");
|
||||
staticStats.add("description");
|
||||
staticStats.add("category");
|
||||
|
|
|
@ -43,6 +43,8 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* This manages the lifecycle of a set of plugin of the same type .
|
||||
*/
|
||||
|
@ -372,7 +374,7 @@ public class PluginBag<T> implements AutoCloseable {
|
|||
|
||||
@Override
|
||||
public void init(PluginInfo info) {
|
||||
name = info.attributes.get("name");
|
||||
name = info.attributes.get(NAME);
|
||||
Object v = info.attributes.get("version");
|
||||
if (name == null || v == null) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "runtimeLib must have name and version");
|
||||
|
|
|
@ -18,6 +18,28 @@
|
|||
package org.apache.solr.core;
|
||||
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
|
@ -55,28 +77,8 @@ import org.w3c.dom.NodeList;
|
|||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import javax.xml.parsers.ParserConfigurationException;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import java.io.File;
|
||||
import java.io.FileFilter;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import java.util.UUID;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
import static org.apache.solr.core.ConfigOverlay.ZNODEVER;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.LAZY;
|
||||
import static org.apache.solr.core.SolrConfig.PluginOpts.MULTI_OK;
|
||||
|
@ -242,7 +244,7 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
CacheConfig conf = CacheConfig.getConfig(this, "query/fieldValueCache");
|
||||
if (conf == null) {
|
||||
Map<String, String> args = new HashMap<>();
|
||||
args.put("name", "fieldValueCache");
|
||||
args.put(NAME, "fieldValueCache");
|
||||
args.put("size", "10000");
|
||||
args.put("initialSize", "10");
|
||||
args.put("showItems", "-1");
|
||||
|
@ -741,7 +743,7 @@ public class SolrConfig extends Config implements MapSerializable {
|
|||
Node node = nodes.item(i);
|
||||
|
||||
String baseDir = DOMUtil.getAttr(node, "dir");
|
||||
String path = DOMUtil.getAttr(node, "path");
|
||||
String path = DOMUtil.getAttr(node, PATH);
|
||||
if (null != baseDir) {
|
||||
// :TODO: add support for a simpler 'glob' mutually exclusive of regex
|
||||
String regex = DOMUtil.getAttr(node, "regex");
|
||||
|
|
|
@ -66,7 +66,6 @@ import org.apache.lucene.search.BooleanQuery;
|
|||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.IOContext;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.Lock;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
|
||||
import org.apache.solr.cloud.CloudDescriptor;
|
||||
|
@ -146,6 +145,8 @@ import org.slf4j.Logger;
|
|||
import org.slf4j.LoggerFactory;
|
||||
import org.xml.sax.SAXException;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
@ -2012,7 +2013,7 @@ public final class SolrCore implements SolrInfoMBean, Closeable {
|
|||
// for back compat, we set these now just in case other code
|
||||
// are expecting them during handleRequest
|
||||
toLog.add("webapp", req.getContext().get("webapp"));
|
||||
toLog.add("path", req.getContext().get("path"));
|
||||
toLog.add(PATH, req.getContext().get(PATH));
|
||||
|
||||
final SolrParams params = req.getParams();
|
||||
final String lpList = params.get(CommonParams.LOG_PARAMS_LIST);
|
||||
|
|
|
@ -17,6 +17,20 @@ package org.apache.solr.core;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
|
||||
import com.google.common.base.Strings;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -31,19 +45,7 @@ import org.w3c.dom.Node;
|
|||
import org.w3c.dom.NodeList;
|
||||
import org.xml.sax.InputSource;
|
||||
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.InputStream;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.nio.file.Path;
|
||||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.Set;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -176,7 +178,7 @@ public class SolrXmlConfig {
|
|||
Properties properties = new Properties();
|
||||
for (int i = 0; i < props.getLength(); i++) {
|
||||
Node prop = props.item(i);
|
||||
properties.setProperty(DOMUtil.getAttr(prop, "name"),
|
||||
properties.setProperty(DOMUtil.getAttr(prop, NAME),
|
||||
PropertiesUtil.substituteProperty(DOMUtil.getAttr(prop, "value"), null));
|
||||
}
|
||||
return properties;
|
||||
|
|
|
@ -59,6 +59,7 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
import static java.util.Collections.singletonMap;
|
||||
import static org.apache.solr.common.cloud.ZkNodeProps.makeMap;
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
|
||||
public class BlobHandler extends RequestHandlerBase implements PluginInfoInitialized {
|
||||
protected static final Logger log = LoggerFactory.getLogger(BlobHandler.class);
|
||||
|
@ -72,7 +73,7 @@ public class BlobHandler extends RequestHandlerBase implements PluginInfoInitial
|
|||
public void handleRequestBody(final SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
|
||||
String httpMethod = (String) req.getContext().get("httpMethod");
|
||||
String path = (String) req.getContext().get("path");
|
||||
SolrConfigHandler.setWt(req, "json");
|
||||
SolrConfigHandler.setWt(req, JSON);
|
||||
|
||||
List<String> pieces = StrUtils.splitSmart(path, '/');
|
||||
String blobName = null;
|
||||
|
|
|
@ -17,6 +17,16 @@
|
|||
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.Collection;
|
||||
import java.util.Iterator;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -39,13 +49,7 @@ import org.apache.solr.util.EmptyEntityResolver;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.*;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* An analysis handler that provides a breakdown of the analysis process of provided documents. This handler expects a
|
||||
|
@ -311,7 +315,7 @@ public class DocumentAnalysisRequestHandler extends AnalysisRequestHandlerBase {
|
|||
|
||||
for (int i = 0; i < reader.getAttributeCount(); i++) {
|
||||
String attrName = reader.getAttributeLocalName(i);
|
||||
if ("name".equals(attrName)) {
|
||||
if (NAME.equals(attrName)) {
|
||||
fieldName = reader.getAttributeValue(i);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,12 +26,12 @@ import org.apache.commons.io.IOUtils;
|
|||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.InitParams;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.util.plugin.PluginInfoInitialized;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
public class DumpRequestHandler extends RequestHandlerBase
|
||||
{
|
||||
|
@ -68,7 +68,7 @@ public class DumpRequestHandler extends RequestHandlerBase
|
|||
// Cycle through each stream
|
||||
for( ContentStream content : req.getContentStreams() ) {
|
||||
NamedList<Object> stream = new SimpleOrderedMap<>();
|
||||
stream.add( "name", content.getName() );
|
||||
stream.add(NAME, content.getName());
|
||||
stream.add( "sourceInfo", content.getSourceInfo() );
|
||||
stream.add( "size", content.getSize() );
|
||||
stream.add( "contentType", content.getContentType() );
|
||||
|
|
|
@ -16,26 +16,6 @@
|
|||
*/
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import static org.apache.solr.handler.ReplicationHandler.ALIAS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMMAND;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
|
||||
import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.GENERATION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.NAME;
|
||||
import static org.apache.solr.handler.ReplicationHandler.OFFSET;
|
||||
import static org.apache.solr.handler.ReplicationHandler.SIZE;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
|
@ -109,6 +89,27 @@ import org.apache.solr.util.RefCounted;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.JAVABIN;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
import static org.apache.solr.handler.ReplicationHandler.ALIAS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CHECKSUM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_DETAILS;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_GET_FILE_LIST;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CMD_INDEX_VERSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMMAND;
|
||||
import static org.apache.solr.handler.ReplicationHandler.COMPRESSION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILES;
|
||||
import static org.apache.solr.handler.ReplicationHandler.CONF_FILE_SHORT;
|
||||
import static org.apache.solr.handler.ReplicationHandler.EXTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE;
|
||||
import static org.apache.solr.handler.ReplicationHandler.FILE_STREAM;
|
||||
import static org.apache.solr.handler.ReplicationHandler.GENERATION;
|
||||
import static org.apache.solr.handler.ReplicationHandler.INTERNAL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.MASTER_URL;
|
||||
import static org.apache.solr.handler.ReplicationHandler.OFFSET;
|
||||
import static org.apache.solr.handler.ReplicationHandler.SIZE;
|
||||
|
||||
/**
|
||||
* <p> Provides functionality of downloading changed index files as well as config files and a timer for scheduling fetches from the
|
||||
* master. </p>
|
||||
|
@@ -196,7 +197,7 @@ public class IndexFetcher {
  NamedList getLatestVersion() throws IOException {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(COMMAND, CMD_INDEX_VERSION);
    params.set(CommonParams.WT, "javabin");
    params.set(CommonParams.WT, JAVABIN);
    params.set(CommonParams.QT, "/replication");
    QueryRequest req = new QueryRequest(params);
@@ -218,7 +219,7 @@ public class IndexFetcher {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(COMMAND, CMD_GET_FILE_LIST);
    params.set(GENERATION, String.valueOf(gen));
    params.set(CommonParams.WT, "javabin");
    params.set(CommonParams.WT, JAVABIN);
    params.set(CommonParams.QT, "/replication");
    QueryRequest req = new QueryRequest(params);
@@ -21,13 +21,15 @@ import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;

import static org.apache.solr.common.params.CommonParams.PATH;

/**
 * Does nothing other than showing a 404 message
 */
public class NotFoundRequestHandler extends RequestHandlerBase{
  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    throw new SolrException(SolrException.ErrorCode.NOT_FOUND,""+req.getContext().get("path") +" is not found");
    throw new SolrException(SolrException.ErrorCode.NOT_FOUND, "" + req.getContext().get(PATH) + " is not found");
  }

  @Override
|
|
@ -88,6 +88,8 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* <p> A Handler which provides a REST API for replication and serves replication requests from Slaves. </p>
|
||||
* <p>When running on the master, it provides the following commands <ol> <li>Get the current replicable index version
|
||||
|
@@ -295,12 +297,12 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
  }

  private void deleteSnapshot(ModifiableSolrParams params) {
    String name = params.get("name");
    String name = params.get(NAME);
    if(name == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "Missing mandatory param: name");
    }

    SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get("name"));
    SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
    snapShooter.validateDeleteSnapshot();
    snapShooter.deleteSnapAsync(this);
  }
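A small sketch of the parameters this method expects, assuming the statically imported NAME constant used above; the snapshot name and location values are illustrative:

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(NAME, "nightly");                 // read back above via params.get(NAME)
    params.set("location", "/var/backups/solr"); // handed to SnapShooter together with the name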
@ -398,7 +400,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
}
|
||||
|
||||
// small race here before the commit point is saved
|
||||
SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get("name"));
|
||||
SnapShooter snapShooter = new SnapShooter(core, params.get("location"), params.get(NAME));
|
||||
snapShooter.validateCreateSnapshot();
|
||||
snapShooter.createSnapAsync(indexCommit, numberToKeep, this);
|
||||
|
||||
|
@ -460,7 +462,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
long checksum = CodecUtil.retrieveChecksum(in);
|
||||
fileMeta.put(CHECKSUM, checksum);
|
||||
} catch(Exception e) {
|
||||
LOG.warn("Could not read checksum from index file.", e);
|
||||
LOG.warn("Could not read checksum from index file: " + file, e);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -478,7 +480,7 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
try {
|
||||
fileMeta.put(CHECKSUM, CodecUtil.retrieveChecksum(in));
|
||||
} catch(Exception e) {
|
||||
LOG.warn("Could not read checksum from index file.", e);
|
||||
LOG.warn("Could not read checksum from index file: " + infos.getSegmentsFileName(), e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1445,8 +1447,6 @@ public class ReplicationHandler extends RequestHandlerBase implements SolrCoreAw
|
|||
|
||||
public static final String FILE = "file";
|
||||
|
||||
public static final String NAME = "name";
|
||||
|
||||
public static final String SIZE = "size";
|
||||
|
||||
public static final String MAX_WRITE_PER_SECOND = "maxWriteMBPerSec";
|
||||
|
|
|
@@ -39,12 +39,14 @@ import org.apache.solr.schema.ZkIndexSchemaReader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.solr.common.params.CommonParams.JSON;

public class SchemaHandler extends RequestHandlerBase {
  private static final Logger log = LoggerFactory.getLogger(SchemaHandler.class);

  @Override
  public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
    SolrConfigHandler.setWt(req, "json");
    SolrConfigHandler.setWt(req, JSON);
    String httpMethod = (String) req.getContext().get("httpMethod");
    if ("POST".equals(httpMethod)) {
      if (req.getContentStreams() == null) {
@ -44,14 +44,12 @@ import org.apache.solr.client.solrj.SolrClient;
|
|||
import org.apache.solr.client.solrj.SolrRequest;
|
||||
import org.apache.solr.client.solrj.SolrResponse;
|
||||
import org.apache.solr.client.solrj.impl.HttpSolrClient;
|
||||
import org.apache.solr.cloud.ZkCLI;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
import org.apache.solr.cloud.ZkSolrResourceLoader;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.cloud.ClusterState;
|
||||
import org.apache.solr.common.cloud.Replica;
|
||||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.SolrZkClient;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
|
@ -62,22 +60,18 @@ import org.apache.solr.common.util.ContentStream;
|
|||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.ConfigOverlay;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.ImplicitPlugins;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.core.RequestParams;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.handler.admin.CollectionsHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.response.BinaryResponseWriter;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.SchemaManager;
|
||||
import org.apache.solr.util.CommandOperation;
|
||||
import org.apache.solr.util.DefaultSolrThreadFactory;
|
||||
import org.apache.zookeeper.KeeperException;
|
||||
import org.apache.zookeeper.data.Stat;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
|
@ -112,7 +106,7 @@ public class SolrConfigHandler extends RequestHandlerBase {
|
|||
@Override
|
||||
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
|
||||
|
||||
setWt(req, "json");
|
||||
setWt(req, CommonParams.JSON);
|
||||
String httpMethod = (String) req.getContext().get("httpMethod");
|
||||
Command command = new Command(req, rsp, httpMethod);
|
||||
if ("POST".equals(httpMethod)) {
|
||||
|
|
|
@ -17,15 +17,12 @@
|
|||
|
||||
package org.apache.solr.handler;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.cloud.ZkNodeProps;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.MapSolrParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
@ -33,7 +30,6 @@ import org.apache.solr.common.params.SolrParams;
|
|||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.core.PluginInfo;
|
||||
import org.apache.solr.handler.loader.CSVLoader;
|
||||
import org.apache.solr.handler.loader.ContentStreamLoader;
|
||||
import org.apache.solr.handler.loader.JavabinLoader;
|
||||
|
@ -45,8 +41,7 @@ import org.apache.solr.update.processor.UpdateRequestProcessor;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static java.util.Collections.singletonMap;
|
||||
import static org.apache.solr.common.cloud.ZkNodeProps.makeMap;
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
/**
|
||||
* UpdateHandler that uses content-type to pick the right Loader
|
||||
|
@@ -77,7 +72,7 @@ public class UpdateRequestHandler extends ContentStreamHandlerBase {
   public void load(SolrQueryRequest req, SolrQueryResponse rsp,
       ContentStream stream, UpdateRequestProcessor processor) throws Exception {

-    ContentStreamLoader loader = pathVsLoaders.get(req.getContext().get("path"));
+    ContentStreamLoader loader = pathVsLoaders.get(req.getContext().get(PATH));
     if(loader == null) {
       String type = req.getParams().get(UpdateParams.ASSUME_CONTENT_TYPE);
       if (type == null) {
@ -17,6 +17,17 @@ package org.apache.solr.handler.admin;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import com.google.common.collect.ImmutableSet;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.solr.client.solrj.SolrResponse;
|
||||
|
@ -59,17 +70,6 @@ import org.apache.zookeeper.KeeperException;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import static org.apache.solr.cloud.Overseer.QUEUE_OPERATION;
|
||||
import static org.apache.solr.cloud.OverseerCollectionProcessor.ASYNC;
|
||||
import static org.apache.solr.cloud.OverseerCollectionProcessor.COLL_CONF;
|
||||
|
@ -118,6 +118,7 @@ import static org.apache.solr.common.params.CollectionParams.CollectionAction.RE
|
|||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.RELOAD;
|
||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.REMOVEROLE;
|
||||
import static org.apache.solr.common.params.CollectionParams.CollectionAction.SPLITSHARD;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
public class CollectionsHandler extends RequestHandlerBase {
|
||||
protected static Logger log = LoggerFactory.getLogger(CollectionsHandler.class);
|
||||
|
@@ -607,8 +608,8 @@ public class CollectionsHandler extends RequestHandlerBase {
   }

   private void handleProp(SolrQueryRequest req, SolrQueryResponse rsp) throws KeeperException, InterruptedException {
-    req.getParams().required().check("name");
-    String name = req.getParams().get("name");
+    req.getParams().required().check(NAME);
+    String name = req.getParams().get(NAME);
     if(!OverseerCollectionProcessor.KNOWN_CLUSTER_PROPS.contains(name)){
       throw new SolrException(ErrorCode.BAD_REQUEST, "Not a known cluster property "+ name);
     }
@@ -616,7 +617,7 @@ public class CollectionsHandler extends RequestHandlerBase {
     Map<String,Object> props = ZkNodeProps.makeMap(
         Overseer.QUEUE_OPERATION, CLUSTERPROP.toLower() );
     copyIfNotNull(req.getParams(),props,
-        "name",
+        NAME,
         "val");

     Overseer.getInQueue(coreContainer.getZkController().getZkClient()).offer(ZkStateReader.toJSON(props)) ;
@@ -754,10 +755,10 @@ public class CollectionsHandler extends RequestHandlerBase {

   private void handleReloadAction(SolrQueryRequest req, SolrQueryResponse rsp) throws KeeperException, InterruptedException {
     log.info("Reloading Collection : " + req.getParamString());
-    String name = req.getParams().required().get("name");
+    String name = req.getParams().required().get(NAME);

     ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
-        RELOAD.toLower(), "name", name);
+        RELOAD.toLower(), NAME, name);

     handleResponse(RELOAD.toLower(), m, rsp);
   }
@@ -787,11 +788,11 @@ public class CollectionsHandler extends RequestHandlerBase {
   private void handleCreateAliasAction(SolrQueryRequest req,
       SolrQueryResponse rsp) throws Exception {
     log.info("Create alias action : " + req.getParamString());
-    String name = req.getParams().required().get("name");
+    String name = req.getParams().required().get(NAME);
     String collections = req.getParams().required().get("collections");

     ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
-        CREATEALIAS.toLower(), "name", name, "collections",
+        CREATEALIAS.toLower(), NAME, name, "collections",
         collections);

     handleResponse(CREATEALIAS.toLower(), m, rsp);
@@ -800,21 +801,21 @@ public class CollectionsHandler extends RequestHandlerBase {
   private void handleDeleteAliasAction(SolrQueryRequest req,
       SolrQueryResponse rsp) throws Exception {
     log.info("Delete alias action : " + req.getParamString());
-    String name = req.getParams().required().get("name");
+    String name = req.getParams().required().get(NAME);

     ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
-        DELETEALIAS.toLower(), "name", name);
+        DELETEALIAS.toLower(), NAME, name);

     handleResponse(DELETEALIAS.toLower(), m, rsp);
   }

   private void handleDeleteAction(SolrQueryRequest req, SolrQueryResponse rsp) throws KeeperException, InterruptedException {
     log.info("Deleting Collection : " + req.getParamString());

-    String name = req.getParams().required().get("name");
+    String name = req.getParams().required().get(NAME);

     ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION,
-        DELETE.toLower(), "name", name);
+        DELETE.toLower(), NAME, name);

     handleResponse(DELETE.toLower(), m, rsp);
   }
@@ -827,7 +828,7 @@ public class CollectionsHandler extends RequestHandlerBase {
   private void handleCreateAction(SolrQueryRequest req,
       SolrQueryResponse rsp) throws InterruptedException, KeeperException {
     log.info("Creating Collection : " + req.getParamString());
-    String name = req.getParams().required().get("name");
+    String name = req.getParams().required().get(NAME);
     if (name == null) {
       log.error("Collection name is required to create a new collection");
       throw new SolrException(ErrorCode.BAD_REQUEST,
@@ -839,7 +840,7 @@ public class CollectionsHandler extends RequestHandlerBase {
         CREATE.toLower(),
         "fromApi","true");
     copyIfNotNull(req.getParams(),props,
-        "name",
+        NAME,
         REPLICATION_FACTOR,
         COLL_CONF,
         NUM_SLICES,
@@ -901,7 +902,7 @@ public class CollectionsHandler extends RequestHandlerBase {
     log.info("Create shard: " + req.getParamString());
     req.getParams().required().check(COLLECTION_PROP, SHARD_ID_PROP);
     ClusterState clusterState = coreContainer.getZkController().getClusterState();
-    if(!ImplicitDocRouter.NAME.equals( ((Map) clusterState.getCollection(req.getParams().get(COLLECTION_PROP)).get(DOC_ROUTER)).get("name") ) )
+    if (!ImplicitDocRouter.NAME.equals(((Map) clusterState.getCollection(req.getParams().get(COLLECTION_PROP)).get(DOC_ROUTER)).get(NAME)))
       throw new SolrException(ErrorCode.BAD_REQUEST, "shards can be added only to 'implicit' collections" );

     Map<String, Object> map = makeMap(QUEUE_OPERATION, CREATESHARD.toLower());
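The CollectionsHandler hunks above all follow the same required-parameter idiom; a minimal sketch of that call sequence with a hypothetical action name (the values here are illustrative, not from this commit):

    import org.apache.solr.common.cloud.ZkNodeProps;
    import org.apache.solr.request.SolrQueryRequest;

    import static org.apache.solr.common.params.CommonParams.NAME;

    class ExampleAction {
      // Fails the request if "name" is missing, then builds the Overseer message
      // keyed by the shared NAME constant rather than the literal "name".
      ZkNodeProps buildMessage(SolrQueryRequest req) {
        req.getParams().required().check(NAME);
        String name = req.getParams().required().get(NAME);
        return new ZkNodeProps("operation", "examplecmd", NAME, name); // hypothetical operation key/value
      }
    }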
@ -17,6 +17,23 @@
|
|||
|
||||
package org.apache.solr.handler.admin;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
|
@ -68,24 +85,9 @@ import org.apache.zookeeper.KeeperException;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collection;
|
||||
import java.util.Collections;
|
||||
import java.util.Date;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
|
||||
import static org.apache.solr.common.cloud.DocCollection.DOC_ROUTER;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@@ -312,7 +314,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
     SolrParams params = adminReq.getParams();
     List<DocRouter.Range> ranges = null;

-    String[] pathsArr = params.getParams("path");
+    String[] pathsArr = params.getParams(PATH);
     String rangesStr = params.get(CoreAdminParams.RANGES); // ranges=a-b,c-d,e-f
     if (rangesStr != null) {
       String[] rangesArr = rangesStr.split(",");
@@ -1105,7 +1107,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
       // It would be a real mistake to load the cores just to get the status
       CoreDescriptor desc = cores.getUnloadedCoreDescriptor(cname);
       if (desc != null) {
-        info.add("name", desc.getName());
+        info.add(NAME, desc.getName());
         info.add("instanceDir", desc.getInstanceDir());
         // None of the following are guaranteed to be present in a not-yet-loaded core.
         String tmp = desc.getDataDir();
@@ -1119,7 +1121,7 @@ public class CoreAdminHandler extends RequestHandlerBase {
     } else {
       try (SolrCore core = cores.getCore(cname)) {
         if (core != null) {
-          info.add("name", core.getName());
+          info.add(NAME, core.getName());
           info.add("instanceDir", normalizePath(core.getResourceLoader().getInstanceDir()));
           info.add("dataDir", normalizePath(core.getDataDir()));
           info.add("config", core.getConfigResource());
@ -28,6 +28,7 @@ import org.apache.solr.response.SolrQueryResponse;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
public class InfoHandler extends RequestHandlerBase {
|
||||
protected static Logger log = LoggerFactory.getLogger(InfoHandler.class);
|
||||
|
@@ -75,7 +76,7 @@ public class InfoHandler extends RequestHandlerBase {
           "Core container instance missing");
     }

-    String path = (String) req.getContext().get("path");
+    String path = (String) req.getContext().get(PATH);
     int i = path.lastIndexOf('/');
     String name = path.substring(i + 1, path.length());

@ -29,6 +29,8 @@ import org.apache.solr.handler.RequestHandlerBase;
|
|||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* @since solr 1.2
|
||||
*/
|
||||
|
@ -60,7 +62,7 @@ public class PluginInfoHandler extends RequestHandlerBase
|
|||
SimpleOrderedMap<Object> info = new SimpleOrderedMap<>();
|
||||
category.add( entry.getKey(), info );
|
||||
|
||||
info.add( "name", (m.getName() !=null ? m.getName() : na) );
|
||||
info.add( NAME, (m.getName() !=null ? m.getName() : na) );
|
||||
info.add( "version", (m.getVersion() !=null ? m.getVersion() : na) );
|
||||
info.add( "description", (m.getDescription()!=null ? m.getDescription() : na) );
|
||||
info.add( "source", (m.getSource() !=null ? m.getSource() : na) );
|
||||
|
|
|
@ -18,12 +18,15 @@
|
|||
package org.apache.solr.handler.admin;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.handler.RequestHandlerBase;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
*
|
||||
* @since solr 1.2
|
||||
|
@ -34,7 +37,7 @@ public class PropertiesRequestHandler extends RequestHandlerBase
|
|||
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException
|
||||
{
|
||||
Object props = null;
|
||||
String name = req.getParams().get( "name" );
|
||||
String name = req.getParams().get(NAME);
|
||||
if( name != null ) {
|
||||
NamedList<String> p = new SimpleOrderedMap<>();
|
||||
p.add( name, System.getProperty(name) );
|
||||
|
|
|
@ -20,6 +20,8 @@ import org.apache.solr.search.SolrIndexSearcher;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -67,7 +69,7 @@ public class SegmentsInfoRequestHandler extends RequestHandlerBase {
|
|||
if (mergeCandidates.contains(segmentCommitInfo.info.name)) {
|
||||
segmentInfo.add("mergeCandidate", true);
|
||||
}
|
||||
segmentInfos.add((String) segmentInfo.get("name"), segmentInfo);
|
||||
segmentInfos.add((String) segmentInfo.get(NAME), segmentInfo);
|
||||
}
|
||||
|
||||
return segmentInfos;
|
||||
|
@ -77,7 +79,7 @@ public class SegmentsInfoRequestHandler extends RequestHandlerBase {
|
|||
SegmentCommitInfo segmentCommitInfo) throws IOException {
|
||||
SimpleOrderedMap<Object> segmentInfoMap = new SimpleOrderedMap<>();
|
||||
|
||||
segmentInfoMap.add("name", segmentCommitInfo.info.name);
|
||||
segmentInfoMap.add(NAME, segmentCommitInfo.info.name);
|
||||
segmentInfoMap.add("delCount", segmentCommitInfo.getDelCount());
|
||||
segmentInfoMap.add("sizeInBytes", segmentCommitInfo.sizeInBytes());
|
||||
segmentInfoMap.add("size", segmentCommitInfo.info.maxDoc());
|
||||
|
|
|
@ -46,6 +46,8 @@ import org.apache.solr.schema.IndexSchema;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
|
||||
/**
|
||||
* This handler returns system info
|
||||
|
@ -158,7 +160,7 @@ public class SystemInfoHandler extends RequestHandlerBase
|
|||
SimpleOrderedMap<Object> info = new SimpleOrderedMap<>();
|
||||
|
||||
OperatingSystemMXBean os = ManagementFactory.getOperatingSystemMXBean();
|
||||
info.add( "name", os.getName() );
|
||||
info.add(NAME, os.getName());
|
||||
info.add( "version", os.getVersion() );
|
||||
info.add( "arch", os.getArch() );
|
||||
info.add( "systemLoadAverage", os.getSystemLoadAverage());
|
||||
|
@ -262,12 +264,12 @@ public class SystemInfoHandler extends RequestHandlerBase
|
|||
|
||||
// Summary Info
|
||||
jvm.add( "version", jreVersion + " " + vmVersion);
|
||||
jvm.add( "name", jreVendor + " " + vmName );
|
||||
jvm.add(NAME, jreVendor + " " + vmName);
|
||||
|
||||
// details
|
||||
SimpleOrderedMap<Object> java = new SimpleOrderedMap<>();
|
||||
java.add( "vendor", javaVendor );
|
||||
java.add( "name", javaName );
|
||||
java.add(NAME, javaName);
|
||||
java.add( "version", javaVersion );
|
||||
jvm.add( "spec", java );
|
||||
SimpleOrderedMap<Object> jre = new SimpleOrderedMap<>();
|
||||
|
@ -276,7 +278,7 @@ public class SystemInfoHandler extends RequestHandlerBase
|
|||
jvm.add( "jre", jre );
|
||||
SimpleOrderedMap<Object> vm = new SimpleOrderedMap<>();
|
||||
vm.add( "vendor", vmVendor );
|
||||
vm.add( "name", vmName );
|
||||
vm.add(NAME, vmName);
|
||||
vm.add( "version", vmVersion );
|
||||
jvm.add( "vm", vm );
|
||||
|
||||
|
|
|
@ -29,6 +29,8 @@ import org.apache.solr.handler.RequestHandlerBase;
|
|||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
*
|
||||
* @since solr 1.2
|
||||
|
@ -85,7 +87,7 @@ public class ThreadDumpHandler extends RequestHandlerBase
|
|||
long tid = ti.getThreadId();
|
||||
|
||||
info.add( "id", tid );
|
||||
info.add( "name", ti.getThreadName() );
|
||||
info.add(NAME, ti.getThreadName());
|
||||
info.add( "state", ti.getThreadState().toString() );
|
||||
|
||||
if (ti.getLockName() != null) {
|
||||
|
@ -105,7 +107,7 @@ public class ThreadDumpHandler extends RequestHandlerBase
|
|||
|
||||
if (ti.getLockOwnerName() != null) {
|
||||
SimpleOrderedMap<Object> owner = new SimpleOrderedMap<>();
|
||||
owner.add( "name", ti.getLockOwnerName() );
|
||||
owner.add(NAME, ti.getLockOwnerName());
|
||||
owner.add( "id", ti.getLockOwnerId() );
|
||||
}
|
||||
|
||||
|
|
|
@ -17,17 +17,6 @@
|
|||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
|
@ -41,7 +30,19 @@ import java.util.Set;
|
|||
import java.util.TreeMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.search.DocList;
|
||||
import org.apache.solr.search.QueryParsing;
|
||||
import org.apache.solr.util.SolrPluginUtils;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.FQ;
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
|
||||
/**
|
||||
* Adds debugging information to a request.
|
||||
|
@ -107,7 +108,7 @@ public class DebugComponent extends SearchComponent
|
|||
}
|
||||
|
||||
if (rb.req.getJSON() != null) {
|
||||
info.add("json", rb.req.getJSON());
|
||||
info.add(JSON, rb.req.getJSON());
|
||||
}
|
||||
|
||||
if (rb.isDebugQuery() && rb.getQparser() != null) {
|
||||
|
|
|
@ -17,12 +17,17 @@
|
|||
|
||||
package org.apache.solr.handler.component;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.index.ExitableDirectoryReader;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.common.SolrDocumentList;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
|
@ -43,11 +48,7 @@ import org.apache.solr.util.plugin.SolrCoreAware;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
|
||||
/**
|
||||
|
@@ -307,7 +308,7 @@ public class SearchHandler extends RequestHandlerBase implements SolrCoreAware ,
       // as the default but operators need to update their luceneMatchVersion to enable
       // this behavior since it did not work this way prior to 5.1
       if (req.getCore().getSolrConfig().luceneMatchVersion.onOrAfter(Version.LUCENE_5_1_0)) {
-        String reqPath = (String)req.getContext().get("path");
+        String reqPath = (String) req.getContext().get(PATH);
         if (!"/select".equals(reqPath)) {
           params.set(CommonParams.QT, reqPath);
         } // else if path is /select, then the qt gets passed thru if set
@ -30,30 +30,33 @@ import java.util.Map;
|
|||
import java.util.UUID;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.JsonRecordReader;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.util.RecordingJSONParser;
|
||||
import org.noggit.JSONParser;
|
||||
import org.noggit.ObjectBuilder;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.UpdateParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.JsonRecordReader;
|
||||
import org.apache.solr.handler.RequestHandlerUtils;
|
||||
import org.apache.solr.handler.UpdateRequestHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.response.SolrQueryResponse;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.update.AddUpdateCommand;
|
||||
import org.apache.solr.update.CommitUpdateCommand;
|
||||
import org.apache.solr.update.DeleteUpdateCommand;
|
||||
import org.apache.solr.update.RollbackUpdateCommand;
|
||||
import org.apache.solr.update.processor.UpdateRequestProcessor;
|
||||
import org.apache.solr.util.RecordingJSONParser;
|
||||
import org.noggit.JSONParser;
|
||||
import org.noggit.ObjectBuilder;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
|
||||
/**
|
||||
* @since solr 4.0
|
||||
|
@@ -64,7 +67,7 @@ public class JsonLoader extends ContentStreamLoader {

   @Override
   public String getDefaultWT() {
-    return "json";
+    return JSON;
   }

   @Override
@@ -115,7 +118,7 @@ public class JsonLoader extends ContentStreamLoader {

   @SuppressWarnings("fallthrough")
   void processUpdate(Reader reader) throws IOException {
-    String path = (String) req.getContext().get("path");
+    String path = (String) req.getContext().get(PATH);
     if (UpdateRequestHandler.DOC_PATH.equals(path) || "false".equals(req.getParams().get("json.command"))) {
       String split = req.getParams().get("split");
       String[] f = req.getParams().getParams("f");
@ -16,8 +16,27 @@ package org.apache.solr.handler.loader;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
import javax.xml.stream.FactoryConfigurationError;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
import javax.xml.transform.dom.DOMResult;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.sax.SAXSource;
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.client.solrj.request.UpdateRequest;
|
||||
import org.apache.solr.common.SolrException;
|
||||
|
@ -47,26 +66,7 @@ import org.slf4j.LoggerFactory;
|
|||
import org.xml.sax.InputSource;
|
||||
import org.xml.sax.XMLReader;
|
||||
|
||||
import javax.xml.parsers.SAXParserFactory;
|
||||
import javax.xml.stream.FactoryConfigurationError;
|
||||
import javax.xml.stream.XMLInputFactory;
|
||||
import javax.xml.stream.XMLStreamConstants;
|
||||
import javax.xml.stream.XMLStreamException;
|
||||
import javax.xml.stream.XMLStreamReader;
|
||||
import javax.xml.transform.Transformer;
|
||||
import javax.xml.transform.TransformerException;
|
||||
import javax.xml.transform.dom.DOMResult;
|
||||
import javax.xml.transform.dom.DOMSource;
|
||||
import javax.xml.transform.sax.SAXSource;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
|
||||
public class XMLLoader extends ContentStreamLoader {
|
||||
|
@ -467,7 +467,7 @@ public class XMLLoader extends ContentStreamLoader {
|
|||
for (int i = 0; i < parser.getAttributeCount(); i++) {
|
||||
attrName = parser.getAttributeLocalName(i);
|
||||
attrVal = parser.getAttributeValue(i);
|
||||
if ("name".equals(attrName)) {
|
||||
if (NAME.equals(attrName)) {
|
||||
name = attrVal;
|
||||
} else if ("boost".equals(attrName)) {
|
||||
boost = Float.parseFloat(attrVal);
|
||||
|
|
|
@ -19,6 +19,8 @@ package org.apache.solr.logging;
|
|||
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* Wrapper class for Logger implementaions
|
||||
*/
|
||||
|
@ -44,7 +46,7 @@ public abstract class LoggerInfo implements Comparable<LoggerInfo> {
|
|||
|
||||
public SimpleOrderedMap<?> getInfo() {
|
||||
SimpleOrderedMap<Object> info = new SimpleOrderedMap<>();
|
||||
info.add("name", getName());
|
||||
info.add(NAME, getName());
|
||||
info.add("level", getLevel());
|
||||
info.add("set", isSet());
|
||||
return info;
|
||||
|
|
|
@ -18,6 +18,11 @@ package org.apache.solr.request.json;
|
|||
*/
|
||||
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.commons.io.IOUtils;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
|
@ -25,7 +30,6 @@ import org.apache.solr.common.params.MultiMapSolrParams;
|
|||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.handler.component.SearchHandler;
|
||||
import org.apache.solr.request.SolrQueryRequest;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
|
@ -33,10 +37,7 @@ import org.apache.solr.request.macro.MacroExpander;
|
|||
import org.noggit.JSONParser;
|
||||
import org.noggit.ObjectBuilder;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
|
||||
public class RequestUtil {
|
||||
/**
|
||||
|
@ -65,7 +66,7 @@ public class RequestUtil {
|
|||
req.setParams(params);
|
||||
}
|
||||
|
||||
String[] jsonFromParams = map.remove("json"); // params from the query string should come after (and hence override) JSON content streams
|
||||
String[] jsonFromParams = map.remove(JSON); // params from the query string should come after (and hence override) JSON content streams
|
||||
|
||||
for (ContentStream cs : req.getContentStreams()) {
|
||||
String contentType = cs.getContentType();
|
||||
|
@ -75,7 +76,7 @@ public class RequestUtil {
|
|||
|
||||
try {
|
||||
String jsonString = IOUtils.toString( cs.getReader() );
|
||||
MultiMapSolrParams.addParam("json", jsonString, map);
|
||||
MultiMapSolrParams.addParam(JSON, jsonString, map);
|
||||
} catch (IOException e) {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Exception reading content stream for request:"+req, e);
|
||||
}
|
||||
|
@ -84,12 +85,12 @@ public class RequestUtil {
|
|||
// append existing "json" params
|
||||
if (jsonFromParams != null) {
|
||||
for (String json : jsonFromParams) {
|
||||
MultiMapSolrParams.addParam("json", json, map);
|
||||
MultiMapSolrParams.addParam(JSON, json, map);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
String[] jsonS = params.getParams("json");
|
||||
String[] jsonS = params.getParams(JSON);
|
||||
|
||||
boolean hasAdditions = defaults != null || invariants != null || appends != null || jsonS != null;
|
||||
|
||||
|
@ -168,12 +169,12 @@ public class RequestUtil {
|
|||
|
||||
Map<String, Object> json = null;
|
||||
// Handle JSON body first, so query params will always overlay on that
|
||||
jsonS = newMap.get("json");
|
||||
jsonS = newMap.get(JSON);
|
||||
if (jsonS != null) {
|
||||
if (json == null) {
|
||||
json = new LinkedHashMap<>();
|
||||
}
|
||||
mergeJSON(json, "json", jsonS, new ObjectUtil.ConflictHandler());
|
||||
mergeJSON(json, JSON, jsonS, new ObjectUtil.ConflictHandler());
|
||||
}
|
||||
for (String key : newMap.keySet()) {
|
||||
// json.nl, json.wrf are existing query parameters
|
||||
|
|
|
@ -36,6 +36,8 @@ import org.apache.solr.search.ReturnFields;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* @lucene.internal
|
||||
*/
|
||||
|
@ -327,7 +329,7 @@ public class SchemaXmlWriter extends TextResponseWriter {
|
|||
writer.write('<');
|
||||
writer.write(tag);
|
||||
if (name!=null) {
|
||||
writeAttr("name", name);
|
||||
writeAttr(NAME, name);
|
||||
if (closeTag) {
|
||||
writer.write("/>");
|
||||
} else {
|
||||
|
|
|
@ -33,6 +33,8 @@ import org.apache.solr.request.SolrQueryRequest;
|
|||
import org.apache.solr.search.ReturnFields;
|
||||
import org.apache.solr.search.SolrReturnFields;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
|
||||
/**
|
||||
* @lucene.internal
|
||||
|
@ -144,7 +146,7 @@ public class XMLWriter extends TextResponseWriter {
|
|||
writer.write('<');
|
||||
writer.write(tag);
|
||||
if (name!=null) {
|
||||
writeAttr("name", name);
|
||||
writeAttr(NAME, name);
|
||||
if (closeTag) {
|
||||
writer.write("/>");
|
||||
} else {
|
||||
|
@ -167,7 +169,7 @@ public class XMLWriter extends TextResponseWriter {
|
|||
if (doIndent) indent();
|
||||
|
||||
writer.write("<result");
|
||||
writeAttr("name",name);
|
||||
writeAttr(NAME, name);
|
||||
writeAttr("numFound",Long.toString(numFound));
|
||||
writeAttr("start",Long.toString(start));
|
||||
if(maxScore!=null) {
|
||||
|
|
|
@ -16,6 +16,11 @@ package org.apache.solr.rest;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -36,11 +41,7 @@ import org.restlet.resource.ResourceException;
|
|||
import org.restlet.resource.ServerResource;
|
||||
import org.slf4j.Logger;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.io.UnsupportedEncodingException;
|
||||
import java.net.URLDecoder;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
|
||||
/**
|
||||
* Base class of all Solr Restlet server resource classes.
|
||||
|
@@ -103,7 +104,7 @@ public abstract class BaseSolrResource extends ServerResource {
       schema = solrRequest.getSchema();
       String responseWriterName = solrRequest.getParams().get(CommonParams.WT);
       if (null == responseWriterName) {
-        responseWriterName = "json"; // Default to json writer
+        responseWriterName = JSON; // Default to json writer
       }
       String indent = solrRequest.getParams().get("indent");
       if (null == indent || ! ("off".equals(indent) || "false".equals(indent))) {
@ -17,27 +17,37 @@
|
|||
|
||||
package org.apache.solr.schema;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
|
||||
import org.apache.lucene.analysis.util.*;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.util.DOMUtil;
|
||||
import org.apache.solr.core.Config;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.util.plugin.AbstractPluginLoader;
|
||||
import org.w3c.dom.*;
|
||||
|
||||
import javax.xml.xpath.XPath;
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import javax.xml.xpath.XPathExpressionException;
|
||||
import javax.xml.xpath.XPathFactory;
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.core.KeywordTokenizerFactory;
|
||||
import org.apache.lucene.analysis.util.AbstractAnalysisFactory;
|
||||
import org.apache.lucene.analysis.util.CharFilterFactory;
|
||||
import org.apache.lucene.analysis.util.MultiTermAwareComponent;
|
||||
import org.apache.lucene.analysis.util.TokenFilterFactory;
|
||||
import org.apache.lucene.analysis.util.TokenizerFactory;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.solr.analysis.TokenizerChain;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.core.Config;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.util.DOMUtil;
|
||||
import org.apache.solr.util.plugin.AbstractPluginLoader;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.NamedNodeMap;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
public final class FieldTypePluginLoader
|
||||
extends AbstractPluginLoader<FieldType> {
|
||||
|
@ -136,7 +146,7 @@ public final class FieldTypePluginLoader
|
|||
@Override
|
||||
protected void init(FieldType plugin, Node node) throws Exception {
|
||||
|
||||
Map<String,String> params = DOMUtil.toMapExcept( node.getAttributes(), "name");
|
||||
Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), NAME);
|
||||
plugin.setArgs(schema, params);
|
||||
}
|
||||
|
||||
|
|
|
@ -44,6 +44,8 @@ import org.apache.solr.search.Sorting;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.JSON;
|
||||
|
||||
/**
|
||||
* Pre-analyzed field type provides a way to index a serialized token stream,
|
||||
* optionally with an independent stored value of a field.
|
||||
|
@ -70,7 +72,7 @@ public class PreAnalyzedField extends FieldType {
|
|||
parser = new JsonPreAnalyzedParser();
|
||||
} else {
|
||||
// short name
|
||||
if ("json".equalsIgnoreCase(implName)) {
|
||||
if (JSON.equalsIgnoreCase(implName)) {
|
||||
parser = new JsonPreAnalyzedParser();
|
||||
} else if ("simple".equalsIgnoreCase(implName)) {
|
||||
parser = new SimplePreAnalyzedParser();
|
||||
|
|
|
@ -17,22 +17,22 @@
|
|||
|
||||
package org.apache.solr.search;
|
||||
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.MapSerializable;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.util.DOMUtil;
|
||||
import org.apache.solr.common.util.StrUtils;
|
||||
import org.apache.solr.core.MapSerializable;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.util.DOMUtil;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import javax.xml.xpath.XPathConstants;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* Contains the knowledge of how cache config is
|
||||
|
@ -106,9 +106,9 @@ public class CacheConfig implements MapSerializable{
|
|||
}
|
||||
config.args = mapCopy;
|
||||
}
|
||||
String nameAttr = config.args.get("name"); // OPTIONAL
|
||||
String nameAttr = config.args.get(NAME); // OPTIONAL
|
||||
if (nameAttr==null) {
|
||||
config.args.put("name",config.nodeName);
|
||||
config.args.put(NAME, config.nodeName);
|
||||
}
|
||||
|
||||
SolrResourceLoader loader = solrConfig.getResourceLoader();
|
||||
|
|
|
@ -16,12 +16,6 @@ package org.apache.solr.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.util.ConcurrentLFUCache;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.net.URL;
|
||||
import java.util.List;
|
||||
|
@ -30,6 +24,14 @@ import java.util.Set;
|
|||
import java.util.concurrent.CopyOnWriteArrayList;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.util.ConcurrentLFUCache;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* SolrCache based on ConcurrentLFUCache implementation.
|
||||
* <p>
|
||||
|
@ -64,7 +66,7 @@ public class LFUCache<K, V> implements SolrCache<K, V> {
|
|||
public Object init(Map args, Object persistence, CacheRegenerator regenerator) {
|
||||
state = State.CREATED;
|
||||
this.regenerator = regenerator;
|
||||
name = (String) args.get("name");
|
||||
name = (String) args.get(NAME);
|
||||
String str = (String) args.get("size");
|
||||
int limit = str == null ? 1024 : Integer.parseInt(str);
|
||||
int minLimit;
|
||||
|
|
|
@ -17,17 +17,17 @@
|
|||
|
||||
package org.apache.solr.search;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.math.BigDecimal;
|
||||
import java.math.RoundingMode;
|
||||
import java.net.URL;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.core.SolrInfoMBean.Category;
|
||||
import org.apache.solr.search.SolrCache.State;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* Common base class of reusable functionality for SolrCaches
|
||||
*/
|
||||
|
@@ -113,9 +113,9 @@ public abstract class SolrCacheBase {

   public void init(Map<String, String> args, CacheRegenerator regenerator) {
     this.regenerator = regenerator;
-    state=State.CREATED;
-    name = (String) args.get("name");
-    autowarm = new AutoWarmCountRef((String)args.get("autowarmCount"));
+    state = State.CREATED;
+    name = args.get(NAME);
+    autowarm = new AutoWarmCountRef(args.get("autowarmCount"));

   }

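A small sketch of the args map that SolrCacheBase.init above consumes, keyed by the shared NAME constant; the cache name and autowarm count below are hypothetical values, not taken from this commit:

    import java.util.HashMap;
    import java.util.Map;

    import static org.apache.solr.common.params.CommonParams.NAME;

    class CacheArgsSketch {
      // Builds the kind of <String,String> map passed to SolrCacheBase.init(...).
      static Map<String, String> exampleArgs() {
        Map<String, String> args = new HashMap<>();
        args.put(NAME, "filterCache");      // hypothetical cache name
        args.put("autowarmCount", "128");   // hypothetical autowarm count
        return args;
      }
    }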
@ -17,6 +17,7 @@
|
|||
|
||||
package org.apache.solr.servlet;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
|
@ -35,11 +36,8 @@ import java.util.HashMap;
|
|||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
|
||||
import org.apache.commons.fileupload.FileItem;
|
||||
import org.apache.commons.fileupload.disk.DiskFileItemFactory;
|
||||
import org.apache.commons.fileupload.servlet.ServletFileUpload;
|
||||
|
@ -52,7 +50,6 @@ import org.apache.solr.common.params.SolrParams;
|
|||
import org.apache.solr.common.util.ContentStream;
|
||||
import org.apache.solr.common.util.ContentStreamBase;
|
||||
import org.apache.solr.common.util.FastInputStream;
|
||||
import org.apache.solr.core.CoreContainer;
|
||||
import org.apache.solr.core.RequestHandlers;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
|
@ -62,6 +59,8 @@ import org.apache.solr.util.RTimer;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
|
||||
public class SolrRequestParsers
|
||||
{
|
||||
|
@@ -163,7 +162,7 @@ public class SolrRequestParsers

     // Handlers and login will want to know the path. If it contains a ':'
     // the handler could use it for RESTful URLs
-    sreq.getContext().put( "path", RequestHandlers.normalize(path) );
+    sreq.getContext().put(PATH, RequestHandlers.normalize(path));
     sreq.getContext().put("httpMethod", req.getMethod());

     if(addHttpRequestToContext) {
@ -17,6 +17,9 @@
|
|||
|
||||
package org.apache.solr.servlet;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStreamWriter;
|
||||
import java.io.Writer;
|
||||
|
@ -32,12 +35,8 @@ import java.util.Map;
|
|||
import java.util.Set;
|
||||
import java.util.SortedMap;
|
||||
import java.util.TreeMap;
|
||||
import java.util.regex.Pattern;
|
||||
import java.util.regex.Matcher;
|
||||
|
||||
import javax.servlet.ServletException;
|
||||
import javax.servlet.http.HttpServletRequest;
|
||||
import javax.servlet.http.HttpServletResponse;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.cloud.ZkController;
|
||||
|
@ -60,6 +59,7 @@ import org.noggit.ObjectBuilder;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.PATH;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -354,7 +354,7 @@ public final class ZookeeperInfoServlet extends BaseSolrServlet {
|
|||
return;
|
||||
}
|
||||
|
||||
String path = params.get("path");
|
||||
String path = params.get(PATH);
|
||||
String addr = params.get("addr");
|
||||
|
||||
if (addr != null && addr.length() == 0) {
|
||||
|
@ -785,7 +785,7 @@ public final class ZookeeperInfoServlet extends BaseSolrServlet {
|
|||
json.writeNameSeparator();
|
||||
json.startObject();
|
||||
|
||||
writeKeyValue(json, "path", path, true);
|
||||
writeKeyValue(json, PATH, path, true);
|
||||
|
||||
json.writeValueSeparator();
|
||||
json.writeString("prop");
|
||||
|
|
|
@ -37,6 +37,8 @@ import org.apache.solr.search.SolrIndexSearcher;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* Responsible for loading the lookup and dictionary Implementations specified by
|
||||
* the SolrConfig.
|
||||
|
@@ -49,9 +51,6 @@ public class SolrSuggester implements Accountable {
   /** Name used when an unnamed suggester config is passed */
   public static final String DEFAULT_DICT_NAME = "default";

-  /** Label to identify the name of the suggester */
-  public static final String NAME = "name";
-
   /** Location of the source data - either a path to a file, or null for the
    *  current IndexReader.
    * */
@ -17,7 +17,12 @@ package org.apache.solr.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
@ -26,6 +31,8 @@ import org.w3c.dom.NamedNodeMap;
|
|||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
|
@ -137,7 +144,7 @@ public class DOMUtil {
|
|||
|
||||
final String type = nd.getNodeName();
|
||||
|
||||
final String name = getAttr(nd, "name");
|
||||
final String name = getAttr(nd, NAME);
|
||||
|
||||
Object val=null;
|
||||
|
||||
|
|
|
@ -17,6 +17,25 @@ package org.apache.solr.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.net.ConnectException;
|
||||
import java.net.SocketException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Enumeration;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
|
||||
import org.apache.commons.cli.CommandLine;
|
||||
import org.apache.commons.cli.GnuParser;
|
||||
import org.apache.commons.cli.HelpFormatter;
|
||||
|
@ -53,30 +72,14 @@ import org.apache.solr.common.cloud.Replica;
|
|||
import org.apache.solr.common.cloud.Slice;
|
||||
import org.apache.solr.common.cloud.ZkCoreNodeProps;
|
||||
import org.apache.solr.common.cloud.ZkStateReader;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.noggit.CharArr;
|
||||
import org.noggit.JSONParser;
|
||||
import org.noggit.JSONWriter;
|
||||
import org.noggit.ObjectBuilder;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.PrintStream;
|
||||
import java.net.ConnectException;
|
||||
import java.net.SocketException;
|
||||
import java.net.URL;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Enumeration;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
import java.util.zip.ZipEntry;
|
||||
import java.util.zip.ZipInputStream;
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* Command-line utility for working with Solr.
|
||||
|
@@ -498,7 +501,7 @@ public class SolrCLI {
   @SuppressWarnings({"unchecked"})
   public static Map<String,Object> getJson(HttpClient httpClient, String getUrl) throws Exception {
     // ensure we're requesting JSON back from Solr
-    HttpGet httpGet = new HttpGet(new URIBuilder(getUrl).setParameter("wt", "json").build());
+    HttpGet httpGet = new HttpGet(new URIBuilder(getUrl).setParameter(CommonParams.WT, CommonParams.JSON).build());
     // make the request and get back a parsed JSON object
     Map<String,Object> json = httpClient.execute(httpGet, new SolrResponseHandler());
     // check the response JSON from Solr to see if it is an error
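The same wt=json idiom recurs throughout SolrCLI; a minimal standalone sketch using a hypothetical base URL (only Apache HttpClient's URIBuilder and the CommonParams constants from the hunk above are assumed):

    import java.net.URI;
    import org.apache.http.client.methods.HttpGet;
    import org.apache.http.client.utils.URIBuilder;
    import org.apache.solr.common.params.CommonParams;

    public class JsonRequestSketch {
      // Build a GET that explicitly asks Solr for a JSON response body.
      public static HttpGet statusRequest(String baseUrl) throws Exception {
        URI uri = new URIBuilder(baseUrl + "/admin/info/system")  // hypothetical endpoint
            .setParameter(CommonParams.WT, CommonParams.JSON)
            .build();
        return new HttpGet(uri);
      }
    }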
@ -793,7 +796,7 @@ public class SolrCLI {
|
|||
|
||||
public Map<String,Object> asMap() {
|
||||
Map<String,Object> map = new LinkedHashMap<String,Object>();
|
||||
map.put("name", name);
|
||||
map.put(NAME, name);
|
||||
map.put("url", url);
|
||||
map.put("numDocs", numDocs);
|
||||
map.put("status", status);
|
||||
|
@ -1043,7 +1046,7 @@ public class SolrCLI {
|
|||
.hasArg()
|
||||
.isRequired(true)
|
||||
.withDescription("Name of collection to create.")
|
||||
.create("name"),
|
||||
.create(NAME),
|
||||
OptionBuilder
|
||||
.withArgName("#")
|
||||
.hasArg()
|
||||
|
@ -1177,7 +1180,7 @@ public class SolrCLI {
|
|||
"there is at least 1 live node in the cluster.");
|
||||
String firstLiveNode = liveNodes.iterator().next();
|
||||
|
||||
String collectionName = cli.getOptionValue("name");
|
||||
String collectionName = cli.getOptionValue(NAME);
|
||||
|
||||
// build a URL to create the collection
|
||||
int numShards = optionAsInt(cli, "shards", 1);
|
||||
|
@ -1322,7 +1325,7 @@ public class SolrCLI {
|
|||
.hasArg()
|
||||
.isRequired(true)
|
||||
.withDescription("Name of the core to create.")
|
||||
.create("name"),
|
||||
.create(NAME),
|
||||
OptionBuilder
|
||||
.withArgName("CONFIG")
|
||||
.hasArg()
|
||||
|
@ -1362,7 +1365,7 @@ public class SolrCLI {
|
|||
}
|
||||
}
|
||||
|
||||
String coreName = cli.getOptionValue("name");
|
||||
String coreName = cli.getOptionValue(NAME);
|
||||
|
||||
String systemInfoUrl = solrUrl+"admin/info/system";
|
||||
CloseableHttpClient httpClient = getHttpClient();
|
||||
|
@ -1453,7 +1456,7 @@ public class SolrCLI {
|
|||
Map<String,Object> existsCheckResult = getJson(coreStatusUrl);
|
||||
Map<String,Object> status = (Map<String, Object>)existsCheckResult.get("status");
|
||||
Map<String,Object> coreStatus = (Map<String, Object>)status.get(coreName);
|
||||
exists = coreStatus != null && coreStatus.containsKey("name");
|
||||
exists = coreStatus != null && coreStatus.containsKey(NAME);
|
||||
} catch (Exception exc) {
|
||||
// just ignore it since we're only interested in a positive result here
|
||||
}
|
||||
|
@ -1529,7 +1532,7 @@ public class SolrCLI {
|
|||
.hasArg()
|
||||
.isRequired(true)
|
||||
.withDescription("Name of the core / collection to delete.")
|
||||
.create("name"),
|
||||
.create(NAME),
|
||||
OptionBuilder
|
||||
.withArgName("true|false")
|
||||
.hasArg()
|
||||
|
@ -1610,7 +1613,7 @@ public class SolrCLI {
|
|||
ZkStateReader zkStateReader = cloudSolrClient.getZkStateReader();
|
||||
String baseUrl = zkStateReader.getBaseUrlForNodeName(firstLiveNode);
|
||||
|
||||
String collectionName = cli.getOptionValue("name");
|
||||
String collectionName = cli.getOptionValue(NAME);
|
||||
|
||||
if (!zkStateReader.getClusterState().hasCollection(collectionName)) {
|
||||
System.err.println("\nERROR: Collection "+collectionName+" not found!");
|
||||
|
@ -1686,7 +1689,7 @@ public class SolrCLI {
|
|||
protected int deleteCore(CommandLine cli, CloseableHttpClient httpClient, String solrUrl) throws Exception {
|
||||
|
||||
int status = 0;
|
||||
String coreName = cli.getOptionValue("name");
|
||||
String coreName = cli.getOptionValue(NAME);
|
||||
String deleteCoreUrl =
|
||||
String.format(Locale.ROOT,
|
||||
"%sadmin/cores?action=UNLOAD&core=%s&deleteIndex=true&deleteDataDir=true&deleteInstanceDir=true",
|
||||
|
|
|
@ -19,17 +19,18 @@ package org.apache.solr.util.plugin;
|
|||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrException.ErrorCode;
|
||||
import org.apache.solr.util.DOMUtil;
|
||||
import org.apache.solr.core.SolrConfig;
|
||||
import org.apache.solr.core.SolrResourceLoader;
|
||||
import org.apache.solr.util.DOMUtil;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.w3c.dom.Node;
|
||||
import org.w3c.dom.NodeList;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
* An abstract super class that manages standard solr-style plugin configuration.
|
||||
*
|
||||
|
@ -144,7 +145,7 @@ public abstract class AbstractPluginLoader<T>
|
|||
|
||||
String name = null;
|
||||
try {
|
||||
name = DOMUtil.getAttr(node,"name", requireName?type:null);
|
||||
name = DOMUtil.getAttr(node, NAME, requireName ? type : null);
|
||||
String className = DOMUtil.getAttr(node,"class", type);
|
||||
String defaultStr = DOMUtil.getAttr(node,"default", null );
|
||||
|
||||
|
@ -222,7 +223,7 @@ public abstract class AbstractPluginLoader<T>
|
|||
T plugin = null;
|
||||
|
||||
try {
|
||||
String name = DOMUtil.getAttr(node, "name", requireName ? type : null);
|
||||
String name = DOMUtil.getAttr(node, NAME, requireName ? type : null);
|
||||
String className = DOMUtil.getAttr(node, "class", type);
|
||||
plugin = create(loader, name, className, node);
|
||||
log.debug("created " + name + ": " + plugin.getClass().getName());
|
||||
|
@@ -265,13 +266,11 @@ public abstract class AbstractPluginLoader<T>
    * Internal class to hold onto initialization info so that it can be initialized
    * after it is registered.
    */
-  private class PluginInitInfo
-  {
+  private class PluginInitInfo {
     final T plugin;
     final Node node;

-    PluginInitInfo( T plugin, Node node )
-    {
+    PluginInitInfo(T plugin, Node node) {
       this.plugin = plugin;
       this.node = node;
     }
@ -22,6 +22,8 @@ import java.util.Map;
|
|||
import org.apache.solr.util.DOMUtil;
|
||||
import org.w3c.dom.Node;
|
||||
|
||||
import static org.apache.solr.common.params.CommonParams.NAME;
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
|
@ -38,7 +40,7 @@ public class MapPluginLoader<T extends MapInitializedPlugin> extends AbstractPlu
|
|||
|
||||
@Override
|
||||
protected void init(T plugin, Node node) throws Exception {
|
||||
Map<String,String> params = DOMUtil.toMapExcept( node.getAttributes(), "name","class" );
|
||||
Map<String, String> params = DOMUtil.toMapExcept(node.getAttributes(), NAME, "class");
|
||||
plugin.init( params );
|
||||
}
|
||||
|
||||
|
|
|
@@ -66,7 +66,7 @@ public class LeaderInitiatedRecoveryOnCommitTest extends BasicDistributedZkTest
   }

   private void multiShardTest() throws Exception {
-    // create a collection that has 1 shard and 3 replicas
+    // create a collection that has 2 shards and 2 replicas
     String testCollectionName = "c8n_2x2_commits";
     createCollection(testCollectionName, 2, 2, 1);
     cloudClient.setDefaultCollection(testCollectionName);
@@ -10,11 +10,11 @@ set SDIR=%~dp0
 if "%SDIR:~-1%"=="\" set SDIR=%SDIR:~0,-1%

 IF exist %SDIR%\..\..\solr-webapp\webapp\nul (
-  echo %SDIR%\....\..\solr-webapp\webapp exists
+  echo %SDIR%\..\..\solr-webapp\webapp exists
 ) ELSE (
   echo -------------------
-  echo Unzip example\webapps\solr.war to example\solr-webapp\. to use this script.
-  echo Starting the Solr example via start.jar will also do this extraction.
+  echo Unzip server\webapps\solr.war to server\solr-webapp\. to use this script.
+  echo Starting Solr via "bin\solr.cmd start" will also do this extraction.
   echo -------------------
 )
