diff --git a/core/src/main/java/org/apache/lucene/analysis/synonym/GraphTokenStreamFiniteStrings.java b/core/src/main/java/org/apache/lucene/analysis/synonym/GraphTokenStreamFiniteStrings.java new file mode 100644 index 00000000000..3d806588eca --- /dev/null +++ b/core/src/main/java/org/apache/lucene/analysis/synonym/GraphTokenStreamFiniteStrings.java @@ -0,0 +1,291 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis.synonym; + +import static org.apache.lucene.util.automaton.Operations.DEFAULT_MAX_DETERMINIZED_STATES; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.BytesTermAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.FiniteStringsIterator; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.Transition; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Creates a list of {@link TokenStream} where each stream is the tokens that make up a finite string in graph token stream. To do this, + * the graph token stream is converted to an {@link Automaton} and from there we use a {@link FiniteStringsIterator} to collect the various + * token streams for each finite string. 
+ */ +public class GraphTokenStreamFiniteStrings { + private final Automaton.Builder builder; + Automaton det; + private final Map termToID = new HashMap<>(); + private final Map idToTerm = new HashMap<>(); + private int anyTermID = -1; + + public GraphTokenStreamFiniteStrings() { + this.builder = new Automaton.Builder(); + } + + private static class BytesRefArrayTokenStream extends TokenStream { + private final BytesTermAttribute termAtt = addAttribute(BytesTermAttribute.class); + private final BytesRef[] terms; + private int offset; + + BytesRefArrayTokenStream(BytesRef[] terms) { + this.terms = terms; + offset = 0; + } + + @Override + public boolean incrementToken() throws IOException { + if (offset < terms.length) { + clearAttributes(); + termAtt.setBytesRef(terms[offset]); + offset = offset + 1; + return true; + } + + return false; + } + } + + /** + * Gets + */ + public List getTokenStreams(final TokenStream in) throws IOException { + // build automation + build(in); + + List tokenStreams = new ArrayList<>(); + final FiniteStringsIterator finiteStrings = new FiniteStringsIterator(det); + for (IntsRef string; (string = finiteStrings.next()) != null; ) { + final BytesRef[] tokens = new BytesRef[string.length]; + for (int idx = string.offset, len = string.offset + string.length; idx < len; idx++) { + tokens[idx - string.offset] = idToTerm.get(string.ints[idx]); + } + + tokenStreams.add(new BytesRefArrayTokenStream(tokens)); + } + + return tokenStreams; + } + + private void build(final TokenStream in) throws IOException { + if (det != null) { + throw new IllegalStateException("Automation already built"); + } + + final TermToBytesRefAttribute termBytesAtt = in.addAttribute(TermToBytesRefAttribute.class); + final PositionIncrementAttribute posIncAtt = in.addAttribute(PositionIncrementAttribute.class); + final PositionLengthAttribute posLengthAtt = in.addAttribute(PositionLengthAttribute.class); + final OffsetAttribute offsetAtt = in.addAttribute(OffsetAttribute.class); + + in.reset(); + + int pos = -1; + int lastPos = 0; + int maxOffset = 0; + int maxPos = -1; + int state = -1; + while (in.incrementToken()) { + int posInc = posIncAtt.getPositionIncrement(); + assert pos > -1 || posInc > 0; + + if (posInc > 1) { + throw new IllegalArgumentException("cannot handle holes; to accept any term, use '*' term"); + } + + if (posInc > 0) { + // New node: + pos += posInc; + } + + int endPos = pos + posLengthAtt.getPositionLength(); + while (state < endPos) { + state = createState(); + } + + BytesRef term = termBytesAtt.getBytesRef(); + //System.out.println(pos + "-" + endPos + ": " + term.utf8ToString() + ": posInc=" + posInc); + if (term.length == 1 && term.bytes[term.offset] == (byte) '*') { + addAnyTransition(pos, endPos); + } else { + addTransition(pos, endPos, term); + } + + maxOffset = Math.max(maxOffset, offsetAtt.endOffset()); + maxPos = Math.max(maxPos, endPos); + } + + in.end(); + + // TODO: look at endOffset? ts2a did... + + // TODO: this (setting "last" state as the only accept state) may be too simplistic? + setAccept(state, true); + finish(); + } + + /** + * Returns a new state; state 0 is always the initial state. + */ + private int createState() { + return builder.createState(); + } + + /** + * Marks the specified state as accept or not. + */ + private void setAccept(int state, boolean accept) { + builder.setAccept(state, accept); + } + + /** + * Adds a transition to the automaton. 
+ */ + private void addTransition(int source, int dest, String term) { + addTransition(source, dest, new BytesRef(term)); + } + + /** + * Adds a transition to the automaton. + */ + private void addTransition(int source, int dest, BytesRef term) { + if (term == null) { + throw new NullPointerException("term should not be null"); + } + builder.addTransition(source, dest, getTermID(term)); + } + + /** + * Adds a transition matching any term. + */ + private void addAnyTransition(int source, int dest) { + builder.addTransition(source, dest, getTermID(null)); + } + + /** + * Call this once you are done adding states/transitions. + */ + private void finish() { + finish(DEFAULT_MAX_DETERMINIZED_STATES); + } + + /** + * Call this once you are done adding states/transitions. + * + * @param maxDeterminizedStates Maximum number of states created when determinizing the automaton. Higher numbers allow this operation + * to consume more memory but allow more complex automatons. + */ + private void finish(int maxDeterminizedStates) { + Automaton automaton = builder.finish(); + + // System.out.println("before det:\n" + automaton.toDot()); + + Transition t = new Transition(); + + // TODO: should we add "eps back to initial node" for all states, + // and det that? then we don't need to revisit initial node at + // every position? but automaton could blow up? And, this makes it + // harder to skip useless positions at search time? + + if (anyTermID != -1) { + + // Make sure there are no leading or trailing ANY: + int count = automaton.initTransition(0, t); + for (int i = 0; i < count; i++) { + automaton.getNextTransition(t); + if (anyTermID >= t.min && anyTermID <= t.max) { + throw new IllegalStateException("automaton cannot lead with an ANY transition"); + } + } + + int numStates = automaton.getNumStates(); + for (int i = 0; i < numStates; i++) { + count = automaton.initTransition(i, t); + for (int j = 0; j < count; j++) { + automaton.getNextTransition(t); + if (automaton.isAccept(t.dest) && anyTermID >= t.min && anyTermID <= t.max) { + throw new IllegalStateException("automaton cannot end with an ANY transition"); + } + } + } + + int termCount = termToID.size(); + + // We have to carefully translate these transitions so automaton + // realizes they also match all other terms: + Automaton newAutomaton = new Automaton(); + for (int i = 0; i < numStates; i++) { + newAutomaton.createState(); + newAutomaton.setAccept(i, automaton.isAccept(i)); + } + + for (int i = 0; i < numStates; i++) { + count = automaton.initTransition(i, t); + for (int j = 0; j < count; j++) { + automaton.getNextTransition(t); + int min, max; + if (t.min <= anyTermID && anyTermID <= t.max) { + // Match any term + min = 0; + max = termCount - 1; + } else { + min = t.min; + max = t.max; + } + newAutomaton.addTransition(t.source, t.dest, min, max); + } + } + newAutomaton.finishState(); + automaton = newAutomaton; + } + + det = Operations.removeDeadStates(Operations.determinize(automaton, maxDeterminizedStates)); + } + + private int getTermID(BytesRef term) { + Integer id = termToID.get(term); + if (id == null) { + id = termToID.size(); + if (term != null) { + term = BytesRef.deepCopyOf(term); + } + termToID.put(term, id); + idToTerm.put(id, term); + if (term == null) { + anyTermID = id; + } + } + + return id; + } +} diff --git a/core/src/main/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java b/core/src/main/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java new file mode 100644 index 00000000000..8249011c114 --- 
/dev/null +++ b/core/src/main/java/org/apache/lucene/analysis/synonym/SynonymGraphFilter.java @@ -0,0 +1,588 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.analysis.synonym; + +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TypeAttribute; +import org.apache.lucene.store.ByteArrayDataInput; +import org.apache.lucene.util.AttributeSource; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.XRollingBuffer; +import org.apache.lucene.util.fst.FST; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.LinkedList; +import java.util.List; + +// TODO: maybe we should resolve token -> wordID then run +// FST on wordIDs, for better perf? + +// TODO: a more efficient approach would be Aho/Corasick's +// algorithm +// http://en.wikipedia.org/wiki/Aho%E2%80%93Corasick_string_matching_algorithm +// It improves over the current approach here +// because it does not fully re-start matching at every +// token. For example if one pattern is "a b c x" +// and another is "b c d" and the input is "a b c d", on +// trying to parse "a b c x" but failing when you got to x, +// rather than starting over again your really should +// immediately recognize that "b c d" matches at the next +// input. I suspect this won't matter that much in +// practice, but it's possible on some set of synonyms it +// will. We'd have to modify Aho/Corasick to enforce our +// conflict resolving (eg greedy matching) because that algo +// finds all matches. This really amounts to adding a .* +// closure to the FST and then determinizing it. +// +// Another possible solution is described at http://www.cis.uni-muenchen.de/people/Schulz/Pub/dictle5.ps + +/** + * Applies single- or multi-token synonyms from a {@link SynonymMap} + * to an incoming {@link TokenStream}, producing a fully correct graph + * output. This is a replacement for {@link SynonymFilter}, which produces + * incorrect graphs for multi-token synonyms. + * + * NOTE: this cannot consume an incoming graph; results will + * be undefined. 
+ */ +public final class SynonymGraphFilter extends TokenFilter { + + public static final String TYPE_SYNONYM = "SYNONYM"; + public static final int GRAPH_FLAG = 8; + + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class); + private final PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class); + private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class); + + private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class); + private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); + + private final SynonymMap synonyms; + private final boolean ignoreCase; + + private final FST fst; + + private final FST.BytesReader fstReader; + private final FST.Arc scratchArc; + private final ByteArrayDataInput bytesReader = new ByteArrayDataInput(); + private final BytesRef scratchBytes = new BytesRef(); + private final CharsRefBuilder scratchChars = new CharsRefBuilder(); + private final LinkedList outputBuffer = new LinkedList<>(); + + private int nextNodeOut; + private int lastNodeOut; + private int maxLookaheadUsed; + + // For testing: + private int captureCount; + + private boolean liveToken; + + // Start/end offset of the current match: + private int matchStartOffset; + private int matchEndOffset; + + // True once the input TokenStream is exhausted: + private boolean finished; + + private int lookaheadNextRead; + private int lookaheadNextWrite; + + private XRollingBuffer lookahead = new XRollingBuffer() { + @Override + protected BufferedInputToken newInstance() { + return new BufferedInputToken(); + } + }; + + static class BufferedInputToken implements XRollingBuffer.Resettable { + final CharsRefBuilder term = new CharsRefBuilder(); + AttributeSource.State state; + int startOffset = -1; + int endOffset = -1; + + @Override + public void reset() { + state = null; + term.clear(); + + // Intentionally invalid to ferret out bugs: + startOffset = -1; + endOffset = -1; + } + } + + static class BufferedOutputToken { + final String term; + + // Non-null if this was an incoming token: + final State state; + + final int startNode; + final int endNode; + + public BufferedOutputToken(State state, String term, int startNode, int endNode) { + this.state = state; + this.term = term; + this.startNode = startNode; + this.endNode = endNode; + } + } + + public SynonymGraphFilter(TokenStream input, SynonymMap synonyms, boolean ignoreCase) { + super(input); + this.synonyms = synonyms; + this.fst = synonyms.fst; + if (fst == null) { + throw new IllegalArgumentException("fst must be non-null"); + } + this.fstReader = fst.getBytesReader(); + scratchArc = new FST.Arc<>(); + this.ignoreCase = ignoreCase; + } + + @Override + public boolean incrementToken() throws IOException { + //System.out.println("\nS: incrToken lastNodeOut=" + lastNodeOut + " nextNodeOut=" + nextNodeOut); + + assert lastNodeOut <= nextNodeOut; + + if (outputBuffer.isEmpty() == false) { + // We still have pending outputs from a prior synonym match: + releaseBufferedToken(); + //System.out.println(" syn: ret buffered=" + this); + assert liveToken == false; + return true; + } + + // Try to parse a new synonym match at the current token: + + if (parse()) { + // A new match was found: + releaseBufferedToken(); + //System.out.println(" syn: after parse, ret buffered=" + this); + assert liveToken == false; + return true; + } + + if (lookaheadNextRead == 
lookaheadNextWrite) { + + // Fast path: parse pulled one token, but it didn't match + // the start for any synonym, so we now return it "live" w/o having + // cloned all of its atts: + if (finished) { + //System.out.println(" syn: ret END"); + return false; + } + + assert liveToken; + liveToken = false; + + // NOTE: no need to change posInc since it's relative, i.e. whatever + // node our output is upto will just increase by the incoming posInc. + // We also don't need to change posLen, but only because we cannot + // consume a graph, so the incoming token can never span a future + // synonym match. + + } else { + // We still have buffered lookahead tokens from a previous + // parse attempt that required lookahead; just replay them now: + //System.out.println(" restore buffer"); + assert lookaheadNextRead < lookaheadNextWrite : "read=" + lookaheadNextRead + " write=" + lookaheadNextWrite; + BufferedInputToken token = lookahead.get(lookaheadNextRead); + lookaheadNextRead++; + + restoreState(token.state); + + lookahead.freeBefore(lookaheadNextRead); + + //System.out.println(" after restore offset=" + offsetAtt.startOffset() + "-" + offsetAtt.endOffset()); + assert liveToken == false; + } + + lastNodeOut += posIncrAtt.getPositionIncrement(); + nextNodeOut = lastNodeOut + posLenAtt.getPositionLength(); + + //System.out.println(" syn: ret lookahead=" + this); + + return true; + } + + private void releaseBufferedToken() throws IOException { + //System.out.println(" releaseBufferedToken"); + + BufferedOutputToken token = outputBuffer.pollFirst(); + + if (token.state != null) { + // This is an original input token (keepOrig=true case): + //System.out.println(" hasState"); + restoreState(token.state); + //System.out.println(" startOffset=" + offsetAtt.startOffset() + " endOffset=" + offsetAtt.endOffset()); + } else { + clearAttributes(); + //System.out.println(" no state"); + termAtt.append(token.term); + + // We better have a match already: + assert matchStartOffset != -1; + + offsetAtt.setOffset(matchStartOffset, matchEndOffset); + //System.out.println(" startOffset=" + matchStartOffset + " endOffset=" + matchEndOffset); + typeAtt.setType(TYPE_SYNONYM); + } + + //System.out.println(" lastNodeOut=" + lastNodeOut); + //System.out.println(" term=" + termAtt); + + posIncrAtt.setPositionIncrement(token.startNode - lastNodeOut); + lastNodeOut = token.startNode; + posLenAtt.setPositionLength(token.endNode - token.startNode); + flagsAtt.setFlags(flagsAtt.getFlags() | GRAPH_FLAG); // set the graph flag + } + + /** + * Scans the next input token(s) to see if a synonym matches. Returns true + * if a match was found. 
+ */ + private boolean parse() throws IOException { + // System.out.println(Thread.currentThread().getName() + ": S: parse: " + System.identityHashCode(this)); + + // Holds the longest match we've seen so far: + BytesRef matchOutput = null; + int matchInputLength = 0; + + BytesRef pendingOutput = fst.outputs.getNoOutput(); + fst.getFirstArc(scratchArc); + + assert scratchArc.output == fst.outputs.getNoOutput(); + + // How many tokens in the current match + int matchLength = 0; + boolean doFinalCapture = false; + + int lookaheadUpto = lookaheadNextRead; + matchStartOffset = -1; + + byToken: + while (true) { + //System.out.println(" cycle lookaheadUpto=" + lookaheadUpto + " maxPos=" + lookahead.getMaxPos()); + + // Pull next token's chars: + final char[] buffer; + final int bufferLen; + final int inputEndOffset; + + if (lookaheadUpto <= lookahead.getMaxPos()) { + // Still in our lookahead buffer + BufferedInputToken token = lookahead.get(lookaheadUpto); + lookaheadUpto++; + buffer = token.term.chars(); + bufferLen = token.term.length(); + inputEndOffset = token.endOffset; + //System.out.println(" use buffer now max=" + lookahead.getMaxPos()); + if (matchStartOffset == -1) { + matchStartOffset = token.startOffset; + } + } else { + + // We used up our lookahead buffer of input tokens + // -- pull next real input token: + + assert finished || liveToken == false; + + if (finished) { + //System.out.println(" break: finished"); + break; + } else if (input.incrementToken()) { + //System.out.println(" input.incrToken"); + liveToken = true; + buffer = termAtt.buffer(); + bufferLen = termAtt.length(); + if (matchStartOffset == -1) { + matchStartOffset = offsetAtt.startOffset(); + } + inputEndOffset = offsetAtt.endOffset(); + + lookaheadUpto++; + } else { + // No more input tokens + finished = true; + //System.out.println(" break: now set finished"); + break; + } + } + + matchLength++; + //System.out.println(" cycle term=" + new String(buffer, 0, bufferLen)); + + // Run each char in this token through the FST: + int bufUpto = 0; + while (bufUpto < bufferLen) { + final int codePoint = Character.codePointAt(buffer, bufUpto, bufferLen); + if (fst.findTargetArc(ignoreCase ? Character.toLowerCase(codePoint) : codePoint, scratchArc, scratchArc, fstReader) == + null) { + break byToken; + } + + // Accum the output + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + bufUpto += Character.charCount(codePoint); + } + + assert bufUpto == bufferLen; + + // OK, entire token matched; now see if this is a final + // state in the FST (a match): + if (scratchArc.isFinal()) { + matchOutput = fst.outputs.add(pendingOutput, scratchArc.nextFinalOutput); + matchInputLength = matchLength; + matchEndOffset = inputEndOffset; + //System.out.println(" ** match"); + } + + // See if the FST can continue matching (ie, needs to + // see the next input token): + if (fst.findTargetArc(SynonymMap.WORD_SEPARATOR, scratchArc, scratchArc, fstReader) == null) { + // No further rules can match here; we're done + // searching for matching rules starting at the + // current input position. 
+ break; + } else { + // More matching is possible -- accum the output (if + // any) of the WORD_SEP arc: + pendingOutput = fst.outputs.add(pendingOutput, scratchArc.output); + doFinalCapture = true; + if (liveToken) { + capture(); + } + } + } + + if (doFinalCapture && liveToken && finished == false) { + // Must capture the final token if we captured any prior tokens: + capture(); + } + + if (matchOutput != null) { + + if (liveToken) { + // Single input token synonym; we must buffer it now: + capture(); + } + + // There is a match! + bufferOutputTokens(matchOutput, matchInputLength); + lookaheadNextRead += matchInputLength; + //System.out.println(" precmatch; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos()); + lookahead.freeBefore(lookaheadNextRead); + //System.out.println(" match; set lookaheadNextRead=" + lookaheadNextRead + " now max=" + lookahead.getMaxPos()); + return true; + } else { + //System.out.println(" no match; lookaheadNextRead=" + lookaheadNextRead); + return false; + } + + //System.out.println(" parse done inputSkipCount=" + inputSkipCount + " nextRead=" + nextRead + " nextWrite=" + nextWrite); + } + + /** + * Expands the output graph into the necessary tokens, adding + * synonyms as side paths parallel to the input tokens, and + * buffers them in the output token buffer. + */ + private void bufferOutputTokens(BytesRef bytes, int matchInputLength) { + bytesReader.reset(bytes.bytes, bytes.offset, bytes.length); + + final int code = bytesReader.readVInt(); + final boolean keepOrig = (code & 0x1) == 0; + //System.out.println(" buffer: keepOrig=" + keepOrig + " matchInputLength=" + matchInputLength); + + // How many nodes along all paths; we need this to assign the + // node ID for the final end node where all paths merge back: + int totalPathNodes; + if (keepOrig) { + assert matchInputLength > 0; + totalPathNodes = matchInputLength - 1; + } else { + totalPathNodes = 0; + } + + // How many synonyms we will insert over this match: + final int count = code >>> 1; + + // TODO: we could encode this instead into the FST: + + // 1st pass: count how many new nodes we need + List> paths = new ArrayList<>(); + for (int outputIDX = 0; outputIDX < count; outputIDX++) { + int wordID = bytesReader.readVInt(); + synonyms.words.get(wordID, scratchBytes); + scratchChars.copyUTF8Bytes(scratchBytes); + int lastStart = 0; + + List path = new ArrayList<>(); + paths.add(path); + int chEnd = scratchChars.length(); + for (int chUpto = 0; chUpto <= chEnd; chUpto++) { + if (chUpto == chEnd || scratchChars.charAt(chUpto) == SynonymMap.WORD_SEPARATOR) { + path.add(new String(scratchChars.chars(), lastStart, chUpto - lastStart)); + lastStart = 1 + chUpto; + } + } + + assert path.size() > 0; + totalPathNodes += path.size() - 1; + } + //System.out.println(" totalPathNodes=" + totalPathNodes); + + // 2nd pass: buffer tokens for the graph fragment + + // NOTE: totalPathNodes will be 0 in the case where the matched + // input is a single token and all outputs are also a single token + + // We "spawn" a side-path for each of the outputs for this matched + // synonym, all ending back at this end node: + + int startNode = nextNodeOut; + + int endNode = startNode + totalPathNodes + 1; + //System.out.println(" " + paths.size() + " new side-paths"); + + // First, fanout all tokens departing start node for these new side paths: + int newNodeCount = 0; + for (List path : paths) { + int pathEndNode; + //System.out.println(" path size=" + path.size()); + if (path.size() == 1) { + // 
Single token output, so there are no intermediate nodes: + pathEndNode = endNode; + } else { + pathEndNode = nextNodeOut + newNodeCount + 1; + newNodeCount += path.size() - 1; + } + outputBuffer.add(new BufferedOutputToken(null, path.get(0), startNode, pathEndNode)); + } + + // We must do the original tokens last, else the offsets "go backwards": + if (keepOrig) { + BufferedInputToken token = lookahead.get(lookaheadNextRead); + int inputEndNode; + if (matchInputLength == 1) { + // Single token matched input, so there are no intermediate nodes: + inputEndNode = endNode; + } else { + inputEndNode = nextNodeOut + newNodeCount + 1; + } + + //System.out.println(" keepOrig first token: " + token.term); + + outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), startNode, inputEndNode)); + } + + nextNodeOut = endNode; + + // Do full side-path for each syn output: + for (int pathID = 0; pathID < paths.size(); pathID++) { + List path = paths.get(pathID); + if (path.size() > 1) { + int lastNode = outputBuffer.get(pathID).endNode; + for (int i = 1; i < path.size() - 1; i++) { + outputBuffer.add(new BufferedOutputToken(null, path.get(i), lastNode, lastNode + 1)); + lastNode++; + } + outputBuffer.add(new BufferedOutputToken(null, path.get(path.size() - 1), lastNode, endNode)); + } + } + + if (keepOrig && matchInputLength > 1) { + // Do full "side path" with the original tokens: + int lastNode = outputBuffer.get(paths.size()).endNode; + for (int i = 1; i < matchInputLength - 1; i++) { + BufferedInputToken token = lookahead.get(lookaheadNextRead + i); + outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), lastNode, lastNode + 1)); + lastNode++; + } + BufferedInputToken token = lookahead.get(lookaheadNextRead + matchInputLength - 1); + outputBuffer.add(new BufferedOutputToken(token.state, token.term.toString(), lastNode, endNode)); + } + + /* + System.out.println(" after buffer: " + outputBuffer.size() + " tokens:"); + for(BufferedOutputToken token : outputBuffer) { + System.out.println(" tok: " + token.term + " startNode=" + token.startNode + " endNode=" + token.endNode); + } + */ + } + + /** + * Buffers the current input token into lookahead buffer. 
+ */ + private void capture() { + assert liveToken; + liveToken = false; + BufferedInputToken token = lookahead.get(lookaheadNextWrite); + lookaheadNextWrite++; + + token.state = captureState(); + token.startOffset = offsetAtt.startOffset(); + token.endOffset = offsetAtt.endOffset(); + assert token.term.length() == 0; + token.term.append(termAtt); + + captureCount++; + maxLookaheadUsed = Math.max(maxLookaheadUsed, lookahead.getBufferSize()); + //System.out.println(" maxLookaheadUsed=" + maxLookaheadUsed); + } + + @Override + public void reset() throws IOException { + super.reset(); + lookahead.reset(); + lookaheadNextWrite = 0; + lookaheadNextRead = 0; + captureCount = 0; + lastNodeOut = -1; + nextNodeOut = 0; + matchStartOffset = -1; + matchEndOffset = -1; + finished = false; + liveToken = false; + outputBuffer.clear(); + maxLookaheadUsed = 0; + //System.out.println("S: reset"); + } + + // for testing + int getCaptureCount() { + return captureCount; + } + + // for testing + int getMaxLookaheadUsed() { + return maxLookaheadUsed; + } +} diff --git a/core/src/main/java/org/apache/lucene/search/GraphQuery.java b/core/src/main/java/org/apache/lucene/search/GraphQuery.java new file mode 100644 index 00000000000..cad316d701c --- /dev/null +++ b/core/src/main/java/org/apache/lucene/search/GraphQuery.java @@ -0,0 +1,115 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.search; + +import org.apache.lucene.index.IndexReader; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Objects; + +/** + * A query that wraps multiple sub-queries generated from a graph token stream. + */ +public final class GraphQuery extends Query { + private final Query[] queries; + private final boolean hasBoolean; + + /** + * Constructor sets the queries and checks if any of them are + * a boolean query. + * + * @param queries the non-null array of queries + */ + public GraphQuery(Query... queries) { + this.queries = Objects.requireNonNull(queries).clone(); + for (Query query : queries) { + if (query instanceof BooleanQuery) { + hasBoolean = true; + return; + } + } + hasBoolean = false; + } + + /** + * Gets the queries + * + * @return unmodifiable list of Query + */ + public List getQueries() { + return Collections.unmodifiableList(Arrays.asList(queries)); + } + + /** + * If there is at least one boolean query or not. + * + * @return true if there is a boolean, false if not + */ + public boolean hasBoolean() { + return hasBoolean; + } + + /** + * Rewrites to a single query or a boolean query where each query is a SHOULD clause. 
+ */ + @Override + public Query rewrite(IndexReader reader) throws IOException { + if (queries.length == 0) { + return new BooleanQuery.Builder().build(); + } + + if (queries.length == 1) { + return queries[0]; + } + + BooleanQuery.Builder q = new BooleanQuery.Builder(); + q.setDisableCoord(true); + for (Query clause : queries) { + q.add(clause, BooleanClause.Occur.SHOULD); + } + + return q.build(); + } + + @Override + public String toString(String field) { + StringBuilder builder = new StringBuilder("Graph("); + for (int i = 0; i < queries.length; i++) { + if (i != 0) { + builder.append(", "); + } + builder.append(Objects.toString(queries[i])); + } + builder.append(")"); + return builder.toString(); + } + + @Override + public boolean equals(Object other) { + return sameClassAs(other) && + Arrays.equals(queries, ((GraphQuery) other).queries); + } + + @Override + public int hashCode() { + return 31 * classHash() + Arrays.hashCode(queries); + } +} diff --git a/core/src/main/java/org/apache/lucene/util/XQueryBuilder.java b/core/src/main/java/org/apache/lucene/util/XQueryBuilder.java new file mode 100644 index 00000000000..dbf54b7c452 --- /dev/null +++ b/core/src/main/java/org/apache/lucene/util/XQueryBuilder.java @@ -0,0 +1,435 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.MultiPhraseQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; +import org.apache.lucene.search.TermQuery; + +/** + * Creates queries from the {@link Analyzer} chain. + *

+ * Example usage:
+ * <pre class="prettyprint">
+ *   QueryBuilder builder = new QueryBuilder(analyzer);
+ *   Query a = builder.createBooleanQuery("body", "just a test");
+ *   Query b = builder.createPhraseQuery("body", "another test");
+ *   Query c = builder.createMinShouldMatchQuery("body", "another test", 0.5f);
+ * </pre>
+ * <p>
+ * This can also be used as a subclass for query parsers to make it easier + * to interact with the analysis chain. Factory methods such as {@code newTermQuery} + * are provided so that the generated queries can be customized. + * + * TODO: un-fork once we are on Lucene 6.4.0 + * This is only forked due to `createFieldQuery` being final and the analyze* methods being private. Lucene 6.4.0 removes final and will + * make the private methods protected allowing us to override it. + */ +public class XQueryBuilder { + private Analyzer analyzer; + private boolean enablePositionIncrements = true; + + /** Creates a new QueryBuilder using the given analyzer. */ + public XQueryBuilder(Analyzer analyzer) { + this.analyzer = analyzer; + } + + /** + * Creates a boolean query from the query text. + *

+ * This is equivalent to {@code createBooleanQuery(field, queryText, Occur.SHOULD)} + * @param field field name + * @param queryText text to be passed to the analyzer + * @return {@code TermQuery} or {@code BooleanQuery}, based on the analysis + * of {@code queryText} + */ + public Query createBooleanQuery(String field, String queryText) { + return createBooleanQuery(field, queryText, BooleanClause.Occur.SHOULD); + } + + /** + * Creates a boolean query from the query text. + *

+ * @param field field name + * @param queryText text to be passed to the analyzer + * @param operator operator used for clauses between analyzer tokens. + * @return {@code TermQuery} or {@code BooleanQuery}, based on the analysis + * of {@code queryText} + */ + public Query createBooleanQuery(String field, String queryText, BooleanClause.Occur operator) { + if (operator != BooleanClause.Occur.SHOULD && operator != BooleanClause.Occur.MUST) { + throw new IllegalArgumentException("invalid operator: only SHOULD or MUST are allowed"); + } + return createFieldQuery(analyzer, operator, field, queryText, false, 0); + } + + /** + * Creates a phrase query from the query text. + *

+ * This is equivalent to {@code createPhraseQuery(field, queryText, 0)} + * @param field field name + * @param queryText text to be passed to the analyzer + * @return {@code TermQuery}, {@code BooleanQuery}, {@code PhraseQuery}, or + * {@code MultiPhraseQuery}, based on the analysis of {@code queryText} + */ + public Query createPhraseQuery(String field, String queryText) { + return createPhraseQuery(field, queryText, 0); + } + + /** + * Creates a phrase query from the query text. + *

+ * @param field field name + * @param queryText text to be passed to the analyzer + * @param phraseSlop number of other words permitted between words in query phrase + * @return {@code TermQuery}, {@code BooleanQuery}, {@code PhraseQuery}, or + * {@code MultiPhraseQuery}, based on the analysis of {@code queryText} + */ + public Query createPhraseQuery(String field, String queryText, int phraseSlop) { + return createFieldQuery(analyzer, BooleanClause.Occur.MUST, field, queryText, true, phraseSlop); + } + + /** + * Creates a minimum-should-match query from the query text. + *

+ * @param field field name + * @param queryText text to be passed to the analyzer + * @param fraction of query terms {@code [0..1]} that should match + * @return {@code TermQuery} or {@code BooleanQuery}, based on the analysis + * of {@code queryText} + */ + public Query createMinShouldMatchQuery(String field, String queryText, float fraction) { + if (Float.isNaN(fraction) || fraction < 0 || fraction > 1) { + throw new IllegalArgumentException("fraction should be >= 0 and <= 1"); + } + + // TODO: wierd that BQ equals/rewrite/scorer doesn't handle this? + if (fraction == 1) { + return createBooleanQuery(field, queryText, BooleanClause.Occur.MUST); + } + + Query query = createFieldQuery(analyzer, BooleanClause.Occur.SHOULD, field, queryText, false, 0); + if (query instanceof BooleanQuery) { + BooleanQuery bq = (BooleanQuery) query; + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setDisableCoord(bq.isCoordDisabled()); + builder.setMinimumNumberShouldMatch((int) (fraction * bq.clauses().size())); + for (BooleanClause clause : bq) { + builder.add(clause); + } + query = builder.build(); + } + return query; + } + + /** + * Returns the analyzer. + * @see #setAnalyzer(Analyzer) + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * Sets the analyzer used to tokenize text. + */ + public void setAnalyzer(Analyzer analyzer) { + this.analyzer = analyzer; + } + + /** + * Returns true if position increments are enabled. + * @see #setEnablePositionIncrements(boolean) + */ + public boolean getEnablePositionIncrements() { + return enablePositionIncrements; + } + + /** + * Set to true to enable position increments in result query. + *

+ * When set, result phrase and multi-phrase queries will + * be aware of position increments. + * Useful when e.g. a StopFilter increases the position increment of + * the token that follows an omitted token. + *

+ * Default: true. + */ + public void setEnablePositionIncrements(boolean enable) { + this.enablePositionIncrements = enable; + } + + /** + * Creates a query from the analysis chain. + *

+ * Expert: this is more useful for subclasses such as queryparsers. + * If using this class directly, just use {@link #createBooleanQuery(String, String)} + * and {@link #createPhraseQuery(String, String)} + * @param analyzer analyzer used for this query + * @param operator default boolean operator used for this query + * @param field field to create queries against + * @param queryText text to be passed to the analysis chain + * @param quoted true if phrases should be generated when terms occur at more than one position + * @param phraseSlop slop factor for phrase/multiphrase queries + */ + protected Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, boolean quoted, + int phraseSlop) { + assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; + + // Use the analyzer to get all the tokens, and then build an appropriate + // query based on the analysis chain. + + try (TokenStream source = analyzer.tokenStream(field, queryText); + CachingTokenFilter stream = new CachingTokenFilter(source)) { + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + + if (termAtt == null) { + return null; + } + + // phase 1: read through the stream and assess the situation: + // counting the number of tokens/positions and marking if we have any synonyms. + + int numTokens = 0; + int positionCount = 0; + boolean hasSynonyms = false; + + stream.reset(); + while (stream.incrementToken()) { + numTokens++; + int positionIncrement = posIncAtt.getPositionIncrement(); + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + hasSynonyms = true; + } + } + + // phase 2: based on token count, presence of synonyms, and options + // formulate a single term, boolean, or phrase. 
+ + if (numTokens == 0) { + return null; + } else if (numTokens == 1) { + // single term + return analyzeTerm(field, stream); + } else if (quoted && positionCount > 1) { + // phrase + if (hasSynonyms) { + // complex phrase with synonyms + return analyzeMultiPhrase(field, stream, phraseSlop); + } else { + // simple phrase + return analyzePhrase(field, stream, phraseSlop); + } + } else { + // boolean + if (positionCount == 1) { + // only one position, with synonyms + return analyzeBoolean(field, stream); + } else { + // complex case: multiple positions + return analyzeMultiBoolean(field, stream, operator); + } + } + } catch (IOException e) { + throw new RuntimeException("Error analyzing query text", e); + } + } + + /** + * Creates simple term query from the cached tokenstream contents + */ + protected Query analyzeTerm(String field, TokenStream stream) throws IOException { + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + + stream.reset(); + if (!stream.incrementToken()) { + throw new AssertionError(); + } + + return newTermQuery(new Term(field, termAtt.getBytesRef())); + } + + /** + * Creates simple boolean query from the cached tokenstream contents + */ + protected Query analyzeBoolean(String field, TokenStream stream) throws IOException { + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + + stream.reset(); + List terms = new ArrayList<>(); + while (stream.incrementToken()) { + terms.add(new Term(field, termAtt.getBytesRef())); + } + + return newSynonymQuery(terms.toArray(new Term[terms.size()])); + } + + protected void add(BooleanQuery.Builder q, List current, BooleanClause.Occur operator) { + if (current.isEmpty()) { + return; + } + if (current.size() == 1) { + q.add(newTermQuery(current.get(0)), operator); + } else { + q.add(newSynonymQuery(current.toArray(new Term[current.size()])), operator); + } + } + + /** + * Creates complex boolean query from the cached tokenstream contents + */ + protected Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator) throws IOException { + BooleanQuery.Builder q = newBooleanQuery(); + List currentQuery = new ArrayList<>(); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + + stream.reset(); + while (stream.incrementToken()) { + if (posIncrAtt.getPositionIncrement() != 0) { + add(q, currentQuery, operator); + currentQuery.clear(); + } + currentQuery.add(new Term(field, termAtt.getBytesRef())); + } + add(q, currentQuery, operator); + + return q.build(); + } + + /** + * Creates simple phrase query from the cached tokenstream contents + */ + protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException { + PhraseQuery.Builder builder = new PhraseQuery.Builder(); + builder.setSlop(slop); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + int position = -1; + + stream.reset(); + while (stream.incrementToken()) { + if (enablePositionIncrements) { + position += posIncrAtt.getPositionIncrement(); + } else { + position += 1; + } + builder.add(new Term(field, termAtt.getBytesRef()), position); + } + + return builder.build(); + } + + /** + * Creates complex phrase query from the cached tokenstream contents + */ + protected Query 
analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException { + MultiPhraseQuery.Builder mpqb = newMultiPhraseQueryBuilder(); + mpqb.setSlop(slop); + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + + PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class); + int position = -1; + + List multiTerms = new ArrayList<>(); + stream.reset(); + while (stream.incrementToken()) { + int positionIncrement = posIncrAtt.getPositionIncrement(); + + if (positionIncrement > 0 && multiTerms.size() > 0) { + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + multiTerms.clear(); + } + position += positionIncrement; + multiTerms.add(new Term(field, termAtt.getBytesRef())); + } + + if (enablePositionIncrements) { + mpqb.add(multiTerms.toArray(new Term[0]), position); + } else { + mpqb.add(multiTerms.toArray(new Term[0])); + } + return mpqb.build(); + } + + /** + * Builds a new BooleanQuery instance. + *

+ * This is intended for subclasses that wish to customize the generated queries. + * @return new BooleanQuery instance + */ + protected BooleanQuery.Builder newBooleanQuery() { + return new BooleanQuery.Builder(); + } + + /** + * Builds a new SynonymQuery instance. + *

+ * This is intended for subclasses that wish to customize the generated queries. + * @return new Query instance + */ + protected Query newSynonymQuery(Term terms[]) { + return new SynonymQuery(terms); + } + + /** + * Builds a new TermQuery instance. + *

+ * This is intended for subclasses that wish to customize the generated queries. + * @param term term + * @return new TermQuery instance + */ + protected Query newTermQuery(Term term) { + return new TermQuery(term); + } + + /** + * Builds a new MultiPhraseQuery instance. + *

+ * This is intended for subclasses that wish to customize the generated queries. + * @return new MultiPhraseQuery instance + */ + protected MultiPhraseQuery.Builder newMultiPhraseQueryBuilder() { + return new MultiPhraseQuery.Builder(); + } +} diff --git a/core/src/main/java/org/apache/lucene/util/XRollingBuffer.java b/core/src/main/java/org/apache/lucene/util/XRollingBuffer.java new file mode 100644 index 00000000000..5806a84af64 --- /dev/null +++ b/core/src/main/java/org/apache/lucene/util/XRollingBuffer.java @@ -0,0 +1,151 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.util; + + +/** + * Acts like forever growing T[], but internally uses a + * circular buffer to reuse instances of T. + * + * TODO: un-fork once we are on Lucene 6.4.0 + */ +public abstract class XRollingBuffer { + + /** + * Implement to reset an instance + */ + public interface Resettable { + void reset(); + } + + @SuppressWarnings("unchecked") + private T[] buffer = (T[]) new XRollingBuffer.Resettable[8]; + + // Next array index to write to: + private int nextWrite; + + // Next position to write: + private int nextPos; + + // How many valid Position are held in the + // array: + private int count; + + public XRollingBuffer() { + for (int idx = 0; idx < buffer.length; idx++) { + buffer[idx] = newInstance(); + } + } + + protected abstract T newInstance(); + + public void reset() { + nextWrite--; + while (count > 0) { + if (nextWrite == -1) { + nextWrite = buffer.length - 1; + } + buffer[nextWrite--].reset(); + count--; + } + nextWrite = 0; + nextPos = 0; + count = 0; + } + + // For assert: + private boolean inBounds(int pos) { + return pos < nextPos && pos >= nextPos - count; + } + + private int getIndex(int pos) { + int index = nextWrite - (nextPos - pos); + if (index < 0) { + index += buffer.length; + } + return index; + } + + /** + * Get T instance for this absolute position; + * this is allowed to be arbitrarily far "in the + * future" but cannot be before the last freeBefore. 
+ */ + public T get(int pos) { + //System.out.println("RA.get pos=" + pos + " nextPos=" + nextPos + " nextWrite=" + nextWrite + " count=" + count); + while (pos >= nextPos) { + if (count == buffer.length) { + @SuppressWarnings("unchecked") T[] newBuffer = (T[]) new Resettable[ArrayUtil.oversize(1 + count, RamUsageEstimator + .NUM_BYTES_OBJECT_REF)]; + //System.out.println(" grow length=" + newBuffer.length); + System.arraycopy(buffer, nextWrite, newBuffer, 0, buffer.length - nextWrite); + System.arraycopy(buffer, 0, newBuffer, buffer.length - nextWrite, nextWrite); + for (int i = buffer.length; i < newBuffer.length; i++) { + newBuffer[i] = newInstance(); + } + nextWrite = buffer.length; + buffer = newBuffer; + } + if (nextWrite == buffer.length) { + nextWrite = 0; + } + // Should have already been reset: + nextWrite++; + nextPos++; + count++; + } + assert inBounds(pos) : "pos=" + pos + " nextPos=" + nextPos + " count=" + count; + final int index = getIndex(pos); + //System.out.println(" pos=" + pos + " nextPos=" + nextPos + " -> index=" + index); + //assert buffer[index].pos == pos; + return buffer[index]; + } + + /** + * Returns the maximum position looked up, or -1 if no + * position has been looked up since reset/init. + */ + public int getMaxPos() { + return nextPos - 1; + } + + /** + * Returns how many active positions are in the buffer. + */ + public int getBufferSize() { + return count; + } + + public void freeBefore(int pos) { + final int toFree = count - (nextPos - pos); + assert toFree >= 0; + assert toFree <= count : "toFree=" + toFree + " count=" + count; + int index = nextWrite - count; + if (index < 0) { + index += buffer.length; + } + for (int i = 0; i < toFree; i++) { + if (index == buffer.length) { + index = 0; + } + //System.out.println(" fb idx=" + index); + buffer[index].reset(); + index++; + } + count -= toFree; + } +} diff --git a/core/src/main/java/org/elasticsearch/client/transport/TransportClientNodesService.java b/core/src/main/java/org/elasticsearch/client/transport/TransportClientNodesService.java index c4f46294cd4..350a35b6e49 100644 --- a/core/src/main/java/org/elasticsearch/client/transport/TransportClientNodesService.java +++ b/core/src/main/java/org/elasticsearch/client/transport/TransportClientNodesService.java @@ -42,6 +42,7 @@ import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.common.util.concurrent.FutureUtils; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.ConnectionProfile; import org.elasticsearch.transport.FutureTransportResponseHandler; import org.elasticsearch.transport.NodeDisconnectedException; import org.elasticsearch.transport.NodeNotConnectedException; @@ -389,7 +390,7 @@ final class TransportClientNodesService extends AbstractComponent implements Clo try { // its a listed node, light connect to it... logger.trace("connecting to listed node (light) [{}]", listedNode); - transportService.connectToNodeLight(listedNode); + transportService.connectToNode(listedNode, ConnectionProfile.LIGHT_PROFILE); } catch (Exception e) { logger.info( (Supplier) @@ -469,7 +470,7 @@ final class TransportClientNodesService extends AbstractComponent implements Clo } else { // its a listed node, light connect to it... 
logger.trace("connecting to listed node (light) [{}]", listedNode); - transportService.connectToNodeLight(listedNode); + transportService.connectToNode(listedNode, ConnectionProfile.LIGHT_PROFILE); } } catch (Exception e) { logger.debug( diff --git a/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java b/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java index 25836d54a1b..a1f217e1377 100644 --- a/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java +++ b/core/src/main/java/org/elasticsearch/cluster/metadata/IndexMetaData.java @@ -224,12 +224,15 @@ public class IndexMetaData implements Diffable, FromXContentBuild Setting.boolSetting(SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE, false, Property.Dynamic, Property.IndexScope); public static final String INDEX_UUID_NA_VALUE = "_na_"; + public static final String INDEX_ROUTING_REQUIRE_GROUP_PREFIX = "index.routing.allocation.require"; + public static final String INDEX_ROUTING_INCLUDE_GROUP_PREFIX = "index.routing.allocation.include"; + public static final String INDEX_ROUTING_EXCLUDE_GROUP_PREFIX = "index.routing.allocation.exclude"; public static final Setting INDEX_ROUTING_REQUIRE_GROUP_SETTING = - Setting.groupSetting("index.routing.allocation.require.", Property.Dynamic, Property.IndexScope); + Setting.groupSetting(INDEX_ROUTING_REQUIRE_GROUP_PREFIX + ".", Property.Dynamic, Property.IndexScope); public static final Setting INDEX_ROUTING_INCLUDE_GROUP_SETTING = - Setting.groupSetting("index.routing.allocation.include.", Property.Dynamic, Property.IndexScope); + Setting.groupSetting(INDEX_ROUTING_INCLUDE_GROUP_PREFIX + ".", Property.Dynamic, Property.IndexScope); public static final Setting INDEX_ROUTING_EXCLUDE_GROUP_SETTING = - Setting.groupSetting("index.routing.allocation.exclude.", Property.Dynamic, Property.IndexScope); + Setting.groupSetting(INDEX_ROUTING_EXCLUDE_GROUP_PREFIX + ".", Property.Dynamic, Property.IndexScope); public static final Setting INDEX_ROUTING_INITIAL_RECOVERY_GROUP_SETTING = Setting.groupSetting("index.routing.allocation.initial_recovery."); // this is only setable internally not a registered setting!! 
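For illustration only (not part of the patch): the new INDEX_ROUTING_REQUIRE_GROUP_PREFIX constant above is the same string the group setting is registered under, minus the trailing dot, so concrete keys can be derived from it. A minimal sketch assuming the standard Settings builder API; the attribute name "zone" is made up.

import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.Settings;

class RoutingRequireSketch {
    /** Builds index settings that require allocation to nodes whose "zone" attribute matches. */
    static Settings requireZone(String zone) {
        // Derives a concrete key such as "index.routing.allocation.require.zone" from the prefix constant.
        String key = IndexMetaData.INDEX_ROUTING_REQUIRE_GROUP_PREFIX + ".zone";
        return Settings.builder().put(key, zone).build();
    }
}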
diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java index 81b9042fb33..b87add57ce7 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/DiskThresholdSettings.java @@ -49,6 +49,8 @@ public class DiskThresholdSettings { Setting.positiveTimeSetting("cluster.routing.allocation.disk.reroute_interval", TimeValue.timeValueSeconds(60), Setting.Property.Dynamic, Setting.Property.NodeScope); + private volatile String lowWatermarkRaw; + private volatile String highWatermarkRaw; private volatile Double freeDiskThresholdLow; private volatile Double freeDiskThresholdHigh; private volatile ByteSizeValue freeBytesThresholdLow; @@ -86,6 +88,7 @@ public class DiskThresholdSettings { private void setLowWatermark(String lowWatermark) { // Watermark is expressed in terms of used data, but we need "free" data watermark + this.lowWatermarkRaw = lowWatermark; this.freeDiskThresholdLow = 100.0 - thresholdPercentageFromWatermark(lowWatermark); this.freeBytesThresholdLow = thresholdBytesFromWatermark(lowWatermark, CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey()); @@ -93,11 +96,26 @@ public class DiskThresholdSettings { private void setHighWatermark(String highWatermark) { // Watermark is expressed in terms of used data, but we need "free" data watermark + this.highWatermarkRaw = highWatermark; this.freeDiskThresholdHigh = 100.0 - thresholdPercentageFromWatermark(highWatermark); this.freeBytesThresholdHigh = thresholdBytesFromWatermark(highWatermark, CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey()); } + /** + * Gets the raw (uninterpreted) low watermark value as found in the settings. + */ + public String getLowWatermarkRaw() { + return lowWatermarkRaw; + } + + /** + * Gets the raw (uninterpreted) high watermark value as found in the settings. 
+ */ + public String getHighWatermarkRaw() { + return highWatermarkRaw; + } + public Double getFreeDiskThresholdLow() { return freeDiskThresholdLow; } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java index f78dc784d9d..93c45e7832f 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/AwarenessAllocationDecider.java @@ -87,22 +87,6 @@ public class AwarenessAllocationDecider extends AllocationDecider { private volatile Map forcedAwarenessAttributes; - /** - * Creates a new {@link AwarenessAllocationDecider} instance - */ - public AwarenessAllocationDecider() { - this(Settings.Builder.EMPTY_SETTINGS); - } - - /** - * Creates a new {@link AwarenessAllocationDecider} instance from given settings - * - * @param settings {@link Settings} to use - */ - public AwarenessAllocationDecider(Settings settings) { - this(settings, new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS)); - } - public AwarenessAllocationDecider(Settings settings, ClusterSettings clusterSettings) { super(settings); this.awarenessAttributes = CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.get(settings); @@ -140,7 +124,9 @@ public class AwarenessAllocationDecider extends AllocationDecider { private Decision underCapacity(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation, boolean moveToNode) { if (awarenessAttributes.length == 0) { - return allocation.decision(Decision.YES, NAME, "allocation awareness is not enabled"); + return allocation.decision(Decision.YES, NAME, + "allocation awareness is not enabled, set [%s] to enable it", + CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey()); } IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); @@ -148,7 +134,10 @@ public class AwarenessAllocationDecider extends AllocationDecider { for (String awarenessAttribute : awarenessAttributes) { // the node the shard exists on must be associated with an awareness attribute if (!node.node().getAttributes().containsKey(awarenessAttribute)) { - return allocation.decision(Decision.NO, NAME, "node does not contain the awareness attribute: [%s]", awarenessAttribute); + return allocation.decision(Decision.NO, NAME, + "node does not contain the awareness attribute [%s]; required attributes [%s=%s]", + awarenessAttribute, CLUSTER_ROUTING_ALLOCATION_AWARENESS_ATTRIBUTE_SETTING.getKey(), + allocation.debugDecision() ? 
Strings.arrayToCommaDelimitedString(awarenessAttributes) : null); } // build attr_value -> nodes map @@ -206,15 +195,14 @@ public class AwarenessAllocationDecider extends AllocationDecider { // if we are above with leftover, then we know we are not good, even with mod if (currentNodeCount > (requiredCountPerAttribute + leftoverPerAttribute)) { return allocation.decision(Decision.NO, NAME, - "there are too many shards on the node for attribute [%s], there are [%d] total shards for the index " + - " and [%d] total attributes values, expected the node count [%d] to be lower or equal to the required " + - "number of shards per attribute [%d] plus leftover [%d]", + "there are too many copies of the shard allocated to nodes with attribute [%s], there are [%d] total configured " + + "shard copies for this shard id and [%d] total attribute values, expected the allocated shard count per " + + "attribute [%d] to be less than or equal to the upper bound of the required number of shards per attribute [%d]", awarenessAttribute, shardCount, numberOfAttributes, currentNodeCount, - requiredCountPerAttribute, - leftoverPerAttribute); + requiredCountPerAttribute + leftoverPerAttribute); } // all is well, we are below or same as average if (currentNodeCount <= requiredCountPerAttribute) { diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java index c343d4254c8..4e4fb58799b 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ClusterRebalanceAllocationDecider.java @@ -48,14 +48,15 @@ import org.elasticsearch.common.settings.Settings; public class ClusterRebalanceAllocationDecider extends AllocationDecider { public static final String NAME = "cluster_rebalance"; + private static final String CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE = "cluster.routing.allocation.allow_rebalance"; public static final Setting CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING = - new Setting<>("cluster.routing.allocation.allow_rebalance", ClusterRebalanceType.INDICES_ALL_ACTIVE.name().toLowerCase(Locale.ROOT), + new Setting<>(CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, ClusterRebalanceType.INDICES_ALL_ACTIVE.toString(), ClusterRebalanceType::parseString, Property.Dynamic, Property.NodeScope); /** * An enum representation for the configured re-balance type. 
*/ - public static enum ClusterRebalanceType { + public enum ClusterRebalanceType { /** * Re-balancing is allowed once a shard replication group is active */ @@ -80,6 +81,11 @@ public class ClusterRebalanceAllocationDecider extends AllocationDecider { throw new IllegalArgumentException("Illegal value for " + CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING + ": " + typeString); } + + @Override + public String toString() { + return name().toLowerCase(Locale.ROOT); + } } private volatile ClusterRebalanceType type; @@ -94,8 +100,7 @@ public class ClusterRebalanceAllocationDecider extends AllocationDecider { CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getRaw(settings)); type = ClusterRebalanceType.INDICES_ALL_ACTIVE; } - logger.debug("using [{}] with [{}]", CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING.getKey(), - type.toString().toLowerCase(Locale.ROOT)); + logger.debug("using [{}] with [{}]", CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, type); clusterSettings.addSettingsUpdateConsumer(CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE_SETTING, this::setType); } @@ -115,12 +120,14 @@ public class ClusterRebalanceAllocationDecider extends AllocationDecider { // check if there are unassigned primaries. if ( allocation.routingNodes().hasUnassignedPrimaries() ) { return allocation.decision(Decision.NO, NAME, - "the cluster has unassigned primary shards and rebalance type is set to [%s]", type); + "the cluster has unassigned primary shards and [%s] is set to [%s]", + CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, type); } // check if there are initializing primaries that don't have a relocatingNodeId entry. if ( allocation.routingNodes().hasInactivePrimaries() ) { return allocation.decision(Decision.NO, NAME, - "the cluster has inactive primary shards and rebalance type is set to [%s]", type); + "the cluster has inactive primary shards and [%s] is set to [%s]", + CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, type); } return allocation.decision(Decision.YES, NAME, "all primary shards are active"); @@ -129,16 +136,18 @@ public class ClusterRebalanceAllocationDecider extends AllocationDecider { // check if there are unassigned shards. if (allocation.routingNodes().hasUnassignedShards() ) { return allocation.decision(Decision.NO, NAME, - "the cluster has unassigned shards and rebalance type is set to [%s]", type); + "the cluster has unassigned shards and [%s] is set to [%s]", + CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, type); } // in case all indices are assigned, are there initializing shards which // are not relocating? 
if ( allocation.routingNodes().hasInactiveShards() ) { return allocation.decision(Decision.NO, NAME, - "the cluster has inactive shards and rebalance type is set to [%s]", type); + "the cluster has inactive shards and [%s] is set to [%s]", + CLUSTER_ROUTING_ALLOCATION_ALLOW_REBALANCE, type); } } // type == Type.ALWAYS - return allocation.decision(Decision.YES, NAME, "all shards are active, rebalance type is [%s]", type); + return allocation.decision(Decision.YES, NAME, "all shards are active"); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java index dd3ece10dd5..6ec123ddab3 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ConcurrentRebalanceAllocationDecider.java @@ -66,9 +66,11 @@ public class ConcurrentRebalanceAllocationDecider extends AllocationDecider { } int relocatingShards = allocation.routingNodes().getRelocatingShardCount(); if (relocatingShards >= clusterConcurrentRebalance) { - return allocation.decision(Decision.NO, NAME, - "too many shards are concurrently rebalancing [%d], limit: [%d]", - relocatingShards, clusterConcurrentRebalance); + return allocation.decision(Decision.THROTTLE, NAME, + "reached the limit of concurrently rebalancing shards [%d], [%s=%d]", + relocatingShards, + CLUSTER_ROUTING_ALLOCATION_CLUSTER_CONCURRENT_REBALANCE_SETTING.getKey(), + clusterConcurrentRebalance); } return allocation.decision(Decision.YES, NAME, "below threshold [%d] for concurrent rebalances, current rebalance shard count [%d]", diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java index 53d3dd29034..5eb1ae1751e 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDecider.java @@ -40,6 +40,9 @@ import org.elasticsearch.common.unit.ByteSizeValue; import org.elasticsearch.index.Index; import org.elasticsearch.index.shard.ShardId; +import static org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING; +import static org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings.CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING; + /** * The {@link DiskThresholdDecider} checks that the node a shard is potentially * being allocated to has enough disk space. 
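The raw-watermark getters added to DiskThresholdSettings above are what the DiskThresholdDecider messages below interpolate. A minimal sketch of their behaviour, assuming the existing DiskThresholdSettings(Settings, ClusterSettings) constructor and percentage-style watermark values:

import org.elasticsearch.cluster.routing.allocation.DiskThresholdSettings;
import org.elasticsearch.common.settings.ClusterSettings;
import org.elasticsearch.common.settings.Settings;

public class WatermarkRawSketch {
    public static void main(String[] args) {
        Settings settings = Settings.builder()
            .put("cluster.routing.allocation.disk.watermark.low", "85%")
            .put("cluster.routing.allocation.disk.watermark.high", "90%")
            .build();
        DiskThresholdSettings thresholds = new DiskThresholdSettings(
            settings, new ClusterSettings(settings, ClusterSettings.BUILT_IN_CLUSTER_SETTINGS));
        // The raw strings are kept verbatim so decision messages can echo exactly what was configured,
        // while the derived "free space" thresholds stay available as before.
        System.out.println(thresholds.getLowWatermarkRaw());       // 85%
        System.out.println(thresholds.getHighWatermarkRaw());      // 90%
        System.out.println(thresholds.getFreeDiskThresholdLow());  // 15.0 (free-space view of the 85% used watermark)
    }
}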
@@ -135,8 +138,10 @@ public class DiskThresholdDecider extends AllocationDecider { diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId()); } return allocation.decision(Decision.NO, NAME, - "the node is above the low watermark and has less than required [%s] free, free: [%s]", - diskThresholdSettings.getFreeBytesThresholdLow(), new ByteSizeValue(freeBytes)); + "the node is above the low watermark [%s=%s], having less than the minimum required [%s] free space, actual free: [%s]", + CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getLowWatermarkRaw(), + diskThresholdSettings.getFreeBytesThresholdLow(), new ByteSizeValue(freeBytes)); } else if (freeBytes > diskThresholdSettings.getFreeBytesThresholdHigh().getBytes()) { // Allow the shard to be allocated because it is primary that // has never been allocated if it's under the high watermark @@ -146,7 +151,8 @@ public class DiskThresholdDecider extends AllocationDecider { diskThresholdSettings.getFreeBytesThresholdLow(), freeBytes, node.nodeId()); } return allocation.decision(Decision.YES, NAME, - "the node is above the low watermark, but this primary shard has never been allocated before"); + "the node is above the low watermark, but less than the high watermark, and this primary shard has " + + "never been allocated before"); } else { // Even though the primary has never been allocated, the node is // above the high watermark, so don't allow allocating the shard @@ -156,9 +162,11 @@ public class DiskThresholdDecider extends AllocationDecider { diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId()); } return allocation.decision(Decision.NO, NAME, - "the node is above the high watermark even though this shard has never been allocated " + - "and has less than required [%s] free on node, free: [%s]", - diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes)); + "the node is above the high watermark [%s=%s], having less than the minimum required [%s] free space, " + + "actual free: [%s]", + CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getHighWatermarkRaw(), + diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes)); } } @@ -172,8 +180,10 @@ public class DiskThresholdDecider extends AllocationDecider { Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId()); } return allocation.decision(Decision.NO, NAME, - "the node is above the low watermark and has more than allowed [%s%%] used disk, free: [%s%%]", - usedDiskThresholdLow, freeDiskPercentage); + "the node is above the low watermark [%s=%s], using more disk space than the maximum allowed [%s%%], " + + "actual free: [%s%%]", + CLUSTER_ROUTING_ALLOCATION_LOW_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getLowWatermarkRaw(), usedDiskThresholdLow, freeDiskPercentage); } else if (freeDiskPercentage > diskThresholdSettings.getFreeDiskThresholdHigh()) { // Allow the shard to be allocated because it is primary that // has never been allocated if it's under the high watermark @@ -184,7 +194,8 @@ public class DiskThresholdDecider extends AllocationDecider { Strings.format1Decimals(usedDiskPercentage, "%"), node.nodeId()); } return allocation.decision(Decision.YES, NAME, - "the node is above the low watermark, but this primary shard has never been allocated before"); + "the node is above the low watermark, but less than the high watermark, and this primary shard has " + + "never been allocated before"); } else { // 
Even though the primary has never been allocated, the node is // above the high watermark, so don't allow allocating the shard @@ -195,9 +206,10 @@ public class DiskThresholdDecider extends AllocationDecider { Strings.format1Decimals(freeDiskPercentage, "%"), node.nodeId()); } return allocation.decision(Decision.NO, NAME, - "the node is above the high watermark even though this shard has never been allocated " + - "and has more than allowed [%s%%] used disk, free: [%s%%]", - usedDiskThresholdHigh, freeDiskPercentage); + "the node is above the high watermark [%s=%s], using more disk space than the maximum allowed [%s%%], " + + "actual free: [%s%%]", + CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeDiskPercentage); } } @@ -210,9 +222,11 @@ public class DiskThresholdDecider extends AllocationDecider { "{} free bytes threshold ({} bytes free), preventing allocation", node.nodeId(), diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytesAfterShard); return allocation.decision(Decision.NO, NAME, - "after allocating the shard to this node, it would be above the high watermark " + - "and have less than required [%s] free, free: [%s]", - diskThresholdSettings.getFreeBytesThresholdLow(), new ByteSizeValue(freeBytesAfterShard)); + "allocating the shard to this node will bring the node above the high watermark [%s=%s] " + + "and cause it to have less than the minimum required [%s] of free space (free bytes after shard added: [%s])", + CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getHighWatermarkRaw(), + diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytesAfterShard)); } if (freeSpaceAfterShard < diskThresholdSettings.getFreeDiskThresholdHigh()) { logger.warn("after allocating, node [{}] would have more than the allowed " + @@ -220,9 +234,10 @@ public class DiskThresholdDecider extends AllocationDecider { node.nodeId(), Strings.format1Decimals(diskThresholdSettings.getFreeDiskThresholdHigh(), "%"), Strings.format1Decimals(freeSpaceAfterShard, "%")); return allocation.decision(Decision.NO, NAME, - "after allocating the shard to this node, it would be above the high watermark " + - "and have more than allowed [%s%%] used disk, free: [%s%%]", - usedDiskThresholdLow, freeSpaceAfterShard); + "allocating the shard to this node will bring the node above the high watermark [%s=%s] " + + "and cause it to use more disk space than the maximum allowed [%s%%] (free space after shard added: [%s%%])", + CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getHighWatermarkRaw(), usedDiskThresholdHigh, freeSpaceAfterShard); } return allocation.decision(Decision.YES, NAME, @@ -264,9 +279,11 @@ public class DiskThresholdDecider extends AllocationDecider { diskThresholdSettings.getFreeBytesThresholdHigh(), freeBytes, node.nodeId()); } return allocation.decision(Decision.NO, NAME, - "after allocating this shard this node would be above the high watermark " + - "and there would be less than required [%s] free on node, free: [%s]", - diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes)); + "the shard cannot remain on this node because it is above the high watermark [%s=%s] " + + "and there is less than the required [%s] free space on node, actual free: [%s]", + CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getHighWatermarkRaw(), + 
diskThresholdSettings.getFreeBytesThresholdHigh(), new ByteSizeValue(freeBytes)); } if (freeDiskPercentage < diskThresholdSettings.getFreeDiskThresholdHigh()) { if (logger.isDebugEnabled()) { @@ -274,9 +291,11 @@ public class DiskThresholdDecider extends AllocationDecider { diskThresholdSettings.getFreeDiskThresholdHigh(), freeDiskPercentage, node.nodeId()); } return allocation.decision(Decision.NO, NAME, - "after allocating this shard this node would be above the high watermark " + - "and there would be less than required [%s%%] free disk on node, free: [%s%%]", - diskThresholdSettings.getFreeDiskThresholdHigh(), freeDiskPercentage); + "the shard cannot remain on this node because it is above the high watermark [%s=%s] " + + "and there is less than the required [%s%%] free disk on node, actual free: [%s%%]", + CLUSTER_ROUTING_ALLOCATION_HIGH_DISK_WATERMARK_SETTING.getKey(), + diskThresholdSettings.getHighWatermarkRaw(), + diskThresholdSettings.getFreeDiskThresholdHigh(), freeDiskPercentage); } return allocation.decision(Decision.YES, NAME, diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java index 1a38e3742fc..2bb5012da30 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/EnableAllocationDecider.java @@ -98,7 +98,8 @@ public class EnableAllocationDecider extends AllocationDecider { @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { if (allocation.ignoreDisable()) { - return allocation.decision(Decision.YES, NAME, "allocation is explicitly ignoring any disabling of allocation"); + return allocation.decision(Decision.YES, NAME, + "explicitly ignoring any disabling of allocation due to manual allocation commands via the reroute API"); } final IndexMetaData indexMetaData = allocation.metaData().getIndexSafe(shardRouting.index()); diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java index a42db129da9..21b6b3d1354 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/FilterAllocationDecider.java @@ -64,12 +64,15 @@ public class FilterAllocationDecider extends AllocationDecider { public static final String NAME = "filter"; + private static final String CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX = "cluster.routing.allocation.require"; + private static final String CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX = "cluster.routing.allocation.include"; + private static final String CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX = "cluster.routing.allocation.exclude"; public static final Setting CLUSTER_ROUTING_REQUIRE_GROUP_SETTING = - Setting.groupSetting("cluster.routing.allocation.require.", Property.Dynamic, Property.NodeScope); + Setting.groupSetting(CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX + ".", Property.Dynamic, Property.NodeScope); public static final Setting CLUSTER_ROUTING_INCLUDE_GROUP_SETTING = - Setting.groupSetting("cluster.routing.allocation.include.", Property.Dynamic, Property.NodeScope); + Setting.groupSetting(CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX + 
".", Property.Dynamic, Property.NodeScope); public static final Setting CLUSTER_ROUTING_EXCLUDE_GROUP_SETTING = - Setting.groupSetting("cluster.routing.allocation.exclude.", Property.Dynamic, Property.NodeScope); + Setting.groupSetting(CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX + ".", Property.Dynamic, Property.NodeScope); private volatile DiscoveryNodeFilters clusterRequireFilters; private volatile DiscoveryNodeFilters clusterIncludeFilters; @@ -96,8 +99,10 @@ public class FilterAllocationDecider extends AllocationDecider { if (initialRecoveryFilters != null && RecoverySource.isInitialRecovery(shardRouting.recoverySource().getType()) && initialRecoveryFilters.match(node.node()) == false) { - return allocation.decision(Decision.NO, NAME, "node does not match index initial recovery filters [%s]", - indexMd.includeFilters()); + String explanation = (shardRouting.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) ? + "initial allocation of the shrunken index is only allowed on nodes [%s] that hold a copy of every shard in the index" : + "initial allocation of the index is only allowed on nodes [%s]"; + return allocation.decision(Decision.NO, NAME, explanation, initialRecoveryFilters); } } return shouldFilter(shardRouting, node, allocation); @@ -136,17 +141,20 @@ public class FilterAllocationDecider extends AllocationDecider { private Decision shouldIndexFilter(IndexMetaData indexMd, RoutingNode node, RoutingAllocation allocation) { if (indexMd.requireFilters() != null) { if (!indexMd.requireFilters().match(node.node())) { - return allocation.decision(Decision.NO, NAME, "node does not match index required filters [%s]", indexMd.requireFilters()); + return allocation.decision(Decision.NO, NAME, "node does not match [%s] filters [%s]", + IndexMetaData.INDEX_ROUTING_REQUIRE_GROUP_PREFIX, indexMd.requireFilters()); } } if (indexMd.includeFilters() != null) { if (!indexMd.includeFilters().match(node.node())) { - return allocation.decision(Decision.NO, NAME, "node does not match index include filters [%s]", indexMd.includeFilters()); + return allocation.decision(Decision.NO, NAME, "node does not match [%s] filters [%s]", + IndexMetaData.INDEX_ROUTING_INCLUDE_GROUP_PREFIX, indexMd.includeFilters()); } } if (indexMd.excludeFilters() != null) { if (indexMd.excludeFilters().match(node.node())) { - return allocation.decision(Decision.NO, NAME, "node matches index exclude filters [%s]", indexMd.excludeFilters()); + return allocation.decision(Decision.NO, NAME, "node matches [%s] filters [%s]", + IndexMetaData.INDEX_ROUTING_EXCLUDE_GROUP_SETTING.getKey(), indexMd.excludeFilters()); } } return null; @@ -155,17 +163,20 @@ public class FilterAllocationDecider extends AllocationDecider { private Decision shouldClusterFilter(RoutingNode node, RoutingAllocation allocation) { if (clusterRequireFilters != null) { if (!clusterRequireFilters.match(node.node())) { - return allocation.decision(Decision.NO, NAME, "node does not match global required filters [%s]", clusterRequireFilters); + return allocation.decision(Decision.NO, NAME, "node does not match [%s] filters [%s]", + CLUSTER_ROUTING_REQUIRE_GROUP_PREFIX, clusterRequireFilters); } } if (clusterIncludeFilters != null) { if (!clusterIncludeFilters.match(node.node())) { - return allocation.decision(Decision.NO, NAME, "node does not match global include filters [%s]", clusterIncludeFilters); + return allocation.decision(Decision.NO, NAME, "node does not [%s] filters [%s]", + CLUSTER_ROUTING_INCLUDE_GROUP_PREFIX, clusterIncludeFilters); } } if 
(clusterExcludeFilters != null) { if (clusterExcludeFilters.match(node.node())) { - return allocation.decision(Decision.NO, NAME, "node matches global exclude filters [%s]", clusterExcludeFilters); + return allocation.decision(Decision.NO, NAME, "node matches [%s] filters [%s]", + CLUSTER_ROUTING_EXCLUDE_GROUP_PREFIX, clusterExcludeFilters); } } return null; diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RebalanceOnlyWhenActiveAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RebalanceOnlyWhenActiveAllocationDecider.java index d8042f18a27..c4cd2ecf50d 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RebalanceOnlyWhenActiveAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/RebalanceOnlyWhenActiveAllocationDecider.java @@ -37,8 +37,8 @@ public class RebalanceOnlyWhenActiveAllocationDecider extends AllocationDecider @Override public Decision canRebalance(ShardRouting shardRouting, RoutingAllocation allocation) { if (!allocation.routingNodes().allReplicasActive(shardRouting.shardId(), allocation.metaData())) { - return allocation.decision(Decision.NO, NAME, "rebalancing can not occur if not all replicas are active in the cluster"); + return allocation.decision(Decision.NO, NAME, "rebalancing is not allowed until all replicas in the cluster are active"); } - return allocation.decision(Decision.YES, NAME, "all replicas are active in the cluster, rebalancing can occur"); + return allocation.decision(Decision.YES, NAME, "rebalancing is allowed as all replicas are active in the cluster"); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java index 3f2921dfcdc..afd742dd041 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SameShardAllocationDecider.java @@ -61,8 +61,15 @@ public class SameShardAllocationDecider extends AllocationDecider { Iterable assignedShards = allocation.routingNodes().assignedShards(shardRouting.shardId()); for (ShardRouting assignedShard : assignedShards) { if (node.nodeId().equals(assignedShard.currentNodeId())) { - return allocation.decision(Decision.NO, NAME, - "the shard cannot be allocated on the same node id [%s] on which it already exists", node.nodeId()); + if (assignedShard.isSameAllocation(shardRouting)) { + return allocation.decision(Decision.NO, NAME, + "the shard cannot be allocated to the node on which it already exists [%s]", + shardRouting.toString()); + } else { + return allocation.decision(Decision.NO, NAME, + "the shard cannot be allocated to the same node on which a copy of the shard [%s] already exists", + assignedShard.toString()); + } } } if (sameHost) { @@ -72,27 +79,32 @@ public class SameShardAllocationDecider extends AllocationDecider { continue; } // check if its on the same host as the one we want to allocate to - boolean checkNodeOnSameHost = false; + boolean checkNodeOnSameHostName = false; + boolean checkNodeOnSameHostAddress = false; if (Strings.hasLength(checkNode.node().getHostAddress()) && Strings.hasLength(node.node().getHostAddress())) { if (checkNode.node().getHostAddress().equals(node.node().getHostAddress())) { - checkNodeOnSameHost = true; + 
checkNodeOnSameHostAddress = true; } } else if (Strings.hasLength(checkNode.node().getHostName()) && Strings.hasLength(node.node().getHostName())) { if (checkNode.node().getHostName().equals(node.node().getHostName())) { - checkNodeOnSameHost = true; + checkNodeOnSameHostName = true; } } - if (checkNodeOnSameHost) { + if (checkNodeOnSameHostAddress || checkNodeOnSameHostName) { for (ShardRouting assignedShard : assignedShards) { if (checkNode.nodeId().equals(assignedShard.currentNodeId())) { + String hostType = checkNodeOnSameHostAddress ? "address" : "name"; + String host = checkNodeOnSameHostAddress ? node.node().getHostAddress() : node.node().getHostName(); return allocation.decision(Decision.NO, NAME, - "shard cannot be allocated on the same host [%s] on which it already exists", node.nodeId()); + "the shard cannot be allocated on host %s [%s], where it already exists on node [%s]; " + + "set [%s] to false to allow multiple nodes on the same host to hold the same shard copies", + hostType, host, node.nodeId(), CLUSTER_ROUTING_ALLOCATION_SAME_HOST_SETTING.getKey()); } } } } } } - return allocation.decision(Decision.YES, NAME, "shard is not allocated to same node or host"); + return allocation.decision(Decision.YES, NAME, "the shard does not exist on the same " + (sameHost ? "host" : "node")); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java index aa4fe3d593d..15f2cf20c1a 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ShardsLimitAllocationDecider.java @@ -107,17 +107,18 @@ public class ShardsLimitAllocationDecider extends AllocationDecider { } } if (clusterShardLimit > 0 && nodeShardCount >= clusterShardLimit) { - return allocation.decision(Decision.NO, NAME, "too many shards for this node [%d], cluster-level limit per node: [%d]", - nodeShardCount, clusterShardLimit); + return allocation.decision(Decision.NO, NAME, + "too many shards [%d] allocated to this node, [%s=%d]", + nodeShardCount, CLUSTER_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), clusterShardLimit); } if (indexShardLimit > 0 && indexShardCount >= indexShardLimit) { return allocation.decision(Decision.NO, NAME, - "too many shards for this index [%s] on node [%d], index-level limit per node: [%d]", - shardRouting.index(), indexShardCount, indexShardLimit); + "too many shards [%d] allocated to this node for index [%s], [%s=%d]", + indexShardCount, shardRouting.getIndexName(), INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), indexShardLimit); } return allocation.decision(Decision.YES, NAME, - "the shard count is under index limit [%d] and cluster level node limit [%d] of total shards per node", - indexShardLimit, clusterShardLimit); + "the shard count [%d] for this node is under the index limit [%d] and cluster level node limit [%d]", + nodeShardCount, indexShardLimit, clusterShardLimit); } @Override @@ -148,17 +149,18 @@ public class ShardsLimitAllocationDecider extends AllocationDecider { // Subtle difference between the `canAllocate` and `canRemain` is that // this checks > while canAllocate checks >= if (clusterShardLimit > 0 && nodeShardCount > clusterShardLimit) { - return allocation.decision(Decision.NO, NAME, "too many shards for this node [%d], cluster-level limit per node: [%d]", - nodeShardCount, 
clusterShardLimit); + return allocation.decision(Decision.NO, NAME, + "too many shards [%d] allocated to this node, [%s=%d]", + nodeShardCount, CLUSTER_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), clusterShardLimit); } if (indexShardLimit > 0 && indexShardCount > indexShardLimit) { return allocation.decision(Decision.NO, NAME, - "too many shards for this index [%s] on node [%d], index-level limit per node: [%d]", - shardRouting.index(), indexShardCount, indexShardLimit); + "too many shards [%d] allocated to this node for index [%s], [%s=%d]", + indexShardCount, shardRouting.getIndexName(), INDEX_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), indexShardLimit); } return allocation.decision(Decision.YES, NAME, - "the shard count is under index limit [%d] and cluster level node limit [%d] of total shards per node", - indexShardLimit, clusterShardLimit); + "the shard count [%d] for this node is under the index limit [%d] and cluster level node limit [%d]", + nodeShardCount, indexShardLimit, clusterShardLimit); } @Override @@ -182,10 +184,12 @@ public class ShardsLimitAllocationDecider extends AllocationDecider { nodeShardCount++; } if (clusterShardLimit >= 0 && nodeShardCount >= clusterShardLimit) { - return allocation.decision(Decision.NO, NAME, "too many shards for this node [%d], cluster-level limit per node: [%d]", - nodeShardCount, clusterShardLimit); + return allocation.decision(Decision.NO, NAME, + "too many shards [%d] allocated to this node, [%s=%d]", + nodeShardCount, CLUSTER_TOTAL_SHARDS_PER_NODE_SETTING.getKey(), clusterShardLimit); } - return allocation.decision(Decision.YES, NAME, "the shard count is under node limit [%d] of total shards per node", - clusterShardLimit); + return allocation.decision(Decision.YES, NAME, + "the shard count [%d] for this node is under the cluster level node limit [%d]", + nodeShardCount, clusterShardLimit); } } diff --git a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java index 3c20f1ec062..18ee6395bd4 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/SnapshotInProgressAllocationDecider.java @@ -77,15 +77,16 @@ public class SnapshotInProgressAllocationDecider extends AllocationDecider { if (shardSnapshotStatus != null && !shardSnapshotStatus.state().completed() && shardSnapshotStatus.nodeId() != null && shardSnapshotStatus.nodeId().equals(shardRouting.currentNodeId())) { if (logger.isTraceEnabled()) { - logger.trace("Preventing snapshotted shard [{}] to be moved from node [{}]", + logger.trace("Preventing snapshotted shard [{}] from being moved away from node [{}]", shardRouting.shardId(), shardSnapshotStatus.nodeId()); } - return allocation.decision(Decision.NO, NAME, "snapshot for shard [%s] is currently running on node [%s]", - shardRouting.shardId(), shardSnapshotStatus.nodeId()); + return allocation.decision(Decision.THROTTLE, NAME, + "waiting for snapshotting of shard [%s] to complete on this node [%s]", + shardRouting.shardId(), shardSnapshotStatus.nodeId()); } } } - return allocation.decision(Decision.YES, NAME, "the shard is not primary or relocation is disabled"); + return allocation.decision(Decision.YES, NAME, "the shard is not being snapshotted"); } } diff --git 
a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java index df2e1d12234..a59f543ac3f 100644 --- a/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java +++ b/core/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/ThrottlingAllocationDecider.java @@ -126,8 +126,9 @@ public class ThrottlingAllocationDecider extends AllocationDecider { } if (primariesInRecovery >= primariesInitialRecoveries) { // TODO: Should index creation not be throttled for primary shards? - return allocation.decision(THROTTLE, NAME, "too many primaries are currently recovering [%d], limit: [%d]", - primariesInRecovery, primariesInitialRecoveries); + return allocation.decision(THROTTLE, NAME, "reached the limit of ongoing initial primary recoveries [%d], [%s=%d]", + primariesInRecovery, CLUSTER_ROUTING_ALLOCATION_NODE_INITIAL_PRIMARIES_RECOVERIES_SETTING.getKey(), + primariesInitialRecoveries); } else { return allocation.decision(YES, NAME, "below primary recovery limit of [%d]", primariesInitialRecoveries); } @@ -138,8 +139,11 @@ public class ThrottlingAllocationDecider extends AllocationDecider { // Allocating a shard to this node will increase the incoming recoveries int currentInRecoveries = allocation.routingNodes().getIncomingRecoveries(node.nodeId()); if (currentInRecoveries >= concurrentIncomingRecoveries) { - return allocation.decision(THROTTLE, NAME, "too many incoming shards are currently recovering [%d], limit: [%d]", - currentInRecoveries, concurrentIncomingRecoveries); + return allocation.decision(THROTTLE, NAME, + "reached the limit of incoming shard recoveries [%d], [%s=%d] (can also be set via [%s])", + currentInRecoveries, CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_INCOMING_RECOVERIES_SETTING.getKey(), + concurrentIncomingRecoveries, + CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey()); } else { // search for corresponding recovery source (= primary shard) and check number of outgoing recoveries on that node ShardRouting primaryShard = allocation.routingNodes().activePrimary(shardRouting.shardId()); @@ -148,8 +152,13 @@ public class ThrottlingAllocationDecider extends AllocationDecider { } int primaryNodeOutRecoveries = allocation.routingNodes().getOutgoingRecoveries(primaryShard.currentNodeId()); if (primaryNodeOutRecoveries >= concurrentOutgoingRecoveries) { - return allocation.decision(THROTTLE, NAME, "too many outgoing shards are currently recovering [%d], limit: [%d]", - primaryNodeOutRecoveries, concurrentOutgoingRecoveries); + return allocation.decision(THROTTLE, NAME, + "reached the limit of outgoing shard recoveries [%d] on the node [%s] which holds the primary, " + + "[%s=%d] (can also be set via [%s])", + primaryNodeOutRecoveries, node.nodeId(), + CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_OUTGOING_RECOVERIES_SETTING.getKey(), + concurrentOutgoingRecoveries, + CLUSTER_ROUTING_ALLOCATION_NODE_CONCURRENT_RECOVERIES_SETTING.getKey()); } else { return allocation.decision(YES, NAME, "below shard recovery limit of outgoing: [%d < %d] incoming: [%d < %d]", primaryNodeOutRecoveries, diff --git a/core/src/main/java/org/elasticsearch/discovery/zen/UnicastZenPing.java b/core/src/main/java/org/elasticsearch/discovery/zen/UnicastZenPing.java index eec9548dd08..f24e7da5cf8 100644 --- a/core/src/main/java/org/elasticsearch/discovery/zen/UnicastZenPing.java +++ 
b/core/src/main/java/org/elasticsearch/discovery/zen/UnicastZenPing.java @@ -468,7 +468,7 @@ public class UnicastZenPing extends AbstractComponent implements ZenPing { // connect to the node, see if we manage to do it, if not, bail if (!nodeFoundByAddress) { logger.trace("[{}] connecting (light) to {}", sendPingsHandler.id(), finalNodeToSend); - transportService.connectToNodeLightAndHandshake(finalNodeToSend, timeout.getMillis()); + transportService.connectToNodeAndHandshake(finalNodeToSend, timeout.getMillis()); } else { logger.trace("[{}] connecting to {}", sendPingsHandler.id(), finalNodeToSend); transportService.connectToNode(finalNodeToSend); diff --git a/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java b/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java index 272a75f4e7a..a15c5902705 100644 --- a/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java +++ b/core/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java @@ -43,12 +43,10 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Priority; import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.component.Lifecycle; -import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.internal.Nullable; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.lease.Releasables; -import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; @@ -851,10 +849,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implements Discover } void handleJoinRequest(final DiscoveryNode node, final ClusterState state, final MembershipAction.JoinCallback callback) { - if (!transportService.addressSupported(node.getAddress().getClass())) { - // TODO, what should we do now? Maybe inform that node that its crap? - logger.warn("received a wrong address type from [{}], ignoring...", node); - } else if (nodeJoinController == null) { + if (nodeJoinController == null) { throw new IllegalStateException("discovery module is not yet started"); } else { // try and connect to the node, if it fails, we can raise an exception back to the client... diff --git a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java index 6dddf6eb57f..87f9692f625 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/AnalysisRegistry.java @@ -158,11 +158,12 @@ public final class AnalysisRegistry implements Closeable { final Map tokenFiltersSettings = indexSettings.getSettings().getGroups(INDEX_ANALYSIS_FILTER); Map> tokenFilters = new HashMap<>(this.tokenFilters); /* - * synonym is different than everything else since it needs access to the tokenizer factories for this index. + * synonym and synonym_graph are different than everything else since they need access to the tokenizer factories for the index. * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and * hide internal data-structures as much as possible. 
*/ tokenFilters.put("synonym", requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings))); + tokenFilters.put("synonym_graph", requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings))); return buildMapping(false, "tokenfilter", indexSettings, tokenFiltersSettings, Collections.unmodifiableMap(tokenFilters), prebuiltAnalysis.tokenFilterFactories); } @@ -213,12 +214,14 @@ public final class AnalysisRegistry implements Closeable { Settings currentSettings = tokenFilterSettings.get(tokenFilter); String typeName = currentSettings.get("type"); /* - * synonym is different than everything else since it needs access to the tokenizer factories for this index. + * synonym and synonym_graph are different than everything else since they need access to the tokenizer factories for the index. * instead of building the infrastructure for plugins we rather make it a real exception to not pollute the general interface and * hide internal data-structures as much as possible. */ if ("synonym".equals(typeName)) { return requriesAnalysisSettings((is, env, name, settings) -> new SynonymTokenFilterFactory(is, env, this, name, settings)); + } else if ("synonym_graph".equals(typeName)) { + return requriesAnalysisSettings((is, env, name, settings) -> new SynonymGraphFilterFactory(is, env, this, name, settings)); } else { return getAnalysisProvider("tokenfilter", tokenFilters, tokenFilter, typeName); } diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphFilterFactory.java new file mode 100644 index 00000000000..da9b11b9785 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/index/analysis/SynonymGraphFilterFactory.java @@ -0,0 +1,41 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.analysis; + +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.synonym.SynonymGraphFilter; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.env.Environment; +import org.elasticsearch.index.IndexSettings; + +import java.io.IOException; + +public class SynonymGraphFilterFactory extends SynonymTokenFilterFactory { + public SynonymGraphFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, + String name, Settings settings) throws IOException { + super(indexSettings, env, analysisRegistry, name, settings); + } + + @Override + public TokenStream create(TokenStream tokenStream) { + // fst is null means no synonyms + return synonymMap.fst == null ? 
tokenStream : new SynonymGraphFilter(tokenStream, synonymMap, ignoreCase); + } +} diff --git a/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java b/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java index 11f1303328c..d32c66e0dfe 100644 --- a/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java +++ b/core/src/main/java/org/elasticsearch/index/analysis/SynonymTokenFilterFactory.java @@ -40,8 +40,8 @@ import java.util.List; public class SynonymTokenFilterFactory extends AbstractTokenFilterFactory { - private final SynonymMap synonymMap; - private final boolean ignoreCase; + protected final SynonymMap synonymMap; + protected final boolean ignoreCase; public SynonymTokenFilterFactory(IndexSettings indexSettings, Environment env, AnalysisRegistry analysisRegistry, String name, Settings settings) throws IOException { diff --git a/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index 25397d2a3ee..b08fca4baa2 100644 --- a/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/core/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -22,6 +22,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.GraphQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.ParseField; import org.elasticsearch.common.ParsingException; @@ -36,6 +37,7 @@ import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.index.search.MatchQuery.ZeroTermsQuery; import java.io.IOException; +import java.util.List; import java.util.Locale; import java.util.Objects; import java.util.Optional; @@ -471,9 +473,25 @@ public class MatchQueryBuilder extends AbstractQueryBuilder { // and multiple variations of the same word in the query (synonyms for instance). 
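Stepping back to the synonym_graph registration in AnalysisRegistry above: a hypothetical set of index analysis settings that would exercise the new SynonymGraphFilterFactory; the filter and analyzer names and the synonym rule are illustrative only, not taken from this change:

import org.elasticsearch.common.settings.Settings;

public class SynonymGraphSettingsSketch {
    public static void main(String[] args) {
        // Hypothetical analyzer "my_synonyms" wiring the new "synonym_graph" token filter type.
        Settings indexSettings = Settings.builder()
            .put("index.analysis.filter.my_graph_synonyms.type", "synonym_graph")
            .putArray("index.analysis.filter.my_graph_synonyms.synonyms", "ny, new york")
            .put("index.analysis.analyzer.my_synonyms.tokenizer", "standard")
            .putArray("index.analysis.analyzer.my_synonyms.filter", "lowercase", "my_graph_synonyms")
            .build();
        System.out.println(indexSettings.getAsMap());
    }
}

Because SynonymGraphFilterFactory extends SynonymTokenFilterFactory (whose synonymMap and ignoreCase fields are made protected above), the same settings keys drive both the flat and the graph variants.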
if (query instanceof BooleanQuery && !((BooleanQuery) query).isCoordDisabled()) { query = Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch); + } else if (query instanceof GraphQuery && ((GraphQuery) query).hasBoolean()) { + // we have a graph query that has at least one boolean sub-query + // re-build and set minimum should match value on all boolean queries + List oldQueries = ((GraphQuery) query).getQueries(); + Query[] queries = new Query[oldQueries.size()]; + for (int i = 0; i < queries.length; i++) { + Query oldQuery = oldQueries.get(i); + if (oldQuery instanceof BooleanQuery) { + queries[i] = Queries.applyMinimumShouldMatch((BooleanQuery) oldQuery, minimumShouldMatch); + } else { + queries[i] = oldQuery; + } + } + + query = new GraphQuery(queries); } else if (query instanceof ExtendedCommonTermsQuery) { ((ExtendedCommonTermsQuery)query).setLowFreqMinimumNumberShouldMatch(minimumShouldMatch); } + return query; } diff --git a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 46eb6b7d399..ad73dc43dca 100644 --- a/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/core/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -19,7 +19,16 @@ package org.elasticsearch.index.search; +import static org.apache.lucene.analysis.synonym.SynonymGraphFilter.GRAPH_FLAG; + import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.CachingTokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.synonym.GraphTokenStreamFiniteStrings; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.queries.ExtendedCommonTermsQuery; import org.apache.lucene.search.BooleanClause; @@ -27,12 +36,14 @@ import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.FuzzyQuery; +import org.apache.lucene.search.GraphQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.MultiTermQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.util.QueryBuilder; +import org.apache.lucene.util.XQueryBuilder; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; @@ -47,6 +58,8 @@ import org.elasticsearch.index.query.QueryShardContext; import org.elasticsearch.index.query.support.QueryParsers; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; public class MatchQuery { @@ -112,13 +125,19 @@ public class MatchQuery { } } - /** the default phrase slop */ + /** + * the default phrase slop + */ public static final int DEFAULT_PHRASE_SLOP = 0; - /** the default leniency setting */ + /** + * the default leniency setting + */ public static final boolean DEFAULT_LENIENCY = false; - /** the default zero terms query */ + /** + * the default zero terms query + */ public static final ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = 
ZeroTermsQuery.NONE; protected final QueryShardContext context; @@ -285,7 +304,7 @@ public class MatchQuery { return Queries.newMatchAllQuery(); } - private class MatchQueryBuilder extends QueryBuilder { + private class MatchQueryBuilder extends XQueryBuilder { private final MappedFieldType mapper; @@ -297,11 +316,126 @@ public class MatchQuery { this.mapper = mapper; } + /** + * Creates a query from the analysis chain. Overrides original so all it does is create the token stream and pass that into the + * new {@link #createFieldQuery(TokenStream, Occur, String, boolean, int)} method which has all the original query generation logic. + * + * @param analyzer analyzer used for this query + * @param operator default boolean operator used for this query + * @param field field to create queries against + * @param queryText text to be passed to the analysis chain + * @param quoted true if phrases should be generated when terms occur at more than one position + * @param phraseSlop slop factor for phrase/multiphrase queries + */ + @Override + protected final Query createFieldQuery(Analyzer analyzer, BooleanClause.Occur operator, String field, String queryText, + boolean quoted, int phraseSlop) { + assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; + + // Use the analyzer to get all the tokens, and then build an appropriate + // query based on the analysis chain. + try (TokenStream source = analyzer.tokenStream(field, queryText)) { + return createFieldQuery(source, operator, field, quoted, phraseSlop); + } catch (IOException e) { + throw new RuntimeException("Error analyzing query text", e); + } + } + + /** + * Creates a query from a token stream. Same logic as {@link #createFieldQuery(Analyzer, Occur, String, String, boolean, int)} + * with additional graph token stream detection. + * + * @param source the token stream to create the query from + * @param operator default boolean operator used for this query + * @param field field to create queries against + * @param quoted true if phrases should be generated when terms occur at more than one position + * @param phraseSlop slop factor for phrase/multiphrase queries + */ + protected final Query createFieldQuery(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted, + int phraseSlop) { + assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST; + + // Build an appropriate query based on the analysis chain. + try (CachingTokenFilter stream = new CachingTokenFilter(source)) { + + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); + PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = stream.addAttribute(PositionLengthAttribute.class); + FlagsAttribute flagsAtt = stream.addAttribute(FlagsAttribute.class); + + if (termAtt == null) { + return null; + } + + // phase 1: read through the stream and assess the situation: + // counting the number of tokens/positions and marking if we have any synonyms. 
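The phase 1 pass above mirrors Lucene's stock QueryBuilder token counting, with the extra graph check on position length and the GRAPH_FLAG bit. A stand-alone sketch of the same attribute walk over a plain analyzer (which never sets the flag, so isGraph stays false here); this only illustrates the attribute handling and is not the patched code itself:

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;

public class GraphDetectionSketch {
    public static void main(String[] args) throws Exception {
        try (StandardAnalyzer analyzer = new StandardAnalyzer();
             TokenStream stream = analyzer.tokenStream("body", "quick brown fox")) {
            PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);
            PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);
            FlagsAttribute flags = stream.addAttribute(FlagsAttribute.class);

            int positionCount = 0;
            boolean hasSynonyms = false;
            boolean isGraph = false;

            stream.reset();
            while (stream.incrementToken()) {
                int increment = posInc.getPositionIncrement();
                if (increment != 0) {
                    positionCount += increment;  // token starts a new position
                } else {
                    hasSynonyms = true;          // token stacked on the previous position
                }
                // The patch additionally requires the GRAPH_FLAG bit set by SynonymGraphFilter;
                // any non-zero flag plus a position length > 1 stands in for that check here.
                if (posLen.getPositionLength() > 1 && flags.getFlags() != 0) {
                    isGraph = true;
                }
            }
            stream.end();
            System.out.println("positions=" + positionCount + ", synonyms=" + hasSynonyms + ", graph=" + isGraph);
        }
    }
}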
+ + int numTokens = 0; + int positionCount = 0; + boolean hasSynonyms = false; + boolean isGraph = false; + + stream.reset(); + while (stream.incrementToken()) { + numTokens++; + int positionIncrement = posIncAtt.getPositionIncrement(); + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + hasSynonyms = true; + } + + int positionLength = posLenAtt.getPositionLength(); + if (!isGraph && positionLength > 1 && ((flagsAtt.getFlags() & GRAPH_FLAG) == GRAPH_FLAG)) { + isGraph = true; + } + } + + // phase 2: based on token count, presence of synonyms, and options + // formulate a single term, boolean, or phrase. + + if (numTokens == 0) { + return null; + } else if (numTokens == 1) { + // single term + return analyzeTerm(field, stream); + } else if (isGraph) { + // graph + return analyzeGraph(stream, operator, field, quoted, phraseSlop); + } else if (quoted && positionCount > 1) { + // phrase + if (hasSynonyms) { + // complex phrase with synonyms + return analyzeMultiPhrase(field, stream, phraseSlop); + } else { + // simple phrase + return analyzePhrase(field, stream, phraseSlop); + } + } else { + // boolean + if (positionCount == 1) { + // only one position, with synonyms + return analyzeBoolean(field, stream); + } else { + // complex case: multiple positions + return analyzeMultiBoolean(field, stream, operator); + } + } + } catch (IOException e) { + throw new RuntimeException("Error analyzing query text", e); + } + } + @Override protected Query newTermQuery(Term term) { return blendTermQuery(term, mapper); } + @Override + protected Query newSynonymQuery(Term[] terms) { + return blendTermsQuery(terms, mapper); + } + public Query createPhrasePrefixQuery(String field, String queryText, int phraseSlop, int maxExpansions) { final Query query = createFieldQuery(getAnalyzer(), Occur.MUST, field, queryText, true, phraseSlop); float boost = 1; @@ -319,7 +453,7 @@ public class MatchQuery { Term[] terms = pq.getTerms(); int[] positions = pq.getPositions(); for (int i = 0; i < terms.length; i++) { - prefixQuery.add(new Term[] {terms[i]}, positions[i]); + prefixQuery.add(new Term[]{terms[i]}, positions[i]); } return boost == 1 ? prefixQuery : new BoostQuery(prefixQuery, boost); } else if (innerQuery instanceof MultiPhraseQuery) { @@ -340,11 +474,13 @@ public class MatchQuery { return query; } - public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency, MappedFieldType fieldType) { + public Query createCommonTermsQuery(String field, String queryText, Occur highFreqOccur, Occur lowFreqOccur, float + maxTermFrequency, MappedFieldType fieldType) { Query booleanQuery = createBooleanQuery(field, queryText, lowFreqOccur); if (booleanQuery != null && booleanQuery instanceof BooleanQuery) { BooleanQuery bq = (BooleanQuery) booleanQuery; - ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ((BooleanQuery)booleanQuery).isCoordDisabled(), fieldType); + ExtendedCommonTermsQuery query = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, ( + (BooleanQuery) booleanQuery).isCoordDisabled(), fieldType); for (BooleanClause clause : bq.clauses()) { if (!(clause.getQuery() instanceof TermQuery)) { return booleanQuery; @@ -356,6 +492,34 @@ public class MatchQuery { return booleanQuery; } + + /** + * Creates a query from a graph token stream by extracting all the finite strings from the graph and using them to create the query. 
+ */ + protected Query analyzeGraph(TokenStream source, BooleanClause.Occur operator, String field, boolean quoted, int phraseSlop) + throws IOException { + source.reset(); + GraphTokenStreamFiniteStrings graphTokenStreams = new GraphTokenStreamFiniteStrings(); + List tokenStreams = graphTokenStreams.getTokenStreams(source); + + if (tokenStreams.isEmpty()) { + return null; + } + + List queries = new ArrayList<>(tokenStreams.size()); + for (TokenStream ts : tokenStreams) { + Query query = createFieldQuery(ts, operator, field, quoted, phraseSlop); + if (query != null) { + queries.add(query); + } + } + + return new GraphQuery(queries.toArray(new Query[0])); + } + } + + protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { + return new SynonymQuery(terms); } protected Query blendTermQuery(Term term, MappedFieldType fieldType) { diff --git a/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java b/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java index 9ac7e2e7520..d08d4aaddc1 100644 --- a/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java +++ b/core/src/main/java/org/elasticsearch/index/search/MultiMatchQuery.java @@ -158,6 +158,10 @@ public class MultiMatchQuery extends MatchQuery { return MultiMatchQuery.super.blendTermQuery(term, fieldType); } + public Query blendTerms(Term[] terms, MappedFieldType fieldType) { + return MultiMatchQuery.super.blendTermsQuery(terms, fieldType); + } + public Query termQuery(MappedFieldType fieldType, Object value) { return MultiMatchQuery.this.termQuery(fieldType, value, lenient); } @@ -223,6 +227,18 @@ public class MultiMatchQuery extends MatchQuery { return queries.isEmpty() ? null : queries; } + @Override + public Query blendTerms(Term[] terms, MappedFieldType fieldType) { + if (blendedFields == null || blendedFields.length == 1) { + return super.blendTerms(terms, fieldType); + } + BytesRef[] values = new BytesRef[terms.length]; + for (int i = 0; i < terms.length; i++) { + values[i] = terms[i].bytes(); + } + return MultiMatchQuery.blendTerms(context, values, commonTermsCutoff, tieBreaker, blendedFields); + } + @Override public Query blendTerm(Term term, MappedFieldType fieldType) { if (blendedFields == null) { @@ -243,44 +259,51 @@ public class MultiMatchQuery extends MatchQuery { } static Query blendTerm(QueryShardContext context, BytesRef value, Float commonTermsCutoff, float tieBreaker, + FieldAndFieldType... blendedFields) { + return blendTerms(context, new BytesRef[] {value}, commonTermsCutoff, tieBreaker, blendedFields); + } + + static Query blendTerms(QueryShardContext context, BytesRef[] values, Float commonTermsCutoff, float tieBreaker, FieldAndFieldType... 
blendedFields) { List queries = new ArrayList<>(); - Term[] terms = new Term[blendedFields.length]; - float[] blendedBoost = new float[blendedFields.length]; + Term[] terms = new Term[blendedFields.length * values.length]; + float[] blendedBoost = new float[blendedFields.length * values.length]; int i = 0; for (FieldAndFieldType ft : blendedFields) { - Query query; - try { - query = ft.fieldType.termQuery(value, context); - } catch (IllegalArgumentException e) { - // the query expects a certain class of values such as numbers - // of ip addresses and the value can't be parsed, so ignore this - // field - continue; - } catch (ElasticsearchParseException parseException) { - // date fields throw an ElasticsearchParseException with the - // underlying IAE as the cause, ignore this field if that is - // the case - if (parseException.getCause() instanceof IllegalArgumentException) { + for (BytesRef term : values) { + Query query; + try { + query = ft.fieldType.termQuery(term, context); + } catch (IllegalArgumentException e) { + // the query expects a certain class of values such as numbers + // of ip addresses and the value can't be parsed, so ignore this + // field continue; + } catch (ElasticsearchParseException parseException) { + // date fields throw an ElasticsearchParseException with the + // underlying IAE as the cause, ignore this field if that is + // the case + if (parseException.getCause() instanceof IllegalArgumentException) { + continue; + } + throw parseException; } - throw parseException; - } - float boost = ft.boost; - while (query instanceof BoostQuery) { - BoostQuery bq = (BoostQuery) query; - query = bq.getQuery(); - boost *= bq.getBoost(); - } - if (query.getClass() == TermQuery.class) { - terms[i] = ((TermQuery) query).getTerm(); - blendedBoost[i] = boost; - i++; - } else { - if (boost != 1f) { - query = new BoostQuery(query, boost); + float boost = ft.boost; + while (query instanceof BoostQuery) { + BoostQuery bq = (BoostQuery) query; + query = bq.getQuery(); + boost *= bq.getBoost(); + } + if (query.getClass() == TermQuery.class) { + terms[i] = ((TermQuery) query).getTerm(); + blendedBoost[i] = boost; + i++; + } else { + if (boost != 1f) { + query = new BoostQuery(query, boost); + } + queries.add(query); } - queries.add(query); } } if (i > 0) { @@ -317,6 +340,14 @@ public class MultiMatchQuery extends MatchQuery { return queryBuilder.blendTerm(term, fieldType); } + @Override + protected Query blendTermsQuery(Term[] terms, MappedFieldType fieldType) { + if (queryBuilder == null) { + return super.blendTermsQuery(terms, fieldType); + } + return queryBuilder.blendTerms(terms, fieldType); + } + static final class FieldAndFieldType { final MappedFieldType fieldType; final float boost; diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java index 5dd0203d617..89c9421198d 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/AnalysisModule.java @@ -152,13 +152,12 @@ import java.util.List; */ public final class AnalysisModule { static { - Settings build = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) - .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 1) - .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1) - .build(); + Settings build = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).put(IndexMetaData + 
.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).build(); IndexMetaData metaData = IndexMetaData.builder("_na_").settings(build).build(); NA_INDEX_SETTINGS = new IndexSettings(metaData, Settings.EMPTY); } + private static final IndexSettings NA_INDEX_SETTINGS; private final HunspellService hunspellService; @@ -171,8 +170,8 @@ public final class AnalysisModule { NamedRegistry> tokenFilters = setupTokenFilters(plugins, hunspellService); NamedRegistry> tokenizers = setupTokenizers(plugins); NamedRegistry>> analyzers = setupAnalyzers(plugins); - analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), - tokenizers.getRegistry(), analyzers.getRegistry()); + analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers + .getRegistry(), analyzers.getRegistry()); } HunspellService getHunspellService() { @@ -198,8 +197,8 @@ public final class AnalysisModule { return hunspellDictionaries; } - private NamedRegistry> setupTokenFilters(List plugins, - HunspellService hunspellService) { + private NamedRegistry> setupTokenFilters(List plugins, HunspellService + hunspellService) { NamedRegistry> tokenFilters = new NamedRegistry<>("token_filter"); tokenFilters.register("stop", StopTokenFilterFactory::new); tokenFilters.register("reverse", ReverseTokenFilterFactory::new); @@ -251,8 +250,8 @@ public final class AnalysisModule { tokenFilters.register("scandinavian_folding", ScandinavianFoldingFilterFactory::new); tokenFilters.register("serbian_normalization", SerbianNormalizationFilterFactory::new); - tokenFilters.register("hunspell", requriesAnalysisSettings( - (indexSettings, env, name, settings) -> new HunspellTokenFilterFactory(indexSettings, name, settings, hunspellService))); + tokenFilters.register("hunspell", requriesAnalysisSettings((indexSettings, env, name, settings) -> new HunspellTokenFilterFactory + (indexSettings, name, settings, hunspellService))); tokenFilters.register("cjk_bigram", CJKBigramFilterFactory::new); tokenFilters.register("cjk_width", CJKWidthFilterFactory::new); @@ -341,6 +340,7 @@ public final class AnalysisModule { public T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException { return provider.get(indexSettings, environment, name, settings); } + @Override public boolean requiresAnalysisSettings() { return true; @@ -355,10 +355,11 @@ public final class AnalysisModule { /** * Creates a new analysis provider. + * * @param indexSettings the index settings for the index this provider is created for - * @param environment the nodes environment to load resources from persistent storage - * @param name the name of the analysis component - * @param settings the component specific settings without context prefixes + * @param environment the nodes environment to load resources from persistent storage + * @param name the name of the analysis component + * @param settings the component specific settings without context prefixes * @return a new provider instance * @throws IOException if an {@link IOException} occurs */ @@ -369,11 +370,11 @@ public final class AnalysisModule { * This can be used to get a default instance of an analysis factory without binding to an index. 
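 * For example (illustrative), a pre-built factory that needs no per-index settings can be
 * obtained this way, whereas a provider registered via {@code requriesAnalysisSettings(...)}
 * (such as the "hunspell" token filter above) requires analysis settings and is rejected
 * here with an {@link IllegalArgumentException}, as documented below.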
* * @param environment the nodes environment to load resources from persistent storage - * @param name the name of the analysis component + * @param name the name of the analysis component * @return a new provider instance - * @throws IOException if an {@link IOException} occurs + * @throws IOException if an {@link IOException} occurs * @throws IllegalArgumentException if the provider requires analysis settings ie. if {@link #requiresAnalysisSettings()} returns - * true + * true */ default T get(Environment environment, String name) throws IOException { if (requiresAnalysisSettings()) { diff --git a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java index e1189e3197d..d48bb9df272 100644 --- a/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java +++ b/core/src/main/java/org/elasticsearch/indices/analysis/PreBuiltTokenFilters.java @@ -386,7 +386,7 @@ public enum PreBuiltTokenFilters { public TokenStream create(TokenStream tokenStream, Version version) { return new LimitTokenCountFilter(tokenStream, LimitTokenCountFilterFactory.DEFAULT_MAX_TOKEN_COUNT, LimitTokenCountFilterFactory.DEFAULT_CONSUME_ALL_TOKENS); } - } + }, ; diff --git a/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java b/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java index 3db9d4340ef..c17b9258f58 100644 --- a/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java +++ b/core/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java @@ -1137,7 +1137,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp */ protected Tuple buildBlobStoreIndexShardSnapshots(Map blobs) { int latest = -1; - for (String name : blobs.keySet()) { + Set blobKeys = blobs.keySet(); + for (String name : blobKeys) { if (name.startsWith(SNAPSHOT_INDEX_PREFIX)) { try { int gen = Integer.parseInt(name.substring(SNAPSHOT_INDEX_PREFIX.length())); @@ -1158,15 +1159,17 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp final String file = SNAPSHOT_INDEX_PREFIX + latest; logger.warn((Supplier) () -> new ParameterizedMessage("failed to read index file [{}]", file), e); } + } else if (blobKeys.isEmpty() == false) { + logger.debug("Could not find a readable index-N file in a non-empty shard snapshot directory [{}]", blobContainer.path()); } // We couldn't load the index file - falling back to loading individual snapshots List snapshots = new ArrayList<>(); - for (String name : blobs.keySet()) { + for (String name : blobKeys) { try { BlobStoreIndexShardSnapshot snapshot = null; if (name.startsWith(SNAPSHOT_PREFIX)) { - snapshot = indexShardSnapshotFormat.readBlob(blobContainer, snapshotId.getUUID()); + snapshot = indexShardSnapshotFormat.readBlob(blobContainer, name); } else if (name.startsWith(LEGACY_SNAPSHOT_PREFIX)) { snapshot = indexShardSnapshotLegacyFormat.readBlob(blobContainer, name); } diff --git a/core/src/main/java/org/elasticsearch/script/ScriptService.java b/core/src/main/java/org/elasticsearch/script/ScriptService.java index 1dc1cda0ada..478aac6a55c 100644 --- a/core/src/main/java/org/elasticsearch/script/ScriptService.java +++ b/core/src/main/java/org/elasticsearch/script/ScriptService.java @@ -22,6 +22,7 @@ package org.elasticsearch.script; import org.apache.logging.log4j.message.ParameterizedMessage; import 
org.apache.logging.log4j.util.Supplier; import org.apache.lucene.util.IOUtils; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.ResourceNotFoundException; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.admin.cluster.storedscripts.DeleteStoredScriptRequest; @@ -52,6 +53,9 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; +import org.elasticsearch.common.xcontent.ToXContent; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.json.JsonXContent; import org.elasticsearch.env.Environment; import org.elasticsearch.search.lookup.SearchLookup; import org.elasticsearch.watcher.FileChangesListener; @@ -599,6 +603,22 @@ public class ScriptService extends AbstractComponent implements Closeable, Clust } else { logger.warn("skipping compile of script file [{}] as all scripted operations are disabled for file scripts", file.toAbsolutePath()); } + } catch (ScriptException e) { + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + builder.prettyPrint(); + builder.startObject(); + ElasticsearchException.toXContent(builder, ToXContent.EMPTY_PARAMS, e); + builder.endObject(); + logger.warn("failed to load/compile script [{}]: {}", scriptNameExt.v1(), builder.string()); + } catch (IOException ioe) { + ioe.addSuppressed(e); + logger.warn((Supplier) () -> new ParameterizedMessage( + "failed to log an appropriate warning after failing to load/compile script [{}]", scriptNameExt.v1()), ioe); + } + /* Log at the whole exception at the debug level as well just in case the stack trace is important. That way you can + * turn on the stack trace if you need it. */ + logger.debug((Supplier) () -> new ParameterizedMessage("failed to load/compile script [{}]. 
full exception:", + scriptNameExt.v1()), e); } catch (Exception e) { logger.warn((Supplier) () -> new ParameterizedMessage("failed to load/compile script [{}]", scriptNameExt.v1()), e); } diff --git a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java index 5ff1df9c664..8a16d4c6eb0 100644 --- a/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java +++ b/core/src/main/java/org/elasticsearch/search/fetch/subphase/highlight/CustomQueryScorer.java @@ -24,7 +24,6 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.WeightedSpanTerm; import org.apache.lucene.search.highlight.WeightedSpanTermExtractor; -import org.apache.lucene.spatial.geopoint.search.GeoPointInBBoxQuery; import org.elasticsearch.common.lucene.search.function.FiltersFunctionScoreQuery; import org.elasticsearch.common.lucene.search.function.FunctionScoreQuery; import org.elasticsearch.index.query.HasChildQueryBuilder; @@ -78,10 +77,7 @@ public final class CustomQueryScorer extends QueryScorer { @Override protected void extractUnknownQuery(Query query, Map terms) throws IOException { - if (query instanceof FiltersFunctionScoreQuery) { - query = ((FiltersFunctionScoreQuery) query).getSubQuery(); - extract(query, 1F, terms); - } else if (terms.isEmpty()) { + if (terms.isEmpty()) { extractWeightedTerms(terms, query, 1F); } } @@ -92,6 +88,8 @@ public final class CustomQueryScorer extends QueryScorer { return; } else if (query instanceof FunctionScoreQuery) { super.extract(((FunctionScoreQuery) query).getSubQuery(), boost, terms); + } else if (query instanceof FiltersFunctionScoreQuery) { + super.extract(((FiltersFunctionScoreQuery) query).getSubQuery(), boost, terms); } else { super.extract(query, boost, terms); } diff --git a/core/src/main/java/org/elasticsearch/transport/ConnectionProfile.java b/core/src/main/java/org/elasticsearch/transport/ConnectionProfile.java new file mode 100644 index 00000000000..97f4c3349c0 --- /dev/null +++ b/core/src/main/java/org/elasticsearch/transport/ConnectionProfile.java @@ -0,0 +1,142 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.transport; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.EnumSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * A connection profile describes how many connection are established to specific node for each of the available request types. + * ({@link org.elasticsearch.transport.TransportRequestOptions.Type}). 
This allows to tailor a connection towards a specific usage. + */ +public final class ConnectionProfile { + + /** + * A pre-built light connection profile that shares a single connection across all + * types. + */ + public static final ConnectionProfile LIGHT_PROFILE = new ConnectionProfile( + Collections.singletonList(new ConnectionTypeHandle(0, 1, + TransportRequestOptions.Type.BULK, + TransportRequestOptions.Type.PING, + TransportRequestOptions.Type.RECOVERY, + TransportRequestOptions.Type.REG, + TransportRequestOptions.Type.STATE)), 1); + + private final List handles; + private final int numConnections; + + private ConnectionProfile(List handles, int numConnections) { + this.handles = handles; + this.numConnections = numConnections; + } + + /** + * A builder to build a new {@link ConnectionProfile} + */ + public static class Builder { + private final List handles = new ArrayList<>(); + private final Set addedTypes = EnumSet.noneOf(TransportRequestOptions.Type.class); + private int offset = 0; + + /** + * Adds a number of connections for one or more types. Each type can only be added once. + * @param numConnections the number of connections to use in the pool for the given connection types + * @param types a set of types that should share the given number of connections + */ + public void addConnections(int numConnections, TransportRequestOptions.Type... types) { + if (types == null || types.length == 0) { + throw new IllegalArgumentException("types must not be null"); + } + for (TransportRequestOptions.Type type : types) { + if (addedTypes.contains(type)) { + throw new IllegalArgumentException("type [" + type + "] is already registered"); + } + } + addedTypes.addAll(Arrays.asList(types)); + handles.add(new ConnectionTypeHandle(offset, numConnections, types)); + offset += numConnections; + } + + /** + * Creates a new {@link ConnectionProfile} based on the added connections. + * @throws IllegalStateException if any of the {@link org.elasticsearch.transport.TransportRequestOptions.Type} enum is missing + */ + public ConnectionProfile build() { + EnumSet types = EnumSet.allOf(TransportRequestOptions.Type.class); + types.removeAll(addedTypes); + if (types.isEmpty() == false) { + throw new IllegalStateException("not all types are added for this connection profile - missing types: " + types); + } + return new ConnectionProfile(Collections.unmodifiableList(handles), offset); + } + } + + /** + * Returns the total number of connections for this profile + */ + public int getNumConnections() { + return numConnections; + } + + /** + * Returns the type handles for this connection profile + */ + List getHandles() { + return Collections.unmodifiableList(handles); + } + + /** + * Connection type handle encapsulates the logic which connection + */ + static final class ConnectionTypeHandle { + public final int length; + public final int offset; + private final TransportRequestOptions.Type[] types; + private final AtomicInteger counter = new AtomicInteger(); + + private ConnectionTypeHandle(int offset, int length, TransportRequestOptions.Type... types) { + this.length = length; + this.offset = offset; + this.types = types; + } + + /** + * Returns one of the channels out configured for this handle. The channel is selected in a round-robin + * fashion. 
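 * For example (illustrative numbers): a handle with offset=2 and length=3 owns
 * channels[2..4]; successive calls return channels[2 + Math.floorMod(counter, 3)]
 * as the counter increments, cycling over those three connections.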
+ */ + T getChannel(T[] channels) { + assert channels.length >= offset + length : "illegal size: " + channels.length + " expected >= " + (offset + length); + return channels[offset + Math.floorMod(counter.incrementAndGet(), length)]; + } + + /** + * Returns all types for this handle + */ + TransportRequestOptions.Type[] getTypes() { + return types; + } + } + +} diff --git a/core/src/main/java/org/elasticsearch/transport/TcpTransport.java b/core/src/main/java/org/elasticsearch/transport/TcpTransport.java index a68863d0e52..11e8de3c3cc 100644 --- a/core/src/main/java/org/elasticsearch/transport/TcpTransport.java +++ b/core/src/main/java/org/elasticsearch/transport/TcpTransport.java @@ -67,7 +67,6 @@ import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.monitor.jvm.JvmInfo; import org.elasticsearch.rest.RestStatus; import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.support.TransportStatus; import java.io.Closeable; import java.io.IOException; @@ -81,6 +80,7 @@ import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; +import java.util.EnumMap; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; @@ -91,12 +91,13 @@ import java.util.Set; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReadWriteLock; import java.util.concurrent.locks.ReentrantReadWriteLock; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import static java.util.Collections.unmodifiableMap; import static org.elasticsearch.common.settings.Setting.boolSetting; @@ -178,6 +179,7 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i protected final boolean compress; protected volatile BoundTransportAddress boundAddress; private final String transportName; + private final ConnectionProfile defaultConnectionProfile; public TcpTransport(String transportName, Settings settings, ThreadPool threadPool, BigArrays bigArrays, CircuitBreakerService circuitBreakerService, NamedWriteableRegistry namedWriteableRegistry, @@ -200,6 +202,13 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i this.connectionsPerNodePing = CONNECTIONS_PER_NODE_PING.get(settings); this.connectTimeout = TCP_CONNECT_TIMEOUT.get(settings); this.blockingClient = TCP_BLOCKING_CLIENT.get(settings); + ConnectionProfile.Builder builder = new ConnectionProfile.Builder(); + builder.addConnections(connectionsPerNodeBulk, TransportRequestOptions.Type.BULK); + builder.addConnections(connectionsPerNodePing, TransportRequestOptions.Type.PING); + builder.addConnections(connectionsPerNodeRecovery, TransportRequestOptions.Type.RECOVERY); + builder.addConnections(connectionsPerNodeReg, TransportRequestOptions.Type.REG); + builder.addConnections(connectionsPerNodeState, TransportRequestOptions.Type.STATE); + defaultConnectionProfile = builder.build(); } @Override @@ -255,7 +264,7 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i for (Map.Entry entry : connectedNodes.entrySet()) { DiscoveryNode node = entry.getKey(); NodeChannels channels = entry.getValue(); - for (Channel channel : channels.allChannels) { + for (Channel channel : 
channels.getChannels()) { try { sendMessage(channel, pingHeader, successfulPings::inc); } catch (Exception e) { @@ -304,40 +313,31 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i } } - public class NodeChannels implements Closeable { + public final class NodeChannels implements Closeable { + private final Map typeMapping + = new EnumMap<>(TransportRequestOptions.Type.class); + private final Channel[] channels; + private final AtomicBoolean establishedAllConnections = new AtomicBoolean(false); - public List allChannels = Collections.emptyList(); - public Channel[] recovery; - public final AtomicInteger recoveryCounter = new AtomicInteger(); - public Channel[] bulk; - public final AtomicInteger bulkCounter = new AtomicInteger(); - public Channel[] reg; - public final AtomicInteger regCounter = new AtomicInteger(); - public Channel[] state; - public final AtomicInteger stateCounter = new AtomicInteger(); - public Channel[] ping; - public final AtomicInteger pingCounter = new AtomicInteger(); - - public NodeChannels(Channel[] recovery, Channel[] bulk, Channel[] reg, Channel[] state, Channel[] ping) { - this.recovery = recovery; - this.bulk = bulk; - this.reg = reg; - this.state = state; - this.ping = ping; + public NodeChannels(Channel[] channels, ConnectionProfile connectionProfile) { + this.channels = channels; + assert channels.length == connectionProfile.getNumConnections() : "expected channels size to be == " + + connectionProfile.getNumConnections() + " but was: [" + channels.length + "]"; + for (ConnectionProfile.ConnectionTypeHandle handle : connectionProfile.getHandles()) { + for (TransportRequestOptions.Type type : handle.getTypes()) + typeMapping.put(type, handle); + } } - public void start() { - List newAllChannels = new ArrayList<>(); - newAllChannels.addAll(Arrays.asList(recovery)); - newAllChannels.addAll(Arrays.asList(bulk)); - newAllChannels.addAll(Arrays.asList(reg)); - newAllChannels.addAll(Arrays.asList(state)); - newAllChannels.addAll(Arrays.asList(ping)); - this.allChannels = Collections.unmodifiableList(newAllChannels); + public void connectionsEstablished() { + if (establishedAllConnections.compareAndSet(false, true) == false) { + throw new AssertionError("connected more than once"); + } + } public boolean hasChannel(Channel channel) { - for (Channel channel1 : allChannels) { + for (Channel channel1 : channels) { if (channel.equals(channel1)) { return true; } @@ -345,29 +345,26 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i return false; } - public Channel channel(TransportRequestOptions.Type type) { - if (type == TransportRequestOptions.Type.REG) { - return reg[Math.floorMod(regCounter.incrementAndGet(), reg.length)]; - } else if (type == TransportRequestOptions.Type.STATE) { - return state[Math.floorMod(stateCounter.incrementAndGet(), state.length)]; - } else if (type == TransportRequestOptions.Type.PING) { - return ping[Math.floorMod(pingCounter.incrementAndGet(), ping.length)]; - } else if (type == TransportRequestOptions.Type.BULK) { - return bulk[Math.floorMod(bulkCounter.incrementAndGet(), bulk.length)]; - } else if (type == TransportRequestOptions.Type.RECOVERY) { - return recovery[Math.floorMod(recoveryCounter.incrementAndGet(), recovery.length)]; + public List getChannels() { + if (establishedAllConnections.get()) { // don't expose the channels until we are connected + return Arrays.asList(channels); } else { - throw new IllegalArgumentException("no type channel for [" + type + "]"); + return 
Collections.emptyList(); } } - public List getChannelArrays() { - return Arrays.asList(recovery, bulk, reg, state, ping); + public Channel channel(TransportRequestOptions.Type type) { + assert establishedAllConnections.get(); + ConnectionProfile.ConnectionTypeHandle connectionTypeHandle = typeMapping.get(type); + if (connectionTypeHandle == null) { + throw new IllegalArgumentException("no type channel for [" + type + "]"); + } + return connectionTypeHandle.getChannel(channels); } @Override public synchronized void close() throws IOException { - closeChannels(allChannels); + closeChannels(Arrays.asList(channels).stream().filter(Objects::nonNull).collect(Collectors.toList())); } } @@ -377,16 +374,8 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i } @Override - public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException { - connectToNode(node, true); - } - - @Override - public void connectToNode(DiscoveryNode node) { - connectToNode(node, false); - } - - public void connectToNode(DiscoveryNode node, boolean light) { + public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) { + connectionProfile = connectionProfile == null ? defaultConnectionProfile : connectionProfile; if (!lifecycle.started()) { throw new IllegalStateException("can't add nodes to a stopped transport"); } @@ -405,20 +394,16 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i return; } try { - if (light) { - nodeChannels = connectToChannelsLight(node); - } else { - try { - nodeChannels = connectToChannels(node); - } catch (Exception e) { - logger.trace( - (Supplier) () -> new ParameterizedMessage( - "failed to connect to [{}], cleaning dangling connections", node), e); - throw e; - } + try { + nodeChannels = connectToChannels(node, connectionProfile); + } catch (Exception e) { + logger.trace( + (Supplier) () -> new ParameterizedMessage( + "failed to connect to [{}], cleaning dangling connections", node), e); + throw e; } // we acquire a connection lock, so no way there is an existing connection - nodeChannels.start(); + nodeChannels.connectionsEstablished(); connectedNodes.put(node, nodeChannels); if (logger.isDebugEnabled()) { logger.debug("connected to node [{}]", node); @@ -513,11 +498,6 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i return Version.CURRENT; } - @Override - public boolean addressSupported(Class address) { - return TransportAddress.class.equals(address); - } - @Override public BoundTransportAddress boundAddress() { return this.boundAddress; @@ -889,21 +869,10 @@ public abstract class TcpTransport extends AbstractLifecycleComponent i */ protected abstract void closeChannels(List channel) throws IOException; - /** - * Connects to the given node in a light way. This means we are not creating multiple connections like we do - * for production connections. This connection is for pings or handshakes - */ - protected abstract NodeChannels connectToChannelsLight(DiscoveryNode node) throws IOException; - protected abstract void sendMessage(Channel channel, BytesReference reference, Runnable sendListener) throws IOException; - /** - * Connects to the node in a heavy way. 
- * - * @see #connectToChannelsLight(DiscoveryNode) - */ - protected abstract NodeChannels connectToChannels(DiscoveryNode node) throws IOException; + protected abstract NodeChannels connectToChannels(DiscoveryNode node, ConnectionProfile connectionProfile) throws IOException; /** * Called to tear down internal resources diff --git a/core/src/main/java/org/elasticsearch/transport/Transport.java b/core/src/main/java/org/elasticsearch/transport/Transport.java index c3c178a2c84..96dcd61483d 100644 --- a/core/src/main/java/org/elasticsearch/transport/Transport.java +++ b/core/src/main/java/org/elasticsearch/transport/Transport.java @@ -56,26 +56,16 @@ public interface Transport extends LifecycleComponent { */ TransportAddress[] addressesFromString(String address, int perAddressLimit) throws UnknownHostException; - /** - * Is the address type supported. - */ - boolean addressSupported(Class address); - /** * Returns true if the node is connected. */ boolean nodeConnected(DiscoveryNode node); /** - * Connects to the given node, if already connected, does nothing. + * Connects to a node with the given connection profile. Use {@link ConnectionProfile#LIGHT_PROFILE} when just connecting for ping + * and then disconnecting. If the node is already connected this method has no effect */ - void connectToNode(DiscoveryNode node) throws ConnectTransportException; - - /** - * Connects to a node in a light manner. Used when just connecting for ping and then - * disconnecting. - */ - void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException; + void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException; /** * Disconnected from the given node, if not connected, will do nothing. @@ -99,5 +89,4 @@ public interface Transport extends LifecycleComponent { default CircuitBreaker getInFlightRequestBreaker() { return new NoopCircuitBreaker("in-flight-noop"); } - } diff --git a/core/src/main/java/org/elasticsearch/transport/TransportService.java b/core/src/main/java/org/elasticsearch/transport/TransportService.java index 60cdaf7e978..511be6fb84c 100644 --- a/core/src/main/java/org/elasticsearch/transport/TransportService.java +++ b/core/src/main/java/org/elasticsearch/transport/TransportService.java @@ -255,10 +255,6 @@ public class TransportService extends AbstractLifecycleComponent { blockIncomingRequestsLatch.countDown(); } - public final boolean addressSupported(Class address) { - return transport.addressSupported(address); - } - public TransportInfo info() { BoundTransportAddress boundTransportAddress = boundAddress(); if (boundTransportAddress == null) { @@ -285,22 +281,20 @@ public class TransportService extends AbstractLifecycleComponent { } public void connectToNode(DiscoveryNode node) throws ConnectTransportException { - if (node.equals(localNode)) { - return; - } - transport.connectToNode(node); + connectToNode(node, null); } /** - * Lightly connect to the specified node + * Connect to the specified node with the given connection profile * * @param node the node to connect to + * @param connectionProfile the connection profile to use when connecting to this node */ - public void connectToNodeLight(final DiscoveryNode node) { + public void connectToNode(final DiscoveryNode node, ConnectionProfile connectionProfile) { if (node.equals(localNode)) { return; } - transport.connectToNodeLight(node); + transport.connectToNode(node, connectionProfile); } /** @@ -313,10 +307,10 @@ public class TransportService extends AbstractLifecycleComponent { * 
@throws ConnectTransportException if the connection or the * handshake failed */ - public DiscoveryNode connectToNodeLightAndHandshake( + public DiscoveryNode connectToNodeAndHandshake( final DiscoveryNode node, final long handshakeTimeout) throws ConnectTransportException { - return connectToNodeLightAndHandshake(node, handshakeTimeout, true); + return connectToNodeAndHandshake(node, handshakeTimeout, true); } /** @@ -333,14 +327,14 @@ public class TransportService extends AbstractLifecycleComponent { * @throws ConnectTransportException if the connection failed * @throws IllegalStateException if the handshake failed */ - public DiscoveryNode connectToNodeLightAndHandshake( + public DiscoveryNode connectToNodeAndHandshake( final DiscoveryNode node, final long handshakeTimeout, final boolean checkClusterName) { if (node.equals(localNode)) { return localNode; } - transport.connectToNodeLight(node); + transport.connectToNode(node, ConnectionProfile.LIGHT_PROFILE); try { return handshake(node, handshakeTimeout, checkClusterName); } catch (ConnectTransportException | IllegalStateException e) { diff --git a/core/src/main/java/org/elasticsearch/transport/support/TransportStatus.java b/core/src/main/java/org/elasticsearch/transport/TransportStatus.java similarity index 95% rename from core/src/main/java/org/elasticsearch/transport/support/TransportStatus.java rename to core/src/main/java/org/elasticsearch/transport/TransportStatus.java index 29c2bfb2781..42e0e962a62 100644 --- a/core/src/main/java/org/elasticsearch/transport/support/TransportStatus.java +++ b/core/src/main/java/org/elasticsearch/transport/TransportStatus.java @@ -17,9 +17,9 @@ * under the License. */ -package org.elasticsearch.transport.support; +package org.elasticsearch.transport; -public class TransportStatus { +final class TransportStatus { private static final byte STATUS_REQRES = 1 << 0; private static final byte STATUS_ERROR = 1 << 1; diff --git a/core/src/test/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterTests.java b/core/src/test/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterTests.java new file mode 100644 index 00000000000..fafe8a954c8 --- /dev/null +++ b/core/src/test/java/org/apache/lucene/analysis/synonym/SynonymGraphFilterTests.java @@ -0,0 +1,1074 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.analysis.synonym; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.BaseTokenStreamTestCase; +import org.apache.lucene.analysis.CannedTokenStream; +import org.apache.lucene.analysis.MockAnalyzer; +import org.apache.lucene.analysis.MockGraphTokenFilter; +import org.apache.lucene.analysis.MockTokenizer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.TokenStreamToAutomaton; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TokenStreamToTermAutomatonQuery; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.CharsRefBuilder; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.IntsRef; +import org.apache.lucene.util.TestUtil; +import org.apache.lucene.util.automaton.Automaton; +import org.apache.lucene.util.automaton.AutomatonTestUtil; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.TooComplexToDeterminizeException; +import org.apache.lucene.util.automaton.Transition; +import org.apache.lucene.util.fst.Util; + +import java.io.IOException; +import java.io.StringReader; +import java.text.ParseException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class SynonymGraphFilterTests extends BaseTokenStreamTestCase { + + /** + * Set a side effect by {@link #getAnalyzer}. 
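 * (That is, {@code getAnalyzer} is expected to stash the {@link SynonymGraphFilter} it
 * creates in this field so tests can inspect it afterwards, e.g. via
 * {@code synFilter.getCaptureCount()} in {@code testNoCaptureIfNoMatch}.)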
+ */ + private SynonymGraphFilter synFilter; + + // LUCENE-6664 + public static void assertAnalyzesToPositions(Analyzer a, String input, String[] output, String[] types, int[] posIncrements, int[] + posLengths) throws IOException { + assertAnalyzesTo(a, input, output, null, null, types, posIncrements, posLengths); + } + + public void testBasicKeepOrigOneOutput() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b", new String[]{"c", "x", "a", "b"}, new int[]{0, 2, 2, 4}, new int[]{1, 5, 3, 5}, new String[]{"word", + "SYNONYM", "word", "word"}, new int[]{1, 1, 0, 1}, new int[]{1, 2, 1, 1}); + a.close(); + } + + public void testMixedKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", true); + add(b, "e f", "y", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b c e f g", new String[]{"c", "x", "a", "b", "c", "y", "g"}, new int[]{0, 2, 2, 4, 6, 8, 12}, new + int[]{1, 5, 3, 5, 7, 11, 13}, new String[]{"word", "SYNONYM", "word", "word", "word", "SYNONYM", "word"}, new + int[]{1, 1, 0, + 1, 1, 1, 1}, new int[]{1, 2, 1, 1, 1, 1, 1}); + a.close(); + } + + public void testNoParseAfterBuffer() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "b a", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "b b b", new String[]{"b", "b", "b"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", + "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); + a.close(); + } + + public void testOneInputMultipleOutputKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", true); + add(b, "a b", "y", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b c", new String[]{"c", "x", "y", "a", "b", "c"}, new int[]{0, 2, 2, 2, 4, 6}, new int[]{1, 5, 5, 3, 5, + 7}, new String[]{"word", "SYNONYM", "SYNONYM", "word", "word", "word"}, new int[]{1, 1, 0, 0, 1, 1, 1, 1}, new + int[]{1, 2, 2, + 1, 1, 1, 1, 1}); + a.close(); + } + + /** + * parse a syn file with bad syntax + */ + public void testInvalidAnalyzesToNothingOutput() throws Exception { + String testFile = "a => 1"; + Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, false); + SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); + try { + parser.parse(new StringReader(testFile)); + fail("didn't get expected exception"); + } catch (ParseException expected) { + // expected exc + } + analyzer.close(); + } + + /** + * parse a syn file with bad syntax + */ + public void testInvalidDoubleMap() throws Exception { + String testFile = "a => b => c"; + Analyzer analyzer = new MockAnalyzer(random()); + SolrSynonymParser parser = new SolrSynonymParser(true, true, analyzer); + try { + parser.parse(new StringReader(testFile)); + fail("didn't get expected exception"); + } catch (ParseException expected) { + // expected exc + } + analyzer.close(); + } + + public void testMoreThanOneLookAhead() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b c d", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "a b c e", new String[]{"a", "b", "c", "e"}, new int[]{0, 2, 4, 6}, new int[]{1, 3, 5, 7}, new + String[]{"word", "word", "word", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); + a.close(); + } + + public void testLookaheadAfterParse() throws Exception { + 
SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "b b", "x", true); + add(b, "b", "y", true); + + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "b a b b", new String[]{"y", "b", "a", "x", "b", "b"}, new int[]{0, 0, 2, 4, 4, 6}, new int[]{1, 1, 3, 7, 5, + 7}, null, new int[]{1, 0, 1, 1, 0, 1}, new int[]{1, 1, 1, 2, 1, 1}, true); + } + + public void testLookaheadSecondParse() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "b b b", "x", true); + add(b, "b", "y", true); + + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "b b", new String[]{"y", "b", "y", "b"}, new int[]{0, 0, 2, 2}, new int[]{1, 1, 3, 3}, null, new int[]{1, 0, + 1, 0}, new int[]{1, 1, 1, 1}, true); + } + + public void testOneInputMultipleOutputNoKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", false); + add(b, "a b", "y", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b c", new String[]{"c", "x", "y", "c"}, new int[]{0, 2, 2, 6}, new int[]{1, 5, 5, 7}, new + String[]{"word", "SYNONYM", "SYNONYM", "word"}, new int[]{1, 1, 0, 1}, new int[]{1, 1, 1, 1}); + a.close(); + } + + public void testOneInputMultipleOutputMixedKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", true); + add(b, "a b", "y", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b c", new String[]{"c", "x", "y", "a", "b", "c"}, new int[]{0, 2, 2, 2, 4, 6}, new int[]{1, 5, 5, 3, 5, + 7}, new String[]{"word", "SYNONYM", "SYNONYM", "word", "word", "word"}, new int[]{1, 1, 0, 0, 1, 1, 1, 1}, new + int[]{1, 2, 2, + 1, 1, 1, 1, 1}); + a.close(); + } + + public void testSynAtEnd() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c d e a b", new String[]{"c", "d", "e", "x", "a", "b"}, new int[]{0, 2, 4, 6, 6, 8}, new int[]{1, 3, 5, 9, + 7, 9}, new String[]{"word", "word", "word", "SYNONYM", "word", "word"}, new int[]{1, 1, 1, 1, 0, 1}, new int[]{1, 1, 1, + 2, 1, + 1}); + a.close(); + } + + public void testTwoSynsInARow() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a", "x", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a a b", new String[]{"c", "x", "x", "b"}, new int[]{0, 2, 4, 6}, new int[]{1, 3, 5, 7}, new + String[]{"word", "SYNONYM", "SYNONYM", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); + a.close(); + } + + public void testBasicKeepOrigTwoOutputs() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x y", true); + add(b, "a b", "m n o", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b d", new String[]{"c", "x", "m", "a", "y", "n", "o", "b", "d"}, new int[]{0, 2, 2, 2, 2, 2, 2, 4, 6}, + new int[]{1, 5, 5, 3, 5, 5, 5, 5, 7}, new String[]{"word", "SYNONYM", "SYNONYM", "word", "SYNONYM", + "SYNONYM", "SYNONYM", + "word", "word"}, new int[]{1, 1, 0, 0, 1, 1, 1, 1, 1}, new int[]{1, 1, 2, 4, 4, 1, 2, 1, 1}); + a.close(); + } + + public void testNoCaptureIfNoMatch() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x y", true); + + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "c d d", new String[]{"c", "d", "d"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", + "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); + 
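        // "c d d" never begins a match for the "a b" rule, so the filter should not have had
        // to capture (buffer) any pending input state: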
assertEquals(0, synFilter.getCaptureCount()); + a.close(); + } + + public void testBasicNotKeepOrigOneOutput() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b", new String[]{"c", "x"}, new int[]{0, 2}, new int[]{1, 5}, new String[]{"word", "SYNONYM"}, new + int[]{1, 1}, new int[]{1, 1}); + a.close(); + } + + public void testBasicNoKeepOrigTwoOutputs() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x y", false); + add(b, "a b", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b d", new String[]{"c", "x", "m", "y", "n", "o", "d"}, new int[]{0, 2, 2, 2, 2, 2, 6}, new int[]{1, 5, + 5, 5, 5, 5, 7}, new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", + "word"}, new int[]{1, 1, 0, 1, 1, + 1, 1}, new int[]{1, 1, 2, 3, 1, 1, 1}); + a.close(); + } + + public void testIgnoreCase() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x y", false); + add(b, "a b", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c A B D", new String[]{"c", "x", "m", "y", "n", "o", "D"}, new int[]{0, 2, 2, 2, 2, 2, 6}, new int[]{1, 5, + 5, 5, 5, 5, 7}, new String[]{"word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", + "word"}, new int[]{1, 1, 0, 1, 1, + 1, 1}, new int[]{1, 1, 2, 3, 1, 1, 1}); + a.close(); + } + + public void testDoNotIgnoreCase() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x y", false); + add(b, "a b", "m n o", false); + + Analyzer a = getAnalyzer(b, false); + assertAnalyzesTo(a, "c A B D", new String[]{"c", "A", "B", "D"}, new int[]{0, 2, 4, 6}, new int[]{1, 3, 5, 7}, new + String[]{"word", "word", "word", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); + a.close(); + } + + public void testBufferedFinish1() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b c", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a b", new String[]{"c", "a", "b"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", + "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); + a.close(); + } + + public void testBufferedFinish2() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "m n o", false); + add(b, "d e", "m n o", false); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "c a d", new String[]{"c", "a", "d"}, new int[]{0, 2, 4}, new int[]{1, 3, 5}, new String[]{"word", "word", + "word"}, new int[]{1, 1, 1}, new int[]{1, 1, 1}); + a.close(); + } + + public void testCanReuse() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b", "x", true); + Analyzer a = getAnalyzer(b, true); + for (int i = 0; i < 10; i++) { + assertAnalyzesTo(a, "c a b", new String[]{"c", "x", "a", "b"}, new int[]{0, 2, 2, 4}, new int[]{1, 5, 3, 5}, new + String[]{"word", "SYNONYM", "word", "word"}, new int[]{1, 1, 0, 1}, new int[]{1, 2, 1, 1}); + } + a.close(); + } + + /** + * Multiple input tokens map to a single output token + */ + public void testManyToOne() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b c", "z", true); + + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "a b c d", new String[]{"z", "a", "b", "c", "d"}, new int[]{0, 0, 2, 4, 6}, new int[]{5, 1, 3, 5, 7}, new + 
String[]{"SYNONYM", "word", "word", "word", "word"}, new int[]{1, 0, 1, 1, 1}, new int[]{3, 1, 1, 1, 1}); + a.close(); + } + + public void testBufferAfterMatch() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "a b c d", "x", true); + add(b, "a b", "y", false); + + // The 'c' token has to be buffered because SynGraphFilter + // needs to know whether a b c d -> x matches: + Analyzer a = getAnalyzer(b, true); + assertAnalyzesTo(a, "f a b c e", new String[]{"f", "y", "c", "e"}, new int[]{0, 2, 6, 8}, new int[]{1, 5, 7, 9}, new + String[]{"word", "SYNONYM", "word", "word"}, new int[]{1, 1, 1, 1}, new int[]{1, 1, 1, 1}); + a.close(); + } + + public void testZeroSyns() throws Exception { + Tokenizer tokenizer = new MockTokenizer(); + tokenizer.setReader(new StringReader("aa bb")); + try { + new SynonymGraphFilter(tokenizer, new SynonymMap.Builder(true).build(), true); + fail("did not hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + assertEquals("fst must be non-null", iae.getMessage()); + } + } + + // Needs TermAutomatonQuery, which is in sandbox still: + public void testAccurateGraphQuery1() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("field", "wtf happened", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "what the fudge", "wtf", true); + + SynonymMap map = b.build(); + + TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); + + + TokenStream in = new CannedTokenStream(0, 23, token("what", 1, 1, 0, 4), token("the", 1, 1, 5, 8), token("fudge", 1, 1, 9, 14), + token("happened", 1, 1, 15, 23)); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + in = new CannedTokenStream(0, 12, token("wtf", 1, 1, 0, 3), token("happened", 1, 1, 4, 12)); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + // "what happened" should NOT match: + in = new CannedTokenStream(0, 13, token("what", 1, 1, 0, 4), token("happened", 1, 1, 5, 13)); + assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + IOUtils.close(r, dir); + } + + + /** + * If we expand synonyms at search time, the results are correct. 
+ */ + // Needs TermAutomatonQuery, which is in sandbox still: + public void testAccurateGraphQuery2() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("field", "say wtf happened", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "fudge", "chocolate", true); + add(b, "what the fudge", "wtf", true); + add(b, "what the", "wut", true); + add(b, "say", "say what", true); + + SynonymMap map = b.build(); + + TokenStream in = new CannedTokenStream(0, 26, token("say", 1, 1, 0, 3), token("what", 1, 1, 3, 7), token("the", 1, 1, 8, 11), + token("fudge", 1, 1, 12, 17), token("happened", 1, 1, 18, 26)); + + TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + // "what happened" should NOT match: + in = new CannedTokenStream(0, 13, token("what", 1, 1, 0, 4), token("happened", 1, 1, 5, 13)); + assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + IOUtils.close(r, dir); + } + + + // Needs TermAutomatonQuery, which is in sandbox still: + public void testAccurateGraphQuery3() throws Exception { + Directory dir = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), dir); + Document doc = new Document(); + doc.add(newTextField("field", "say what the fudge happened", Field.Store.NO)); + w.addDocument(doc); + IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + + SynonymMap.Builder b = new SynonymMap.Builder(true); + add(b, "wtf", "what the fudge", true); + + SynonymMap map = b.build(); + + TokenStream in = new CannedTokenStream(0, 15, token("say", 1, 1, 0, 3), token("wtf", 1, 1, 3, 6), token("happened", 1, 1, 7, 15)); + + TokenStreamToTermAutomatonQuery ts2q = new TokenStreamToTermAutomatonQuery(); + + assertEquals(1, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + // "what happened" should NOT match: + in = new CannedTokenStream(0, 13, token("what", 1, 1, 0, 4), token("happened", 1, 1, 5, 13)); + assertEquals(0, s.count(ts2q.toQuery("field", new SynonymGraphFilter(in, map, true)))); + + IOUtils.close(r, dir); + } + + private static Token token(String term, int posInc, int posLength, int startOffset, int endOffset) { + final Token t = new Token(term, startOffset, endOffset); + t.setPositionIncrement(posInc); + t.setPositionLength(posLength); + return t; + } + + private String randomNonEmptyString() { + while (true) { + String s = TestUtil.randomUnicodeString(random()).trim(); + //String s = TestUtil.randomSimpleString(random()).trim(); + if (s.length() != 0 && s.indexOf('\u0000') == -1) { + return s; + } + } + } + + // Adds MockGraphTokenFilter after SynFilter: + public void testRandomGraphAfter() throws Exception { + final int numIters = atLeast(3); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final SynonymMap map = b.build(); + final boolean ignoreCase = random().nextBoolean(); + + final Analyzer analyzer = new Analyzer() { + @Override + protected TokenStreamComponents 
createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true); + TokenStream syns = new SynonymGraphFilter(tokenizer, map, ignoreCase); + TokenStream graph = new MockGraphTokenFilter(random(), syns); + return new TokenStreamComponents(tokenizer, graph); + } + }; + + checkRandomData(random(), analyzer, 100); + analyzer.close(); + } + } + + public void testEmptyStringInput() throws IOException { + final int numIters = atLeast(10); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + + Analyzer analyzer = getAnalyzer(b, ignoreCase); + + checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), ""); + analyzer.close(); + } + } + + /** + * Simple random test; doesn't verify correctness. + * It does verify that the filter doesn't throw exceptions and that the stream doesn't misbehave. + */ + public void testRandom2() throws Exception { + final int numIters = atLeast(3); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + + Analyzer analyzer = getAnalyzer(b, ignoreCase); + checkRandomData(random(), analyzer, 100); + analyzer.close(); + } + } + + /** + * Simple random test like testRandom2, but for larger docs. + */ + public void testRandomHuge() throws Exception { + final int numIters = atLeast(3); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + //if (VERBOSE) { + //System.out.println("TEST: iter=" + i + " numEntries=" + numEntries); + //} + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + + Analyzer analyzer = getAnalyzer(b, ignoreCase); + checkRandomData(random(), analyzer, 100, 1024); + analyzer.close(); + } + } + + public void testEmptyTerm() throws IOException { + final int numIters = atLeast(10); + for (int i = 0; i < numIters; i++) { + SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean()); + final int numEntries = atLeast(10); + for (int j = 0; j < numEntries; j++) { + add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean()); + } + final boolean ignoreCase = random().nextBoolean(); + + final Analyzer analyzer = getAnalyzer(b, ignoreCase); + + checkAnalysisConsistency(random(), analyzer, random().nextBoolean(), ""); + analyzer.close(); + } + } + + public void testBuilderDedup() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "a b", new String[]{"ab"}, new int[]{1}); + a.close(); + } + + public void testBuilderNoDedup() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(false); + final boolean keepOrig = false; + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig); + add(b, "a b", "ab", keepOrig);
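+ // dedup is disabled, so all three identical rules are kept and each emits its own "ab" output at the same position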
+ Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "a b", new String[]{"ab", "ab", "ab"}, new int[]{1, 0, 0}); + a.close(); + } + + public void testRecursion1() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "zoo", "zoo", keepOrig); + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "zoo zoo $ zoo", new String[]{"zoo", "zoo", "$", "zoo"}, new int[]{1, 1, 1, 1}); + a.close(); + } + + public void testRecursion2() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = false; + add(b, "zoo", "zoo", keepOrig); + add(b, "zoo", "zoo zoo", keepOrig); + Analyzer a = getAnalyzer(b, true); + + // verify("zoo zoo $ zoo", "zoo/zoo zoo/zoo/zoo $/zoo zoo/zoo zoo"); + assertAnalyzesTo(a, "zoo zoo $ zoo", new String[]{"zoo", "zoo", "zoo", "zoo", "zoo", "zoo", "$", "zoo", "zoo", "zoo"}, new + int[]{1, 0, 1, 1, 0, 1, 1, 1, 0, 1}); + a.close(); + } + + public void testKeepOrig() throws Exception { + SynonymMap.Builder b = new SynonymMap.Builder(true); + final boolean keepOrig = true; + add(b, "a b", "ab", keepOrig); + add(b, "a c", "ac", keepOrig); + add(b, "a", "aa", keepOrig); + add(b, "b", "bb", keepOrig); + add(b, "z x c v", "zxcv", keepOrig); + add(b, "x c", "xc", keepOrig); + Analyzer a = getAnalyzer(b, true); + + assertAnalyzesTo(a, "$", new String[]{"$"}, new int[]{1}); + assertAnalyzesTo(a, "a", new String[]{"aa", "a"}, new int[]{1, 0}); + assertAnalyzesTo(a, "a", new String[]{"aa", "a"}, new int[]{1, 0}); + assertAnalyzesTo(a, "$ a", new String[]{"$", "aa", "a"}, new int[]{1, 1, 0}); + assertAnalyzesTo(a, "a $", new String[]{"aa", "a", "$"}, new int[]{1, 0, 1}); + assertAnalyzesTo(a, "$ a !", new String[]{"$", "aa", "a", "!"}, new int[]{1, 1, 0, 1}); + assertAnalyzesTo(a, "a a", new String[]{"aa", "a", "aa", "a"}, new int[]{1, 0, 1, 0}); + assertAnalyzesTo(a, "b", new String[]{"bb", "b"}, new int[]{1, 0}); + assertAnalyzesTo(a, "z x c v", new String[]{"zxcv", "z", "x", "c", "v"}, new int[]{1, 0, 1, 1, 1}); + assertAnalyzesTo(a, "z x c $", new String[]{"z", "xc", "x", "c", "$"}, new int[]{1, 1, 0, 1, 1}); + a.close(); + } + + private Analyzer getAnalyzer(SynonymMap.Builder b, final boolean ignoreCase) throws IOException { + final SynonymMap map = b.build(); + return new Analyzer() { + @Override + protected TokenStreamComponents createComponents(String fieldName) { + Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false); + // Make a local variable so testRandomHuge doesn't share it across threads! 
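+ // (testRandomSyns reads getMaxLookaheadUsed() from this field after analyzing each document)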
+ SynonymGraphFilter synFilter = new SynonymGraphFilter(tokenizer, map, ignoreCase); + SynonymGraphFilterTests.this.synFilter = synFilter; + return new TokenStreamComponents(tokenizer, synFilter); + } + }; + } + + private void add(SynonymMap.Builder b, String input, String output, boolean keepOrig) { + if (VERBOSE) { + //System.out.println(" add input=" + input + " output=" + output + " keepOrig=" + keepOrig); + } + CharsRefBuilder inputCharsRef = new CharsRefBuilder(); + SynonymMap.Builder.join(input.split(" +"), inputCharsRef); + + CharsRefBuilder outputCharsRef = new CharsRefBuilder(); + SynonymMap.Builder.join(output.split(" +"), outputCharsRef); + + b.add(inputCharsRef.get(), outputCharsRef.get(), keepOrig); + } + + private char[] randomBinaryChars(int minLen, int maxLen, double bias, char base) { + int len = TestUtil.nextInt(random(), minLen, maxLen); + char[] chars = new char[len]; + for (int i = 0; i < len; i++) { + char ch; + if (random().nextDouble() < bias) { + ch = base; + } else { + ch = (char) (base + 1); + } + chars[i] = ch; + } + + return chars; + } + + private static String toTokenString(char[] chars) { + StringBuilder b = new StringBuilder(); + for (char c : chars) { + if (b.length() > 0) { + b.append(' '); + } + b.append(c); + } + return b.toString(); + } + + private static class OneSyn { + char[] in; + char[] out; + boolean keepOrig; + + @Override + public String toString() { + return toTokenString(in) + " --> " + toTokenString(out) + " (keepOrig=" + keepOrig + ")"; + } + } + + public void testRandomSyns() throws Exception { + int synCount = atLeast(10); + double bias = random().nextDouble(); + boolean dedup = random().nextBoolean(); + + SynonymMap.Builder b = new SynonymMap.Builder(dedup); + List syns = new ArrayList<>(); + // Makes random syns from random a / b tokens, mapping to random x / y tokens + //if (VERBOSE) { + // System.out.println("TEST: make " + synCount + " syns"); + // System.out.println(" bias for a over b=" + bias); + // System.out.println(" dedup=" + dedup); + // System.out.println(" sausage=" + sausage); + //} + + int maxSynLength = 0; + + for (int i = 0; i < synCount; i++) { + OneSyn syn = new OneSyn(); + syn.in = randomBinaryChars(1, 5, bias, 'a'); + syn.out = randomBinaryChars(1, 5, 0.5, 'x'); + syn.keepOrig = random().nextBoolean(); + syns.add(syn); + + maxSynLength = Math.max(maxSynLength, syn.in.length); + + //if (VERBOSE) { + // System.out.println(" " + syn); + //} + add(b, toTokenString(syn.in), toTokenString(syn.out), syn.keepOrig); + } + + // Only used w/ VERBOSE: + Analyzer aNoSausageed; + if (VERBOSE) { + aNoSausageed = getAnalyzer(b, true); + } else { + aNoSausageed = null; + } + + Analyzer a = getAnalyzer(b, true); + int iters = atLeast(20); + for (int iter = 0; iter < iters; iter++) { + + String doc = toTokenString(randomBinaryChars(50, 100, bias, 'a')); + //String doc = toTokenString(randomBinaryChars(10, 50, bias, 'a')); + + //if (VERBOSE) { + // System.out.println("TEST: iter=" + iter + " doc=" + doc); + //} + Automaton expected = slowSynFilter(doc, syns); + if (VERBOSE) { + //System.out.println(" expected:\n" + expected.toDot()); + } + Automaton actual = toAutomaton(a.tokenStream("field", new StringReader(doc))); + //if (VERBOSE) { + // System.out.println(" actual:\n" + actual.toDot()); + //} + + assertTrue("maxLookaheadUsed=" + synFilter.getMaxLookaheadUsed() + " maxSynLength=" + maxSynLength, synFilter + .getMaxLookaheadUsed() <= maxSynLength); + + checkAnalysisConsistency(random(), a, random().nextBoolean(), doc); + // We can 
easily have a non-deterministic automaton at this point, e.g. if + // more than one syn matched at a given point, or if the syn mapped to an + // output token that also happens to be in the input: + try { + actual = Operations.determinize(actual, 50000); + } catch (TooComplexToDeterminizeException tctde) { + // Unfortunately the syns can easily create difficult-to-determinize graphs: + assertTrue(approxEquals(actual, expected)); + continue; + } + + try { + expected = Operations.determinize(expected, 50000); + } catch (TooComplexToDeterminizeException tctde) { + // Unfortunately the syns can easily create difficult-to-determinize graphs: + assertTrue(approxEquals(actual, expected)); + continue; + } + + assertTrue(approxEquals(actual, expected)); + assertTrue(Operations.sameLanguage(actual, expected)); + } + + a.close(); + } + + /** + * Only used when true equality is too costly to check! + */ + private boolean approxEquals(Automaton actual, Automaton expected) { + // Don't collapse these into one line else the thread stack won't say which direction failed!: + boolean b1 = approxSubsetOf(actual, expected); + boolean b2 = approxSubsetOf(expected, actual); + return b1 && b2; + } + + private boolean approxSubsetOf(Automaton a1, Automaton a2) { + AutomatonTestUtil.RandomAcceptedStrings ras = new AutomatonTestUtil.RandomAcceptedStrings(a1); + for (int i = 0; i < 2000; i++) { + int[] ints = ras.getRandomAcceptedString(random()); + IntsRef path = new IntsRef(ints, 0, ints.length); + if (accepts(a2, path) == false) { + throw new RuntimeException("a2 does not accept " + path); + } + } + + // Presumed true + return true; + } + + /** + * Like {@link Operations#run} except the incoming automaton is allowed to be non-deterministic. + */ + private static boolean accepts(Automaton a, IntsRef path) { + Set<Integer> states = new HashSet<>(); + states.add(0); + Transition t = new Transition(); + for (int i = 0; i < path.length; i++) { + int digit = path.ints[path.offset + i]; + Set<Integer> nextStates = new HashSet<>(); + for (int state : states) { + int count = a.initTransition(state, t); + for (int j = 0; j < count; j++) { + a.getNextTransition(t); + if (digit >= t.min && digit <= t.max) { + nextStates.add(t.dest); + } + } + } + states = nextStates; + if (states.isEmpty()) { + return false; + } + } + + for (int state : states) { + if (a.isAccept(state)) { + return true; + } + } + + return false; + } + + /** + * Stupid, slow brute-force, yet hopefully bug-free, synonym filter.
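+ * It greedily applies the longest synonym rule(s) matching at each position (also keeping the original tokens when keepOrig is set) and returns the automaton that the real filter's output is compared against.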
+ */ + private Automaton slowSynFilter(String doc, List syns) { + String[] tokens = doc.split(" +"); + //if (VERBOSE) { + // System.out.println(" doc has " + tokens.length + " tokens"); + //} + int i = 0; + Automaton.Builder a = new Automaton.Builder(); + int lastState = a.createState(); + while (i < tokens.length) { + // Consider all possible syn matches starting at this point: + assert tokens[i].length() == 1; + //if (VERBOSE) { + // System.out.println(" i=" + i); + //} + + List matches = new ArrayList<>(); + for (OneSyn syn : syns) { + if (i + syn.in.length <= tokens.length) { + boolean match = true; + for (int j = 0; j < syn.in.length; j++) { + if (tokens[i + j].charAt(0) != syn.in[j]) { + match = false; + break; + } + } + + if (match) { + if (matches.isEmpty() == false) { + if (syn.in.length < matches.get(0).in.length) { + // Greedy matching: we already found longer syns matching here + continue; + } else if (syn.in.length > matches.get(0).in.length) { + // Greedy matching: all previous matches were shorter, so we drop them + matches.clear(); + } else { + // Keep the current matches: we allow multiple synonyms matching the same input string + } + } + + matches.add(syn); + } + } + } + + int nextState = a.createState(); + + if (matches.isEmpty() == false) { + // We have match(es) starting at this token + //if (VERBOSE) { + // System.out.println(" matches @ i=" + i + ": " + matches); + //} + // We keepOrig if any of the matches said to: + boolean keepOrig = false; + for (OneSyn syn : matches) { + keepOrig |= syn.keepOrig; + } + + if (keepOrig) { + // Add path for the original tokens + addSidePath(a, lastState, nextState, matches.get(0).in); + } + + for (OneSyn syn : matches) { + addSidePath(a, lastState, nextState, syn.out); + } + + i += matches.get(0).in.length; + } else { + a.addTransition(lastState, nextState, tokens[i].charAt(0)); + i++; + } + + lastState = nextState; + } + + a.setAccept(lastState, true); + + return topoSort(a.finish()); + } + + /** + * Just creates a side path from startState to endState with the provided tokens. 
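+ * Intermediate states are created for all tokens except the last one, which transitions directly into endState.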
+ */ + private static void addSidePath(Automaton.Builder a, int startState, int endState, char[] tokens) { + int lastState = startState; + for (int i = 0; i < tokens.length; i++) { + int nextState; + if (i == tokens.length - 1) { + nextState = endState; + } else { + nextState = a.createState(); + } + + a.addTransition(lastState, nextState, tokens[i]); + + lastState = nextState; + } + } + + private Automaton toAutomaton(TokenStream ts) throws IOException { + PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); + PositionLengthAttribute posLenAtt = ts.addAttribute(PositionLengthAttribute.class); + CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); + ts.reset(); + Automaton a = new Automaton(); + int srcNode = -1; + int destNode = -1; + int state = a.createState(); + while (ts.incrementToken()) { + assert termAtt.length() == 1; + char c = termAtt.charAt(0); + int posInc = posIncAtt.getPositionIncrement(); + if (posInc != 0) { + srcNode += posInc; + while (state < srcNode) { + state = a.createState(); + } + } + destNode = srcNode + posLenAtt.getPositionLength(); + while (state < destNode) { + state = a.createState(); + } + a.addTransition(srcNode, destNode, c); + } + ts.end(); + ts.close(); + a.finishState(); + a.setAccept(destNode, true); + return a; + } + + /** + * Renumbers nodes according to their topo sort + */ + private Automaton topoSort(Automaton in) { + int[] newToOld = Operations.topoSortStates(in); + int[] oldToNew = new int[newToOld.length]; + + Automaton.Builder a = new Automaton.Builder(); + //System.out.println("remap:"); + for (int i = 0; i < newToOld.length; i++) { + a.createState(); + oldToNew[newToOld[i]] = i; + //System.out.println(" " + newToOld[i] + " -> " + i); + if (in.isAccept(newToOld[i])) { + a.setAccept(i, true); + //System.out.println(" **"); + } + } + + Transition t = new Transition(); + for (int i = 0; i < newToOld.length; i++) { + int count = in.initTransition(newToOld[i], t); + for (int j = 0; j < count; j++) { + in.getNextTransition(t); + a.addTransition(i, oldToNew[t.dest], t.min, t.max); + } + } + + return a.finish(); + } + + /** + * Helper method to validate all strings that can be generated from a token stream. Uses {@link + * TokenStreamToAutomaton} to create an automaton. Asserts the finite strings of the automaton + * are all and only the given valid strings. + * + * @param analyzer analyzer containing the SynonymFilter under test. + * @param text text to be analyzed. + * @param expectedStrings all expected finite strings. + */ + public void assertAllStrings(Analyzer analyzer, String text, String[] expectedStrings) throws IOException { + TokenStream tokenStream = analyzer.tokenStream("dummy", text); + try { + Automaton automaton = new TokenStreamToAutomaton().toAutomaton(tokenStream); + Set finiteStrings = AutomatonTestUtil.getFiniteStringsRecursive(automaton, -1); + + assertEquals("Invalid resulting strings count. 
Expected " + expectedStrings.length + " was " + finiteStrings.size(), + expectedStrings.length, finiteStrings.size()); + + Set expectedStringsSet = new HashSet<>(Arrays.asList(expectedStrings)); + + BytesRefBuilder scratchBytesRefBuilder = new BytesRefBuilder(); + for (IntsRef ir : finiteStrings) { + String s = Util.toBytesRef(ir, scratchBytesRefBuilder).utf8ToString().replace((char) TokenStreamToAutomaton.POS_SEP, ' '); + assertTrue("Unexpected string found: " + s, expectedStringsSet.contains(s)); + } + } finally { + tokenStream.close(); + } + } +} diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java index 23fdf3499b2..102e16691d9 100644 --- a/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainIT.java @@ -135,20 +135,20 @@ public final class ClusterAllocationExplainIT extends ESIntegTestCase { assertEquals(d.type(), Decision.Type.NO); if (noAttrNode.equals(nodeName)) { - assertThat(d.toString(), containsString("node does not match index include filters [foo:\"bar\"]")); + assertThat(d.toString(), containsString("node does not match [index.routing.allocation.include] filters [foo:\"bar\"]")); assertNull(storeStatus); assertEquals("the shard cannot be assigned because one or more allocation decider returns a 'NO' decision", explanation.getFinalExplanation()); assertEquals(ClusterAllocationExplanation.FinalDecision.NO, finalDecision); } else if (barAttrNode.equals(nodeName)) { - assertThat(d.toString(), containsString("node does not match index include filters [foo:\"bar\"]")); + assertThat(d.toString(), containsString("node does not match [index.routing.allocation.include] filters [foo:\"bar\"]")); barAttrWeight = weight; assertNull(storeStatus); assertEquals("the shard cannot be assigned because one or more allocation decider returns a 'NO' decision", explanation.getFinalExplanation()); assertEquals(ClusterAllocationExplanation.FinalDecision.NO, finalDecision); } else if (fooBarAttrNode.equals(nodeName)) { - assertThat(d.toString(), containsString("the shard cannot be allocated on the same node id")); + assertThat(d.toString(), containsString("the shard cannot be allocated to the same node")); fooBarAttrWeight = weight; assertEquals(storeStatus.getAllocationStatus(), IndicesShardStoresResponse.StoreStatus.AllocationStatus.PRIMARY); diff --git a/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainTests.java b/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainTests.java index 895450e6d5b..329cc3805ab 100644 --- a/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainTests.java +++ b/core/src/test/java/org/elasticsearch/action/admin/cluster/allocation/ClusterAllocationExplainTests.java @@ -19,7 +19,6 @@ package org.elasticsearch.action.admin.cluster.allocation; -import org.elasticsearch.client.Requests; import org.elasticsearch.cluster.routing.allocation.decider.Decision; import org.elasticsearch.test.ESSingleNodeTestCase; @@ -51,12 +50,12 @@ public final class ClusterAllocationExplainTests extends ESSingleNodeTestCase { assertEquals(Decision.Type.NO, d.type()); assertEquals(ClusterAllocationExplanation.FinalDecision.NO, fd); 
assertEquals(ClusterAllocationExplanation.StoreCopy.AVAILABLE, storeCopy); - assertTrue(d.toString(), d.toString().contains("NO(the shard cannot be allocated on the same node id")); + assertTrue(d.toString(), d.toString().contains("NO(the shard cannot be allocated to the same node")); assertTrue(d instanceof Decision.Multi); Decision.Multi md = (Decision.Multi) d; Decision ssd = md.getDecisions().get(0); assertEquals(Decision.Type.NO, ssd.type()); - assertTrue(ssd.toString(), ssd.toString().contains("NO(the shard cannot be allocated on the same node id")); + assertTrue(ssd.toString(), ssd.toString().contains("NO(the shard cannot be allocated to the same node")); Float weight = explanation.getWeight(); assertNotNull("should have a weight", weight); @@ -78,12 +77,14 @@ public final class ClusterAllocationExplainTests extends ESSingleNodeTestCase { assertEquals(Decision.Type.NO, d.type()); assertEquals(ClusterAllocationExplanation.FinalDecision.ALREADY_ASSIGNED, fd); assertEquals(ClusterAllocationExplanation.StoreCopy.AVAILABLE, storeCopy); - assertTrue(d.toString(), d.toString().contains("NO(the shard cannot be allocated on the same node id")); + assertTrue(d.toString(), d.toString().contains( + "NO(the shard cannot be allocated to the node on which it already exists [[test][0]")); assertTrue(d instanceof Decision.Multi); md = (Decision.Multi) d; ssd = md.getDecisions().get(0); assertEquals(Decision.Type.NO, ssd.type()); - assertTrue(ssd.toString(), ssd.toString().contains("NO(the shard cannot be allocated on the same node id")); + assertTrue(ssd.toString(), ssd.toString().contains( + "NO(the shard cannot be allocated to the node on which it already exists [[test][0]")); weight = explanation.getWeight(); assertNotNull("should have a weight", weight); diff --git a/core/src/test/java/org/elasticsearch/client/transport/FailAndRetryMockTransport.java b/core/src/test/java/org/elasticsearch/client/transport/FailAndRetryMockTransport.java index 908d2eb6d1e..0416f6aff41 100644 --- a/core/src/test/java/org/elasticsearch/client/transport/FailAndRetryMockTransport.java +++ b/core/src/test/java/org/elasticsearch/client/transport/FailAndRetryMockTransport.java @@ -26,13 +26,13 @@ import org.elasticsearch.action.admin.cluster.state.ClusterStateResponse; import org.elasticsearch.cluster.ClusterName; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.node.DiscoveryNode; -import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.component.Lifecycle; import org.elasticsearch.common.component.LifecycleListener; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.BoundTransportAddress; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.ConnectionProfile; import org.elasticsearch.transport.Transport; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportRequest; @@ -153,23 +153,13 @@ abstract class FailAndRetryMockTransport imp throw new UnsupportedOperationException(); } - @Override - public boolean addressSupported(Class address) { - throw new UnsupportedOperationException(); - } - @Override public boolean nodeConnected(DiscoveryNode node) { return false; } @Override - public void connectToNode(DiscoveryNode node) throws ConnectTransportException { - - } - - @Override - public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException { + public 
void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException { } diff --git a/core/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java b/core/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java index 5dcbefbe034..7372218a9ed 100644 --- a/core/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/NodeConnectionsServiceTests.java @@ -31,6 +31,7 @@ import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.ConnectionProfile; import org.elasticsearch.transport.Transport; import org.elasticsearch.transport.TransportException; import org.elasticsearch.transport.TransportRequest; @@ -175,7 +176,6 @@ public class NodeConnectionsServiceTests extends ESTestCase { @Override public void transportServiceAdapter(TransportServiceAdapter service) { - } @Override @@ -193,27 +193,19 @@ public class NodeConnectionsServiceTests extends ESTestCase { return new TransportAddress[0]; } - @Override - public boolean addressSupported(Class address) { - return false; - } - @Override public boolean nodeConnected(DiscoveryNode node) { return connectedNodes.contains(node); } @Override - public void connectToNode(DiscoveryNode node) throws ConnectTransportException { - if (connectedNodes.contains(node) == false && randomConnectionExceptions && randomBoolean()) { - throw new ConnectTransportException(node, "simulated"); + public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException { + if (connectionProfile == null) { + if (connectedNodes.contains(node) == false && randomConnectionExceptions && randomBoolean()) { + throw new ConnectTransportException(node, "simulated"); + } + connectedNodes.add(node); } - connectedNodes.add(node); - } - - @Override - public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException { - } @Override diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FilterAllocationDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FilterAllocationDeciderTests.java index 3a792ae991c..5ec162eb719 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FilterAllocationDeciderTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/FilterAllocationDeciderTests.java @@ -28,9 +28,11 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.RoutingTable; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.allocator.BalancedShardsAllocator; import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; import org.elasticsearch.cluster.routing.allocation.decider.Decision; +import org.elasticsearch.cluster.routing.allocation.decider.Decision.Type; import org.elasticsearch.cluster.routing.allocation.decider.FilterAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ReplicaAfterPrimaryActiveAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SameShardAllocationDecider; @@ -74,12 +76,23 @@ public class 
FilterAllocationDeciderTests extends ESAllocationTestCase { // after failing the shard we are unassigned since the node is blacklisted and we can't initialize on the other node RoutingAllocation allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, 0, false); - assertEquals(filterAllocationDecider.canAllocate(routingTable.index("idx").shard(0).primaryShard(), - state.getRoutingNodes().node("node2") - , allocation), Decision.YES); - assertEquals(filterAllocationDecider.canAllocate(routingTable.index("idx").shard(0).primaryShard(), - state.getRoutingNodes().node("node1") - , allocation), Decision.NO); + allocation.debugDecision(true); + Decision.Single decision = (Decision.Single) filterAllocationDecider.canAllocate( + routingTable.index("idx").shard(0).primaryShard(), + state.getRoutingNodes().node("node2"), allocation); + assertEquals(Type.YES, decision.type()); + assertEquals("node passes include/exclude/require filters", decision.getExplanation()); + ShardRouting primaryShard = routingTable.index("idx").shard(0).primaryShard(); + decision = (Decision.Single) filterAllocationDecider.canAllocate( + routingTable.index("idx").shard(0).primaryShard(), + state.getRoutingNodes().node("node1"), allocation); + assertEquals(Type.NO, decision.type()); + if (primaryShard.recoverySource().getType() == RecoverySource.Type.LOCAL_SHARDS) { + assertEquals("initial allocation of the shrunken index is only allowed on nodes [_id:\"node2\"] that " + + "hold a copy of every shard in the index", decision.getExplanation()); + } else { + assertEquals("initial allocation of the index is only allowed on nodes [_id:\"node2\"]", decision.getExplanation()); + } state = service.reroute(state, "try allocate again"); routingTable = state.routingTable(); @@ -114,12 +127,17 @@ public class FilterAllocationDeciderTests extends ESAllocationTestCase { allocation = new RoutingAllocation(allocationDeciders, state.getRoutingNodes(), state, null, 0, false); - assertEquals(filterAllocationDecider.canAllocate(routingTable.index("idx").shard(0).shards().get(0), - state.getRoutingNodes().node("node2") - , allocation), Decision.YES); - assertEquals(filterAllocationDecider.canAllocate(routingTable.index("idx").shard(0).shards().get(0), - state.getRoutingNodes().node("node1") - , allocation), Decision.YES); + allocation.debugDecision(true); + decision = (Decision.Single) filterAllocationDecider.canAllocate( + routingTable.index("idx").shard(0).shards().get(0), + state.getRoutingNodes().node("node2"), allocation); + assertEquals(Type.YES, decision.type()); + assertEquals("node passes include/exclude/require filters", decision.getExplanation()); + decision = (Decision.Single) filterAllocationDecider.canAllocate( + routingTable.index("idx").shard(0).shards().get(0), + state.getRoutingNodes().node("node1"), allocation); + assertEquals(Type.YES, decision.type()); + assertEquals("node passes include/exclude/require filters", decision.getExplanation()); } private ClusterState createInitialClusterState(AllocationService service, Settings settings) { diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java index c80cc9a26b9..3aba43f366a 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java +++ 
b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderTests.java @@ -33,6 +33,7 @@ import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.IndexShardRoutingTable; +import org.elasticsearch.cluster.routing.RecoverySource; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.RoutingNodes; import org.elasticsearch.cluster.routing.RoutingTable; @@ -783,9 +784,10 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { strategy.reroute(clusterState, cmds, false, false); fail("should not have been able to reroute the shard"); } catch (IllegalArgumentException e) { - assertThat("can't allocated because there isn't enough room: " + e.getMessage(), + assertThat("can't be allocated because there isn't enough room: " + e.getMessage(), e.getMessage(), - containsString("the node is above the low watermark and has more than allowed [70.0%] used disk, free: [26.0%]")); + containsString("the node is above the low watermark [cluster.routing.allocation.disk.watermark.low=0.7], using " + + "more disk space than the maximum allowed [70.0%], actual free: [26.0%]")); } } @@ -852,8 +854,13 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), false); + routingAllocation.debugDecision(true); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); + assertThat(((Decision.Single) decision).getExplanation(), containsString( + "the shard cannot remain on this node because it is above the high watermark " + + "[cluster.routing.allocation.disk.watermark.high=70%] and there is less than the required [30.0%] free disk on node, " + + "actual free: [20.0%]")); // Two shards consuming each 80% of disk space while 70% is allowed, but one is relocating, so shard 0 can stay firstRouting = TestShardRouting.newShardRouting("test", 0, "node1", null, true, ShardRoutingState.STARTED); @@ -874,10 +881,22 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), false); + routingAllocation.debugDecision(true); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertEquals("there is enough disk on this node for the shard to remain, free: [60b]", + ((Decision.Single) decision).getExplanation()); decision = diskThresholdDecider.canAllocate(fooRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.NO)); + if (fooRouting.recoverySource().getType() == RecoverySource.Type.EMPTY_STORE) { + assertThat(((Decision.Single) decision).getExplanation(), containsString( + "the node is above the high watermark [cluster.routing.allocation.disk.watermark.high=70%], using more disk space than " + + "the maximum allowed [70.0%], actual free: [20.0%]")); + } else { + 
assertThat(((Decision.Single) decision).getExplanation(), containsString( + "the node is above the low watermark [cluster.routing.allocation.disk.watermark.low=60%], using more disk space than " + + "the maximum allowed [60.0%], actual free: [20.0%]")); + } // Creating AllocationService instance and the services it depends on... ClusterInfoService cis = new ClusterInfoService() { @@ -972,10 +991,12 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { ClusterState clusterState = ClusterState.builder(baseClusterState).routingTable(builder.build()).build(); RoutingAllocation routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), false); + routingAllocation.debugDecision(true); Decision decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); // Two shards should start happily assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(((Decision.Single) decision).getExplanation(), containsString("there is only a single data node present")); ClusterInfoService cis = new ClusterInfoService() { @Override public ClusterInfo getClusterInfo() { @@ -1032,8 +1053,11 @@ public class DiskThresholdDeciderTests extends ESAllocationTestCase { clusterState = ClusterState.builder(updateClusterState).routingTable(builder.build()).build(); routingAllocation = new RoutingAllocation(null, new RoutingNodes(clusterState), clusterState, clusterInfo, System.nanoTime(), false); + routingAllocation.debugDecision(true); decision = diskThresholdDecider.canRemain(firstRouting, firstRoutingNode, routingAllocation); assertThat(decision.type(), equalTo(Decision.Type.YES)); + assertThat(((Decision.Single) decision).getExplanation(), containsString( + "there is enough disk on this node for the shard to remain, free: [60b]")); result = strategy.reroute(clusterState, "reroute"); assertThat(result.routingTable().index("test").getShards().get(0).primaryShard().state(), equalTo(STARTED)); diff --git a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java index d3e9259994c..659c3b25833 100644 --- a/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java +++ b/core/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/DiskThresholdDeciderUnitTests.java @@ -52,6 +52,7 @@ import java.util.HashSet; import static java.util.Collections.emptyMap; import static java.util.Collections.emptySet; +import static org.hamcrest.Matchers.containsString; /** * Unit tests for the DiskThresholdDecider @@ -98,8 +99,15 @@ public class DiskThresholdDeciderUnitTests extends ESAllocationTestCase { shardSizes.put("[test][0][p]", 10L); // 10 bytes final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), ImmutableOpenMap.of()); RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, Collections.singleton(decider)), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); - assertEquals(mostAvailableUsage.toString(), Decision.YES, decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation)); - assertEquals(mostAvailableUsage.toString(), Decision.NO, decider.canAllocate(test_0, new RoutingNode("node_1", node_1), allocation)); + 
allocation.debugDecision(true); + Decision decision = decider.canAllocate(test_0, new RoutingNode("node_0", node_0), allocation); + assertEquals(mostAvailableUsage.toString(), Decision.Type.YES, decision.type()); + assertThat(((Decision.Single) decision).getExplanation(), containsString("enough disk for shard on node")); + decision = decider.canAllocate(test_0, new RoutingNode("node_1", node_1), allocation); + assertEquals(mostAvailableUsage.toString(), Decision.Type.NO, decision.type()); + assertThat(((Decision.Single) decision).getExplanation(), containsString( + "the node is above the high watermark [cluster.routing.allocation.disk.watermark.high=90%], using more disk space than " + + "the maximum allowed [90.0%]")); } public void testCanRemainUsesLeastAvailableSpace() { @@ -165,8 +173,16 @@ public class DiskThresholdDeciderUnitTests extends ESAllocationTestCase { final ClusterInfo clusterInfo = new ClusterInfo(leastAvailableUsages.build(), mostAvailableUsage.build(), shardSizes.build(), shardRoutingMap.build()); RoutingAllocation allocation = new RoutingAllocation(new AllocationDeciders(Settings.EMPTY, Collections.singleton(decider)), clusterState.getRoutingNodes(), clusterState, clusterInfo, System.nanoTime(), false); - assertEquals(Decision.YES, decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation)); - assertEquals(Decision.NO, decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation)); + allocation.debugDecision(true); + Decision decision = decider.canRemain(test_0, new RoutingNode("node_0", node_0), allocation); + assertEquals(Decision.Type.YES, decision.type()); + assertThat(((Decision.Single) decision).getExplanation(), containsString( + "there is enough disk on this node for the shard to remain, free: [10b]")); + decision = decider.canRemain(test_1, new RoutingNode("node_1", node_1), allocation); + assertEquals(Decision.Type.NO, decision.type()); + assertThat(((Decision.Single) decision).getExplanation(), containsString("the shard cannot remain on this node because it is " + + "above the high watermark [cluster.routing.allocation.disk.watermark.high=90%] and there is less than the required [10.0%] " + + "free disk on node, actual free: [9.0%]")); try { decider.canRemain(test_0, new RoutingNode("node_1", node_1), allocation); fail("not allocated on this node"); @@ -180,9 +196,15 @@ public class DiskThresholdDeciderUnitTests extends ESAllocationTestCase { // not allocated on that node } - assertEquals("can stay since allocated on a different path with enough space", Decision.YES, decider.canRemain(test_2, new RoutingNode("node_1", node_1), allocation)); + decision = decider.canRemain(test_2, new RoutingNode("node_1", node_1), allocation); + assertEquals("can stay since allocated on a different path with enough space", Decision.Type.YES, decision.type()); + assertThat(((Decision.Single) decision).getExplanation(), containsString( + "this shard is not allocated on the most utilized disk and can remain")); - assertEquals("can stay since we don't have information about this shard", Decision.YES, decider.canRemain(test_2, new RoutingNode("node_1", node_1), allocation)); + decision = decider.canRemain(test_2, new RoutingNode("node_1", node_1), allocation); + assertEquals("can stay since we don't have information about this shard", Decision.Type.YES, decision.type()); + assertThat(((Decision.Single) decision).getExplanation(), containsString( + "this shard is not allocated on the most utilized disk and can remain")); } diff --git 
a/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java b/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java new file mode 100644 index 00000000000..f6fbc3410ac --- /dev/null +++ b/core/src/test/java/org/elasticsearch/index/search/MatchQueryIT.java @@ -0,0 +1,154 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.search; + +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoSearchHits; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; + +import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; +import org.elasticsearch.action.index.IndexRequestBuilder; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentFactory; +import org.elasticsearch.index.query.Operator; +import org.elasticsearch.index.query.QueryBuilders; +import org.elasticsearch.test.ESIntegTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.ExecutionException; + +public class MatchQueryIT extends ESIntegTestCase { + private static final String INDEX = "test"; + + /** + * Test setup. 
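+ * Creates the test index with a plain synonym analyzer (lower_syns) and a graph synonym analyzer (lower_graphsyns), then indexes the documents shared by all tests.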
+ */ + @Before + public void setUp() throws Exception { + super.setUp(); + CreateIndexRequestBuilder builder = prepareCreate(INDEX).setSettings( + Settings.builder() + .put(indexSettings()) + .put("index.analysis.filter.syns.type", "synonym") + .putArray("index.analysis.filter.syns.synonyms", "wtf, what the fudge", "foo, bar baz") + .put("index.analysis.analyzer.lower_syns.type", "custom") + .put("index.analysis.analyzer.lower_syns.tokenizer", "standard") + .putArray("index.analysis.analyzer.lower_syns.filter", "lowercase", "syns") + .put("index.analysis.filter.graphsyns.type", "synonym_graph") + .putArray("index.analysis.filter.graphsyns.synonyms", "wtf, what the fudge", "foo, bar baz") + .put("index.analysis.analyzer.lower_graphsyns.type", "custom") + .put("index.analysis.analyzer.lower_graphsyns.tokenizer", "standard") + .putArray("index.analysis.analyzer.lower_graphsyns.filter", "lowercase", "graphsyns") + ); + + assertAcked(builder.addMapping(INDEX, createMapping())); + ensureGreen(); + + List<IndexRequestBuilder> builders = new ArrayList<>(); + builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo")); + builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man")); + builders.add(client().prepareIndex("test", "test", "3").setSource("field", "wtf")); + builders.add(client().prepareIndex("test", "test", "4").setSource("field", "what is the name for fudge")); + builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three")); + builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three")); + + indexRandom(true, false, builders); + } + + /** + * Sets up the index mappings for the test index. + * + * @return the json builder with the index mappings + * @throws IOException on error creating mapping json + */ + private XContentBuilder createMapping() throws IOException { + return XContentFactory.jsonBuilder() + .startObject() + .startObject(INDEX) + .startObject("properties") + .startObject("field") + .field("type", "text") + .endObject() + .endObject() + .endObject() + .endObject(); + } + + public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException { + // first search using regular synonym field using phrase + SearchResponse searchResponse = client().prepareSearch(INDEX) + .setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get(); + + // because foo -> "bar baz", "foo" and "bar" are at position 0 and "baz" and "two" are at position 1, so the phrase query can match any of:
+ // "bar two three", "bar baz three", "foo two three", "foo baz three" + assertHitCount(searchResponse, 1L); + assertSearchHits(searchResponse, "5"); // we should not match this but we do + + // same query using graph should find correct result + searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three") + .analyzer("lower_graphsyns")).get(); + + assertHitCount(searchResponse, 1L); + assertSearchHits(searchResponse, "6"); + } + + public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException { + // first search using regular synonym field using phrase + SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") + .operator(Operator.AND).analyzer("lower_syns")).get(); + + // 0 = say, 1 = OR(wtf, what), 2 = the, 3 = fudge + // "the" and "fudge" are required here, even though they were part of the synonym which is also expanded + assertNoSearchHits(searchResponse); + + // same query using graph should find correct result + searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge") + .operator(Operator.AND).analyzer("lower_graphsyns")).get(); + + assertHitCount(searchResponse, 1L); + assertSearchHits(searchResponse, "1"); + } + + public void testMinShouldMatch() throws ExecutionException, InterruptedException { + // no min should match + SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo") + .operator(Operator.OR).analyzer("lower_graphsyns")).get(); + + assertHitCount(searchResponse, 6L); + assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6"); + + // same query, with min_should_match of 2 + searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo") + .operator(Operator.OR).analyzer("lower_graphsyns").minimumShouldMatch("80%")).get(); + + // three wtf foo = 2 terms, match #1 + // three wtf bar baz = 3 terms, match #6 + // three what the fudge foo = 4 terms, no match + // three what the fudge bar baz = 4 terms, match #2 + assertHitCount(searchResponse, 3L); + assertSearchHits(searchResponse, "1", "2", "6"); + } +} diff --git a/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java b/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java index 7d7b7a4cd6e..992667a5056 100644 --- a/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java +++ b/core/src/test/java/org/elasticsearch/index/search/MultiMatchQueryTests.java @@ -28,10 +28,12 @@ import org.apache.lucene.search.BoostQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.SynonymQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; +import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.MapperService; @@ -55,7 +57,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { @Before public void setup() throws IOException { - IndexService indexService = createIndex("test"); + Settings settings = Settings.builder() + 
.put("index.analysis.filter.syns.type","synonym") + .putArray("index.analysis.filter.syns.synonyms","quick,fast") + .put("index.analysis.analyzer.syns.tokenizer","standard") + .put("index.analysis.analyzer.syns.filter","syns").build(); + IndexService indexService = createIndex("test", settings); MapperService mapperService = indexService.mapperService(); String mapping = "{\n" + " \"person\":{\n" + @@ -63,10 +70,12 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { " \"name\":{\n" + " \"properties\":{\n" + " \"first\": {\n" + - " \"type\":\"text\"\n" + + " \"type\":\"text\",\n" + + " \"analyzer\":\"syns\"\n" + " }," + " \"last\": {\n" + - " \"type\":\"text\"\n" + + " \"type\":\"text\",\n" + + " \"analyzer\":\"syns\"\n" + " }" + " }" + " }\n" + @@ -176,4 +185,34 @@ public class MultiMatchQueryTests extends ESSingleNodeTestCase { assertThat(parsedQuery, instanceOf(MultiPhrasePrefixQuery.class)); assertThat(parsedQuery.toString(), equalTo("_all:\"foo*\"")); } + + public void testMultiMatchCrossFieldsWithSynonyms() throws IOException { + QueryShardContext queryShardContext = indexService.newQueryShardContext( + randomInt(20), null, () -> { throw new UnsupportedOperationException(); }); + + // check that synonym query is used for a single field + Query parsedQuery = + multiMatchQuery("quick").field("name.first") + .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + Term[] terms = new Term[2]; + terms[0] = new Term("name.first", "quick"); + terms[1] = new Term("name.first", "fast"); + Query expectedQuery = new SynonymQuery(terms); + assertThat(parsedQuery, equalTo(expectedQuery)); + + // check that blended term query is used for multiple fields + parsedQuery = + multiMatchQuery("quick").field("name.first").field("name.last") + .type(MultiMatchQueryBuilder.Type.CROSS_FIELDS).toQuery(queryShardContext); + terms = new Term[4]; + terms[0] = new Term("name.first", "quick"); + terms[1] = new Term("name.first", "fast"); + terms[2] = new Term("name.last", "quick"); + terms[3] = new Term("name.last", "fast"); + float[] boosts = new float[4]; + Arrays.fill(boosts, 1.0f); + expectedQuery = BlendedTermQuery.dismaxBlendedQuery(terms, boosts, 1.0f); + assertThat(parsedQuery, equalTo(expectedQuery)); + + } } diff --git a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 8f8887bd150..0a73c79ff9f 100644 --- a/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/core/src/test/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -39,6 +39,7 @@ import org.elasticsearch.index.query.Operator; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.functionscore.FunctionScoreQueryBuilder; +import org.elasticsearch.index.query.functionscore.RandomScoreFunctionBuilder; import org.elasticsearch.index.search.MatchQuery; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.rest.RestStatus; @@ -2935,6 +2936,26 @@ public class HighlighterSearchIT extends ESIntegTestCase { assertThat(field.getFragments()[0].string(), equalTo("brown")); } + public void testFiltersFunctionScoreQueryHighlight() throws Exception { + client().prepareIndex("test", "type", "1") + .setSource(jsonBuilder().startObject().field("text", "brown").field("enable", "yes").endObject()) + 
.setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE) + .get(); + FunctionScoreQueryBuilder.FilterFunctionBuilder filterBuilder = + new FunctionScoreQueryBuilder.FilterFunctionBuilder(QueryBuilders.termQuery("enable", "yes"), + new RandomScoreFunctionBuilder()); + SearchResponse searchResponse = client().prepareSearch() + .setQuery(new FunctionScoreQueryBuilder(QueryBuilders.prefixQuery("text", "bro"), + new FunctionScoreQueryBuilder.FilterFunctionBuilder[] {filterBuilder})) + .highlighter(new HighlightBuilder() + .field(new Field("text"))) + .get(); + assertHitCount(searchResponse, 1); + HighlightField field = searchResponse.getHits().getAt(0).highlightFields().get("text"); + assertThat(field.getFragments().length, equalTo(1)); + assertThat(field.getFragments()[0].string(), equalTo("brown")); + } + public void testSynonyms() throws IOException { Builder builder = Settings.builder() .put(indexSettings()) diff --git a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java index f750175889b..769866641e5 100644 --- a/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java +++ b/core/src/test/java/org/elasticsearch/snapshots/SharedClusterSnapshotRestoreIT.java @@ -69,7 +69,6 @@ import org.elasticsearch.index.IndexService; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.indices.InvalidIndexNameException; -import org.elasticsearch.indices.recovery.RecoverySettings; import org.elasticsearch.ingest.IngestTestPlugin; import org.elasticsearch.plugins.Plugin; import org.elasticsearch.repositories.IndexId; @@ -95,6 +94,7 @@ import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; +import java.util.stream.Stream; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; @@ -1066,6 +1066,44 @@ public class SharedClusterSnapshotRestoreIT extends AbstractSnapshotIntegTestCas assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), equalTo(createSnapshotResponse.getSnapshotInfo().totalShards())); } + public void testSnapshotWithMissingShardLevelIndexFile() throws Exception { + Path repo = randomRepoPath(); + logger.info("--> creating repository at {}", repo.toAbsolutePath()); + assertAcked(client().admin().cluster().preparePutRepository("test-repo").setType("fs").setSettings( + Settings.builder().put("location", repo).put("compress", false))); + + createIndex("test-idx-1", "test-idx-2"); + logger.info("--> indexing some data"); + indexRandom(true, + client().prepareIndex("test-idx-1", "doc").setSource("foo", "bar"), + client().prepareIndex("test-idx-2", "doc").setSource("foo", "bar")); + + logger.info("--> creating snapshot"); + client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-1") + .setWaitForCompletion(true).setIndices("test-idx-*").get(); + + logger.info("--> deleting shard level index file"); + try (Stream files = Files.list(repo.resolve("indices"))) { + files.forEach(indexPath -> + IOUtils.deleteFilesIgnoringExceptions(indexPath.resolve("0").resolve("index-0")) + ); + } + + logger.info("--> creating another snapshot"); + CreateSnapshotResponse createSnapshotResponse = + 
client().admin().cluster().prepareCreateSnapshot("test-repo", "test-snap-2") + .setWaitForCompletion(true).setIndices("test-idx-1").get(); + assertThat(createSnapshotResponse.getSnapshotInfo().successfulShards(), greaterThan(0)); + assertEquals(createSnapshotResponse.getSnapshotInfo().successfulShards(), createSnapshotResponse.getSnapshotInfo().totalShards()); + + logger.info("--> restoring the first snapshot, the repository should not have lost any shard data despite deleting index-N, " + + "because it should have iterated over the snap-*.data files as backup"); + client().admin().indices().prepareDelete("test-idx-1", "test-idx-2").get(); + RestoreSnapshotResponse restoreSnapshotResponse = + client().admin().cluster().prepareRestoreSnapshot("test-repo", "test-snap-1").setWaitForCompletion(true).get(); + assertEquals(0, restoreSnapshotResponse.getRestoreInfo().failedShards()); + } + public void testSnapshotClosedIndex() throws Exception { Client client = client(); diff --git a/core/src/test/java/org/elasticsearch/transport/ConnectionProfileTests.java b/core/src/test/java/org/elasticsearch/transport/ConnectionProfileTests.java new file mode 100644 index 00000000000..04973e70cb3 --- /dev/null +++ b/core/src/test/java/org/elasticsearch/transport/ConnectionProfileTests.java @@ -0,0 +1,79 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.transport; + +import org.elasticsearch.test.ESTestCase; +import org.hamcrest.Matchers; + +public class ConnectionProfileTests extends ESTestCase { + + public void testBuildConnectionProfile() { + ConnectionProfile.Builder builder = new ConnectionProfile.Builder(); + builder.addConnections(1, TransportRequestOptions.Type.BULK); + builder.addConnections(2, TransportRequestOptions.Type.STATE, TransportRequestOptions.Type.RECOVERY); + builder.addConnections(3, TransportRequestOptions.Type.PING); + IllegalStateException illegalStateException = expectThrows(IllegalStateException.class, builder::build); + assertEquals("not all types are added for this connection profile - missing types: [REG]", illegalStateException.getMessage()); + + IllegalArgumentException illegalArgumentException = expectThrows(IllegalArgumentException.class, + () -> builder.addConnections(4, TransportRequestOptions.Type.REG, TransportRequestOptions.Type.PING)); + assertEquals("type [PING] is already registered", illegalArgumentException.getMessage()); + builder.addConnections(4, TransportRequestOptions.Type.REG); + ConnectionProfile build = builder.build(); + assertEquals(10, build.getNumConnections()); + Integer[] array = new Integer[10]; + for (int i = 0; i < array.length; i++) { + array[i] = i; + } + final int numIters = randomIntBetween(5, 10); + assertEquals(4, build.getHandles().size()); + assertEquals(0, build.getHandles().get(0).offset); + assertEquals(1, build.getHandles().get(0).length); + assertArrayEquals(new TransportRequestOptions.Type[] {TransportRequestOptions.Type.BULK}, build.getHandles().get(0).getTypes()); + Integer channel = build.getHandles().get(0).getChannel(array); + for (int i = 0; i < numIters; i++) { + assertEquals(0, channel.intValue()); + } + + assertEquals(1, build.getHandles().get(1).offset); + assertEquals(2, build.getHandles().get(1).length); + assertArrayEquals(new TransportRequestOptions.Type[] {TransportRequestOptions.Type.STATE, TransportRequestOptions.Type.RECOVERY}, + build.getHandles().get(1).getTypes()); + channel = build.getHandles().get(1).getChannel(array); + for (int i = 0; i < numIters; i++) { + assertThat(channel, Matchers.anyOf(Matchers.is(1), Matchers.is(2))); + } + + assertEquals(3, build.getHandles().get(2).offset); + assertEquals(3, build.getHandles().get(2).length); + assertArrayEquals(new TransportRequestOptions.Type[] {TransportRequestOptions.Type.PING}, build.getHandles().get(2).getTypes()); + channel = build.getHandles().get(2).getChannel(array); + for (int i = 0; i < numIters; i++) { + assertThat(channel, Matchers.anyOf(Matchers.is(3), Matchers.is(4), Matchers.is(5))); + } + + assertEquals(6, build.getHandles().get(3).offset); + assertEquals(4, build.getHandles().get(3).length); + assertArrayEquals(new TransportRequestOptions.Type[] {TransportRequestOptions.Type.REG}, build.getHandles().get(3).getTypes()); + channel = build.getHandles().get(3).getChannel(array); + for (int i = 0; i < numIters; i++) { + assertThat(channel, Matchers.anyOf(Matchers.is(6), Matchers.is(7), Matchers.is(8), Matchers.is(9))); + } + } +} diff --git a/core/src/test/java/org/elasticsearch/transport/TCPTransportTests.java b/core/src/test/java/org/elasticsearch/transport/TCPTransportTests.java index 0525f4a32dc..fafdd529ac7 100644 --- a/core/src/test/java/org/elasticsearch/transport/TCPTransportTests.java +++ b/core/src/test/java/org/elasticsearch/transport/TCPTransportTests.java @@ -31,7 +31,6 @@ import org.elasticsearch.common.util.BigArrays; import 
org.elasticsearch.test.ESTestCase; import org.elasticsearch.threadpool.TestThreadPool; import org.elasticsearch.threadpool.ThreadPool; -import org.elasticsearch.transport.support.TransportStatus; import java.io.IOException; import java.net.InetSocketAddress; @@ -168,11 +167,6 @@ public class TCPTransportTests extends ESTestCase { } - @Override - protected NodeChannels connectToChannelsLight(DiscoveryNode node) throws IOException { - return new NodeChannels(new Object[0], new Object[0], new Object[0], new Object[0], new Object[0]); - } - @Override protected void sendMessage(Object o, BytesReference reference, Runnable sendListener) throws IOException { StreamInput streamIn = reference.streamInput(); @@ -198,8 +192,8 @@ public class TCPTransportTests extends ESTestCase { } @Override - protected NodeChannels connectToChannels(DiscoveryNode node) throws IOException { - return new NodeChannels(new Object[0], new Object[0], new Object[0], new Object[0], new Object[0]); + protected NodeChannels connectToChannels(DiscoveryNode node, ConnectionProfile profile) throws IOException { + return new NodeChannels(new Object[profile.getNumConnections()], profile); } @Override @@ -214,7 +208,8 @@ public class TCPTransportTests extends ESTestCase { @Override protected Object nodeChannel(DiscoveryNode node, TransportRequestOptions options) throws ConnectTransportException { - return new NodeChannels(new Object[0], new Object[0], new Object[0], new Object[0], new Object[0]); + return new NodeChannels(new Object[ConnectionProfile.LIGHT_PROFILE.getNumConnections()], + ConnectionProfile.LIGHT_PROFILE); } }; DiscoveryNode node = new DiscoveryNode("foo", buildNewFakeTransportAddress(), Version.CURRENT); diff --git a/core/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java b/core/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java index 7fccc42bb79..45aca7fe2c0 100644 --- a/core/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java +++ b/core/src/test/java/org/elasticsearch/transport/TransportServiceHandshakeTests.java @@ -113,7 +113,7 @@ public class TransportServiceHandshakeTests extends ESTestCase { emptySet(), Version.CURRENT.minimumCompatibilityVersion()); DiscoveryNode connectedNode = - handleA.transportService.connectToNodeLightAndHandshake(discoveryNode, timeout); + handleA.transportService.connectToNodeAndHandshake(discoveryNode, timeout); assertNotNull(connectedNode); // the name and version should be updated @@ -132,7 +132,7 @@ public class TransportServiceHandshakeTests extends ESTestCase { emptyMap(), emptySet(), Version.CURRENT.minimumCompatibilityVersion()); - IllegalStateException ex = expectThrows(IllegalStateException.class, () -> handleA.transportService.connectToNodeLightAndHandshake( + IllegalStateException ex = expectThrows(IllegalStateException.class, () -> handleA.transportService.connectToNodeAndHandshake( discoveryNode, timeout)); assertThat(ex.getMessage(), containsString("handshake failed, mismatched cluster name [Cluster [b]]")); assertFalse(handleA.transportService.nodeConnected(discoveryNode)); @@ -149,7 +149,7 @@ public class TransportServiceHandshakeTests extends ESTestCase { emptyMap(), emptySet(), Version.CURRENT.minimumCompatibilityVersion()); - IllegalStateException ex = expectThrows(IllegalStateException.class, () -> handleA.transportService.connectToNodeLightAndHandshake( + IllegalStateException ex = expectThrows(IllegalStateException.class, () -> handleA.transportService.connectToNodeAndHandshake( 
discoveryNode, timeout)); assertThat(ex.getMessage(), containsString("handshake failed, incompatible version")); assertFalse(handleA.transportService.nodeConnected(discoveryNode)); @@ -171,7 +171,7 @@ public class TransportServiceHandshakeTests extends ESTestCase { emptyMap(), emptySet(), Version.CURRENT.minimumCompatibilityVersion()); - DiscoveryNode connectedNode = handleA.transportService.connectToNodeLightAndHandshake(discoveryNode, timeout, false); + DiscoveryNode connectedNode = handleA.transportService.connectToNodeAndHandshake(discoveryNode, timeout, false); assertNotNull(connectedNode); assertEquals(connectedNode.getName(), "TS_B"); assertEquals(connectedNode.getVersion(), handleB.discoveryNode.getVersion()); diff --git a/docs/build.gradle b/docs/build.gradle index ec9800bab0f..dfbf8bc37fb 100644 --- a/docs/build.gradle +++ b/docs/build.gradle @@ -92,6 +92,7 @@ buildRestTests.expectedUnconvertedCandidates = [ 'reference/analysis/tokenfilters/stemmer-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/stop-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc', + 'reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc', 'reference/analysis/tokenfilters/word-delimiter-tokenfilter.asciidoc', 'reference/cat/recovery.asciidoc', 'reference/cat/shards.asciidoc', diff --git a/docs/reference/analysis/tokenfilters.asciidoc b/docs/reference/analysis/tokenfilters.asciidoc index 89cce11a615..227947fb45e 100644 --- a/docs/reference/analysis/tokenfilters.asciidoc +++ b/docs/reference/analysis/tokenfilters.asciidoc @@ -47,6 +47,8 @@ include::tokenfilters/phonetic-tokenfilter.asciidoc[] include::tokenfilters/synonym-tokenfilter.asciidoc[] +include::tokenfilters/synonym-graph-tokenfilter.asciidoc[] + include::tokenfilters/compound-word-tokenfilter.asciidoc[] include::tokenfilters/reverse-tokenfilter.asciidoc[] diff --git a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc new file mode 100644 index 00000000000..16758ad6ad2 --- /dev/null +++ b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc @@ -0,0 +1,152 @@ +[[analysis-synonym-graph-tokenfilter]] +=== Synonym Graph Token Filter + +experimental[] + +The `synonym_graph` token filter makes it easy to handle synonyms, +including multi-word synonyms, correctly during the analysis process. + +In order to properly handle multi-word synonyms, this token filter +creates a "graph token stream" during processing. For more information +on this topic and its various complexities, please read +http://blog.mikemccandless.com/2012/04/lucenes-tokenstreams-are-actually.html[Lucene's TokenStreams are actually graphs!] +by Michael McCandless. + +["NOTE",id="synonym-graph-index-note"] +=============================== +This token filter is designed to be used as part of a search analyzer +only. If you want to apply synonyms during indexing, please use the +standard <>. +=============================== + +["NOTE",id="synonym-graph-query-note"] +=============================== +The graph token stream created by this token filter requires special +query handling. Currently only the <> and +<> queries can do this. Using +it with any other type of analyzed query will potentially result in +incorrect search results. +=============================== + +Synonyms are configured using a configuration file.
+Here is an example: + +[source,js] +-------------------------------------------------- +{ + "index" : { + "analysis" : { + "analyzer" : { + "search_synonyms" : { + "tokenizer" : "whitespace", + "filter" : ["graph_synonyms"] + } + }, + "filter" : { + "graph_synonyms" : { + "type" : "synonym_graph", + "synonyms_path" : "analysis/synonym.txt" + } + } + } + } +} +-------------------------------------------------- + +The above configures a `graph_synonyms` filter, with a path of +`analysis/synonym.txt` (relative to the `config` location). The +`search_synonyms` analyzer is then configured with the filter. +Additional settings are: `ignore_case` (defaults to `false`) and +`expand` (defaults to `true`). + +The `tokenizer` parameter controls the tokenizer that will be used to +tokenize the synonyms, and defaults to the `whitespace` tokenizer. + +Two synonym formats are supported: Solr and WordNet. + +[float] +==== Solr synonyms + +The following is a sample format of the file: + +[source,js] +-------------------------------------------------- +# Blank lines and lines starting with pound are comments. + +# Explicit mappings match any token sequence on the LHS of "=>" +# and replace with all alternatives on the RHS. These types of mappings +# ignore the expand parameter in the schema. +# Examples: +i-pod, i pod => ipod, +sea biscuit, sea biscit => seabiscuit + +# Equivalent synonyms may be separated with commas and give +# no explicit mapping. In this case the mapping behavior will +# be taken from the expand parameter in the schema. This allows +# the same synonym file to be used in different synonym handling strategies. +# Examples: +ipod, i-pod, i pod +foozball , foosball +universe , cosmos +lol, laughing out loud + +# If expand==true, "ipod, i-pod, i pod" is equivalent +# to the explicit mapping: +ipod, i-pod, i pod => ipod, i-pod, i pod +# If expand==false, "ipod, i-pod, i pod" is equivalent +# to the explicit mapping: +ipod, i-pod, i pod => ipod + +# Multiple synonym mapping entries are merged. +foo => foo bar +foo => baz +# is equivalent to +foo => foo bar, baz +-------------------------------------------------- + +You can also define synonyms for the filter directly in the +configuration file (note use of `synonyms` instead of `synonyms_path`): + +[source,js] +-------------------------------------------------- +{ + "filter" : { + "synonym" : { + "type" : "synonym_graph", + "synonyms" : [ + "lol, laughing out loud", + "universe, cosmos" + ] + } + } +} +-------------------------------------------------- + +However, it is recommended to define large synonym sets in a file using +`synonyms_path`, because specifying them inline increases cluster size unnecessarily. + +[float] +==== WordNet synonyms + +Synonyms based on http://wordnet.princeton.edu/[WordNet] format can be +declared using `format`: + +[source,js] +-------------------------------------------------- +{ + "filter" : { + "synonym" : { + "type" : "synonym_graph", + "format" : "wordnet", + "synonyms" : [ + "s(100000001,1,'abstain',v,1,0).", + "s(100000001,2,'refrain',v,1,0).", + "s(100000001,3,'desist',v,1,0)." + ] + } + } +} +-------------------------------------------------- + +Using `synonyms_path` to define WordNet synonyms in a file is supported +as well.
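Because the filter is meant to run at search time only, one way to apply it is to keep an ordinary analyzer for indexing and reference the synonym analyzer through the standard `search_analyzer` mapping parameter. The following sketch is illustrative only and is not part of this change; the index, type and field names (`my_index`, `my_type`, `body`) are hypothetical, and the inline synonym list merely stands in for a real `synonyms_path` file:

[source,js]
--------------------------------------------------
PUT /my_index
{
  "settings" : {
    "analysis" : {
      "analyzer" : {
        "search_synonyms" : {
          "tokenizer" : "whitespace",
          "filter" : ["graph_synonyms"]
        }
      },
      "filter" : {
        "graph_synonyms" : {
          "type" : "synonym_graph",
          "synonyms" : ["lol, laughing out loud"]
        }
      }
    }
  },
  "mappings" : {
    "my_type" : {
      "properties" : {
        "body" : {
          "type" : "text",
          "analyzer" : "standard",
          "search_analyzer" : "search_synonyms"
        }
      }
    }
  }
}
--------------------------------------------------

With a mapping like this, documents are indexed with the plain `standard` analyzer, while query text for `match` and `match_phrase` queries is analyzed with `search_synonyms`, so the graph produced by `synonym_graph` is only ever consumed at query time.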
diff --git a/docs/reference/docs/reindex.asciidoc b/docs/reference/docs/reindex.asciidoc index f9025c378f6..dff3fa066da 100644 --- a/docs/reference/docs/reindex.asciidoc +++ b/docs/reference/docs/reindex.asciidoc @@ -227,6 +227,28 @@ POST _reindex // CONSOLE // TEST[setup:twitter] +The `source` section supports all the elements that are supported in a +<>. For instance only a subset of the +fields from the original documents can be reindexed using source filtering +as follows: + +[source,js] +-------------------------------------------------- +POST _reindex +{ + "source": { + "index": "twitter", + "_source": ["user", "tweet"] + }, + "dest": { + "index": "new_twitter" + } +} +-------------------------------------------------- +// CONSOLE +// TEST[setup:twitter] + + Like `_update_by_query`, `_reindex` supports a script that modifies the document. Unlike `_update_by_query`, the script is allowed to modify the document's metadata. This example bumps the version of the source document: diff --git a/docs/reference/search/suggesters/term-suggest.asciidoc b/docs/reference/search/suggesters/term-suggest.asciidoc index ff5cd38cf21..f76b17e0ed2 100644 --- a/docs/reference/search/suggesters/term-suggest.asciidoc +++ b/docs/reference/search/suggesters/term-suggest.asciidoc @@ -45,7 +45,7 @@ doesn't take the query into account that is part of request. + ** `missing`: Only provide suggestions for suggest text terms that are not in the index. This is the default. - ** `popular`: Only suggest suggestions that occur in more docs then + ** `popular`: Only suggest suggestions that occur in more docs than the original suggest text term. ** `always`: Suggest any matching suggestions based on terms in the suggest text. diff --git a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java index bb84c47dc17..f70d9ab6d0f 100644 --- a/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java +++ b/modules/transport-netty4/src/main/java/org/elasticsearch/transport/netty4/Netty4Transport.java @@ -62,6 +62,7 @@ import org.elasticsearch.indices.breaker.CircuitBreakerService; import org.elasticsearch.monitor.jvm.JvmInfo; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.ConnectTransportException; +import org.elasticsearch.transport.ConnectionProfile; import org.elasticsearch.transport.TcpTransport; import org.elasticsearch.transport.TransportServiceAdapter; import org.elasticsearch.transport.TransportSettings; @@ -331,37 +332,13 @@ public class Netty4Transport extends TcpTransport { return channels == null ? 
0 : channels.numberOfOpenChannels(); } - protected NodeChannels connectToChannelsLight(DiscoveryNode node) { - InetSocketAddress address = node.getAddress().address(); - ChannelFuture connect = bootstrap.connect(address); - connect.awaitUninterruptibly((long) (connectTimeout.millis() * 1.5)); - if (!connect.isSuccess()) { - throw new ConnectTransportException(node, "connect_timeout[" + connectTimeout + "]", connect.cause()); - } - Channel[] channels = new Channel[1]; - channels[0] = connect.channel(); - channels[0].closeFuture().addListener(new ChannelCloseListener(node)); - NodeChannels nodeChannels = new NodeChannels(channels, channels, channels, channels, channels); - onAfterChannelsConnected(nodeChannels); - return nodeChannels; - } - - protected NodeChannels connectToChannels(DiscoveryNode node) { - final NodeChannels nodeChannels = - new NodeChannels( - new Channel[connectionsPerNodeRecovery], - new Channel[connectionsPerNodeBulk], - new Channel[connectionsPerNodeReg], - new Channel[connectionsPerNodeState], - new Channel[connectionsPerNodePing]); + @Override + protected NodeChannels connectToChannels(DiscoveryNode node, ConnectionProfile profile) { + final Channel[] channels = new Channel[profile.getNumConnections()]; + final NodeChannels nodeChannels = new NodeChannels(channels, profile); boolean success = false; try { - int numConnections = - connectionsPerNodeRecovery + - connectionsPerNodeBulk + - connectionsPerNodeReg + - connectionsPerNodeState + - connectionsPerNodeRecovery; + int numConnections = channels.length; final ArrayList connections = new ArrayList<>(numConnections); final InetSocketAddress address = node.getAddress().address(); for (int i = 0; i < numConnections; i++) { @@ -369,27 +346,15 @@ public class Netty4Transport extends TcpTransport { } final Iterator iterator = connections.iterator(); try { - for (Channel[] channels : nodeChannels.getChannelArrays()) { - for (int i = 0; i < channels.length; i++) { - assert iterator.hasNext(); - ChannelFuture future = iterator.next(); - future.awaitUninterruptibly((long) (connectTimeout.millis() * 1.5)); - if (!future.isSuccess()) { - throw new ConnectTransportException(node, "connect_timeout[" + connectTimeout + "]", future.cause()); - } - channels[i] = future.channel(); - channels[i].closeFuture().addListener(new ChannelCloseListener(node)); + for (int i = 0; i < channels.length; i++) { + assert iterator.hasNext(); + ChannelFuture future = iterator.next(); + future.awaitUninterruptibly((long) (connectTimeout.millis() * 1.5)); + if (!future.isSuccess()) { + throw new ConnectTransportException(node, "connect_timeout[" + connectTimeout + "]", future.cause()); } - } - if (nodeChannels.recovery.length == 0) { - if (nodeChannels.bulk.length > 0) { - nodeChannels.recovery = nodeChannels.bulk; - } else { - nodeChannels.recovery = nodeChannels.reg; - } - } - if (nodeChannels.bulk.length == 0) { - nodeChannels.bulk = nodeChannels.reg; + channels[i] = future.channel(); + channels[i].closeFuture().addListener(new ChannelCloseListener(node)); } } catch (final RuntimeException e) { for (final ChannelFuture future : Collections.unmodifiableList(connections)) { diff --git a/modules/transport-netty4/src/main/plugin-metadata/plugin-security.policy b/modules/transport-netty4/src/main/plugin-metadata/plugin-security.policy index 902bfdee231..ce39869d2fb 100644 --- a/modules/transport-netty4/src/main/plugin-metadata/plugin-security.policy +++ b/modules/transport-netty4/src/main/plugin-metadata/plugin-security.policy @@ -17,8 +17,13 @@ * under 
the License. */ +grant codeBase "${codebase.netty-common-4.1.6.Final.jar}" { + // for reading the system-wide configuration for the backlog of established sockets + permission java.io.FilePermission "/proc/sys/net/core/somaxconn", "read"; +}; + grant codeBase "${codebase.netty-transport-4.1.6.Final.jar}" { // Netty NioEventLoop wants to change this, because of https://bugs.openjdk.java.net/browse/JDK-6427854 // the bug says it only happened rarely, and that its fixed, but apparently it still happens rarely! permission java.util.PropertyPermission "sun.nio.ch.bugLevel", "write"; -}; \ No newline at end of file +}; diff --git a/modules/transport-netty4/src/test/java/org/elasticsearch/rest/Netty4HeadBodyIsEmptyIT.java b/modules/transport-netty4/src/test/java/org/elasticsearch/rest/Netty4HeadBodyIsEmptyIT.java index 8716f59ee00..037229f0972 100644 --- a/modules/transport-netty4/src/test/java/org/elasticsearch/rest/Netty4HeadBodyIsEmptyIT.java +++ b/modules/transport-netty4/src/test/java/org/elasticsearch/rest/Netty4HeadBodyIsEmptyIT.java @@ -19,5 +19,53 @@ package org.elasticsearch.rest; -public class Netty4HeadBodyIsEmptyIT extends HeadBodyIsEmptyIntegTestCase { +import org.apache.http.entity.StringEntity; +import org.elasticsearch.client.Response; +import org.elasticsearch.test.rest.ESRestTestCase; +import org.hamcrest.Matcher; + +import java.io.IOException; +import java.util.Map; + +import static java.util.Collections.emptyMap; +import static java.util.Collections.singletonMap; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +public class Netty4HeadBodyIsEmptyIT extends ESRestTestCase { + + public void testHeadRoot() throws IOException { + headTestCase("/", emptyMap(), greaterThan(0)); + headTestCase("/", singletonMap("pretty", ""), greaterThan(0)); + headTestCase("/", singletonMap("pretty", "true"), greaterThan(0)); + } + + private void createTestDoc() throws IOException { + client().performRequest("PUT", "test/test/1", emptyMap(), new StringEntity("{\"test\": \"test\"}")); + } + + public void testDocumentExists() throws IOException { + createTestDoc(); + headTestCase("test/test/1", emptyMap(), equalTo(0)); + headTestCase("test/test/1", singletonMap("pretty", "true"), equalTo(0)); + } + + public void testIndexExists() throws IOException { + createTestDoc(); + headTestCase("test", emptyMap(), equalTo(0)); + headTestCase("test", singletonMap("pretty", "true"), equalTo(0)); + } + + public void testTypeExists() throws IOException { + createTestDoc(); + headTestCase("test/test", emptyMap(), equalTo(0)); + headTestCase("test/test", singletonMap("pretty", "true"), equalTo(0)); + } + + private void headTestCase(String url, Map params, Matcher matcher) throws IOException { + Response response = client().performRequest("HEAD", url, params); + assertEquals(200, response.getStatusLine().getStatusCode()); + assertThat(Integer.valueOf(response.getHeader("Content-Length")), matcher); + assertNull("HEAD requests shouldn't have a response body but " + url + " did", response.getEntity()); + } } diff --git a/qa/smoke-test-multinode/src/test/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java b/qa/smoke-test-multinode/src/test/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java index 1fe8cfeb9d5..225e12f65fd 100644 --- a/qa/smoke-test-multinode/src/test/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java +++ 
b/qa/smoke-test-multinode/src/test/java/org/elasticsearch/smoketest/SmokeTestMultiNodeClientYamlTestSuiteIT.java @@ -22,12 +22,15 @@ package org.elasticsearch.smoketest; import com.carrotsearch.randomizedtesting.annotations.Name; import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite; +import org.apache.lucene.util.TimeUnits; import org.elasticsearch.test.rest.yaml.ClientYamlTestCandidate; import org.elasticsearch.test.rest.yaml.ESClientYamlSuiteTestCase; import org.elasticsearch.test.rest.yaml.parser.ClientYamlTestParseException; import java.io.IOException; +@TimeoutSuite(millis = 40 * TimeUnits.MINUTE) // some of the windows test VMs are slow as hell public class SmokeTestMultiNodeClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase { public SmokeTestMultiNodeClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { diff --git a/test/framework/src/main/java/org/elasticsearch/common/util/MockBigArrays.java b/test/framework/src/main/java/org/elasticsearch/common/util/MockBigArrays.java index 8d1f42d5b57..5f76ac0030d 100644 --- a/test/framework/src/main/java/org/elasticsearch/common/util/MockBigArrays.java +++ b/test/framework/src/main/java/org/elasticsearch/common/util/MockBigArrays.java @@ -48,7 +48,6 @@ public class MockBigArrays extends BigArrays { */ private static final boolean TRACK_ALLOCATIONS = false; - private static final Set INSTANCES = Collections.synchronizedSet(Collections.newSetFromMap(new WeakHashMap())); private static final ConcurrentMap ACQUIRED_ARRAYS = new ConcurrentHashMap<>(); public static void ensureAllArraysAreReleased() throws Exception { @@ -88,7 +87,6 @@ public class MockBigArrays extends BigArrays { seed = 0; } random = new Random(seed); - INSTANCES.add(this); } @@ -247,15 +245,13 @@ public class MockBigArrays extends BigArrays { private abstract static class AbstractArrayWrapper { - final BigArray in; - boolean clearOnResize; - AtomicReference originalRelease; + final boolean clearOnResize; + private final AtomicReference originalRelease; - AbstractArrayWrapper(BigArray in, boolean clearOnResize) { - ACQUIRED_ARRAYS.put(this, TRACK_ALLOCATIONS ? new RuntimeException() : Boolean.TRUE); - this.in = in; + AbstractArrayWrapper(boolean clearOnResize) { this.clearOnResize = clearOnResize; - originalRelease = new AtomicReference<>(); + this.originalRelease = new AtomicReference<>(); + ACQUIRED_ARRAYS.put(this, TRACK_ALLOCATIONS ? 
new RuntimeException() : Boolean.TRUE); } protected abstract BigArray getDelegate(); @@ -267,7 +263,7 @@ public class MockBigArrays extends BigArrays { } public long ramBytesUsed() { - return in.ramBytesUsed(); + return getDelegate().ramBytesUsed(); } public void close() { @@ -286,7 +282,7 @@ public class MockBigArrays extends BigArrays { private final ByteArray in; ByteArrayWrapper(ByteArray in, boolean clearOnResize) { - super(in, clearOnResize); + super(clearOnResize); this.in = in; } @@ -336,7 +332,7 @@ public class MockBigArrays extends BigArrays { private final IntArray in; IntArrayWrapper(IntArray in, boolean clearOnResize) { - super(in, clearOnResize); + super(clearOnResize); this.in = in; } @@ -381,7 +377,7 @@ public class MockBigArrays extends BigArrays { private final LongArray in; LongArrayWrapper(LongArray in, boolean clearOnResize) { - super(in, clearOnResize); + super(clearOnResize); this.in = in; } @@ -427,7 +423,7 @@ public class MockBigArrays extends BigArrays { private final FloatArray in; FloatArrayWrapper(FloatArray in, boolean clearOnResize) { - super(in, clearOnResize); + super(clearOnResize); this.in = in; } @@ -472,7 +468,7 @@ public class MockBigArrays extends BigArrays { private final DoubleArray in; DoubleArrayWrapper(DoubleArray in, boolean clearOnResize) { - super(in, clearOnResize); + super(clearOnResize); this.in = in; } @@ -517,7 +513,7 @@ public class MockBigArrays extends BigArrays { private final ObjectArray in; ObjectArrayWrapper(ObjectArray in) { - super(in, false); + super(false); this.in = in; } diff --git a/test/framework/src/main/java/org/elasticsearch/rest/HeadBodyIsEmptyIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/rest/HeadBodyIsEmptyIntegTestCase.java deleted file mode 100644 index 0e43814b75c..00000000000 --- a/test/framework/src/main/java/org/elasticsearch/rest/HeadBodyIsEmptyIntegTestCase.java +++ /dev/null @@ -1,74 +0,0 @@ -/* - * Licensed to Elasticsearch under one or more contributor - * license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright - * ownership. Elasticsearch licenses this file to you under - * the Apache License, Version 2.0 (the "License"); you may - * not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.rest; - -import org.apache.http.entity.StringEntity; -import org.elasticsearch.client.Response; -import org.elasticsearch.test.rest.ESRestTestCase; -import org.hamcrest.Matcher; - -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.util.Map; - -import static java.util.Collections.emptyMap; -import static java.util.Collections.singletonMap; -import static org.hamcrest.CoreMatchers.equalTo; -import static org.hamcrest.Matchers.greaterThan; - -/** - * Tests that HTTP HEAD requests don't respond with a body. 
- */ -public class HeadBodyIsEmptyIntegTestCase extends ESRestTestCase { - public void testHeadRoot() throws IOException { - headTestCase("/", emptyMap(), greaterThan(0)); - headTestCase("/", singletonMap("pretty", ""), greaterThan(0)); - headTestCase("/", singletonMap("pretty", "true"), greaterThan(0)); - } - - private void createTestDoc() throws UnsupportedEncodingException, IOException { - client().performRequest("PUT", "test/test/1", emptyMap(), new StringEntity("{\"test\": \"test\"}")); - } - - public void testDocumentExists() throws IOException { - createTestDoc(); - headTestCase("test/test/1", emptyMap(), equalTo(0)); - headTestCase("test/test/1", singletonMap("pretty", "true"), equalTo(0)); - } - - public void testIndexExists() throws IOException { - createTestDoc(); - headTestCase("test", emptyMap(), equalTo(0)); - headTestCase("test", singletonMap("pretty", "true"), equalTo(0)); - } - - public void testTypeExists() throws IOException { - createTestDoc(); - headTestCase("test/test", emptyMap(), equalTo(0)); - headTestCase("test/test", singletonMap("pretty", "true"), equalTo(0)); - } - - private void headTestCase(String url, Map params, Matcher matcher) throws IOException { - Response response = client().performRequest("HEAD", url, params); - assertEquals(200, response.getStatusLine().getStatusCode()); - assertThat(Integer.valueOf(response.getHeader("Content-Length")), matcher); - assertNull("HEAD requests shouldn't have a response body but " + url + " did", response.getEntity()); - } -} diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java index e05057648cc..830f119150d 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/ESRestTestCase.java @@ -37,7 +37,10 @@ import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.test.ESTestCase; import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import javax.net.ssl.SSLContext; import java.io.IOException; import java.io.InputStream; import java.nio.file.Files; @@ -53,8 +56,6 @@ import java.util.List; import java.util.Map; import java.util.Set; -import javax.net.ssl.SSLContext; - import static java.util.Collections.singletonMap; import static java.util.Collections.sort; import static java.util.Collections.unmodifiableList; @@ -62,7 +63,7 @@ import static java.util.Collections.unmodifiableList; /** * Superclass for tests that interact with an external test cluster using Elasticsearch's {@link RestClient}. */ -public class ESRestTestCase extends ESTestCase { +public abstract class ESRestTestCase extends ESTestCase { public static final String TRUSTSTORE_PATH = "truststore.path"; public static final String TRUSTSTORE_PASSWORD = "truststore.password"; @@ -76,45 +77,48 @@ public class ESRestTestCase extends ESTestCase { } } - private final List clusterHosts; + private static List clusterHosts; /** - * A client for the running Elasticsearch cluster. Lazily initialized on first use. + * A client for the running Elasticsearch cluster */ - private final RestClient client; + private static RestClient client; /** * A client for the running Elasticsearch cluster configured to take test administrative actions like remove all indexes after the test - * completes. Lazily initialized on first use. 
+ * completes */ - private final RestClient adminClient; + private static RestClient adminClient; - public ESRestTestCase() { - String cluster = System.getProperty("tests.rest.cluster"); - if (cluster == null) { - throw new RuntimeException("Must specify [tests.rest.cluster] system property with a comma delimited list of [host:port] " - + "to which to send REST requests"); - } - String[] stringUrls = cluster.split(","); - List clusterHosts = new ArrayList<>(stringUrls.length); - for (String stringUrl : stringUrls) { - int portSeparator = stringUrl.lastIndexOf(':'); - if (portSeparator < 0) { - throw new IllegalArgumentException("Illegal cluster url [" + stringUrl + "]"); + @Before + public void initClient() throws IOException { + if (client == null) { + assert adminClient == null; + assert clusterHosts == null; + String cluster = System.getProperty("tests.rest.cluster"); + if (cluster == null) { + throw new RuntimeException("Must specify [tests.rest.cluster] system property with a comma delimited list of [host:port] " + + "to which to send REST requests"); } - String host = stringUrl.substring(0, portSeparator); - int port = Integer.valueOf(stringUrl.substring(portSeparator + 1)); - clusterHosts.add(new HttpHost(host, port, getProtocol())); - } - this.clusterHosts = unmodifiableList(clusterHosts); - try { + String[] stringUrls = cluster.split(","); + List hosts = new ArrayList<>(stringUrls.length); + for (String stringUrl : stringUrls) { + int portSeparator = stringUrl.lastIndexOf(':'); + if (portSeparator < 0) { + throw new IllegalArgumentException("Illegal cluster url [" + stringUrl + "]"); + } + String host = stringUrl.substring(0, portSeparator); + int port = Integer.valueOf(stringUrl.substring(portSeparator + 1)); + hosts.add(new HttpHost(host, port, getProtocol())); + } + clusterHosts = unmodifiableList(hosts); + logger.info("initializing REST clients against {}", clusterHosts); client = buildClient(restClientSettings()); adminClient = buildClient(restAdminSettings()); - } catch (IOException e) { - // Wrap the IOException so children don't have to declare a constructor just to rethrow it. - throw new RuntimeException("Error building clients", e); } + assert client != null; + assert adminClient != null; + assert clusterHosts != null; } - /** * Clean up after the test case. */ @@ -122,20 +126,30 @@ public class ESRestTestCase extends ESTestCase { public final void after() throws Exception { wipeCluster(); logIfThereAreRunningTasks(); - closeClients(); + } + + @AfterClass + public static void closeClients() throws IOException { + try { + IOUtils.close(client, adminClient); + } finally { + clusterHosts = null; + client = null; + adminClient = null; + } } /** - * Get a client, building it if it hasn't been built for this test. + * Get the client used for ordinary api calls while writing a test */ - protected final RestClient client() { + protected static RestClient client() { return client; } /** * Get the client used for test administrative actions. Do not use this while writing a test. Only use it for cleaning up after tests. */ - protected final RestClient adminClient() { + protected static RestClient adminClient() { return adminClient; } @@ -230,10 +244,6 @@ public class ESRestTestCase extends ESTestCase { */ } - private void closeClients() throws IOException { - IOUtils.close(client, adminClient); - } - /** * Used to obtain settings for the REST client that is used to send REST requests. 
*/ @@ -262,8 +272,9 @@ public class ESRestTestCase extends ESTestCase { return "http"; } - private RestClient buildClient(Settings settings) throws IOException { - RestClientBuilder builder = RestClient.builder(clusterHosts.toArray(new HttpHost[0])).setMaxRetryTimeoutMillis(30000) + private static RestClient buildClient(Settings settings) throws IOException { + RestClientBuilder builder = RestClient.builder(clusterHosts.toArray(new HttpHost[clusterHosts.size()])) + .setMaxRetryTimeoutMillis(30000) .setRequestConfigCallback(requestConfigBuilder -> requestConfigBuilder.setSocketTimeout(30000)); String keystorePath = settings.get(TRUSTSTORE_PATH); if (keystorePath != null) { @@ -314,5 +325,4 @@ public class ESRestTestCase extends ESTestCase { } return runningTasks; } - } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java index 14affcaf3eb..ff829aafee8 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestClient.java @@ -19,7 +19,6 @@ package org.elasticsearch.test.rest.yaml; import com.carrotsearch.randomizedtesting.RandomizedTest; - import org.apache.http.Header; import org.apache.http.HttpEntity; import org.apache.http.HttpHost; @@ -32,13 +31,11 @@ import org.elasticsearch.client.Response; import org.elasticsearch.client.ResponseException; import org.elasticsearch.client.RestClient; import org.elasticsearch.common.Strings; -import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.util.set.Sets; import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestApi; import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestPath; import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestSpec; -import org.junit.BeforeClass; import java.io.IOException; import java.net.URI; @@ -61,94 +58,16 @@ public class ClientYamlTestClient { //query_string params that don't need to be declared in the spec, they are supported by default private static final Set ALWAYS_ACCEPTED_QUERY_STRING_PARAMS = Sets.newHashSet("pretty", "source", "filter_path"); - private static boolean loggedInit = false; - private final ClientYamlSuiteRestSpec restSpec; private final RestClient restClient; private final Version esVersion; - public ClientYamlTestClient(ClientYamlSuiteRestSpec restSpec, RestClient restClient, List hosts) throws IOException { + public ClientYamlTestClient(ClientYamlSuiteRestSpec restSpec, RestClient restClient, List hosts, + Version esVersion) throws IOException { assert hosts.size() > 0; this.restSpec = restSpec; this.restClient = restClient; - Tuple versionTuple = readMasterAndMinNodeVersion(hosts.size()); - this.esVersion = versionTuple.v1(); - Version masterVersion = versionTuple.v2(); - if (false == loggedInit) { - /* This will be logged once per suite which lines up with randomized runner's dumping the output of all failing suites. It'd - * be super noisy to log this once per test. We can't log it in a @BeforeClass method because we need the class variables. */ - logger.info("initializing client, minimum es version: [{}] master version: [{}] hosts: {}", esVersion, masterVersion, hosts); - loggedInit = true; - } - } - - /** - * Reset {@link #loggedInit} so we log the connection setup before this suite. 
- */ - @BeforeClass - public static void clearLoggedInit() { - loggedInit = false; - } - - private Tuple readMasterAndMinNodeVersion(int numHosts) throws IOException { - try { - // we simply go to the _cat/nodes API and parse all versions in the cluster - Response response = restClient.performRequest("GET", "/_cat/nodes", Collections.singletonMap("h", "version,master")); - ClientYamlTestResponse restTestResponse = new ClientYamlTestResponse(response); - String nodesCatResponse = restTestResponse.getBodyAsString(); - String[] split = nodesCatResponse.split("\n"); - Version version = null; - Version masterVersion = null; - for (String perNode : split) { - final String[] versionAndMaster = perNode.split("\\s+"); - assert versionAndMaster.length == 2 : "invalid line: " + perNode + " length: " + versionAndMaster.length; - final Version currentVersion = Version.fromString(versionAndMaster[0]); - final boolean master = versionAndMaster[1].trim().equals("*"); - if (master) { - assert masterVersion == null; - masterVersion = currentVersion; - } - if (version == null) { - version = currentVersion; - } else if (version.onOrAfter(currentVersion)) { - version = currentVersion; - } - } - return new Tuple<>(version, masterVersion); - } catch (ResponseException ex) { - if (ex.getResponse().getStatusLine().getStatusCode() == 403) { - logger.warn("Fallback to simple info '/' request, _cat/nodes is not authorized"); - final Version version = readAndCheckVersion(numHosts); - return new Tuple<>(version, version); - } - throw ex; - } - } - - private Version readAndCheckVersion(int numHosts) throws IOException { - ClientYamlSuiteRestApi restApi = restApi("info"); - assert restApi.getPaths().size() == 1; - assert restApi.getMethods().size() == 1; - Version version = null; - for (int i = 0; i < numHosts; i++) { - //we don't really use the urls here, we rely on the client doing round-robin to touch all the nodes in the cluster - String method = restApi.getMethods().get(0); - String endpoint = restApi.getPaths().get(0); - Response response = restClient.performRequest(method, endpoint); - ClientYamlTestResponse restTestResponse = new ClientYamlTestResponse(response); - - Object latestVersion = restTestResponse.evaluate("version.number"); - if (latestVersion == null) { - throw new RuntimeException("elasticsearch version not found in the response"); - } - final Version currentVersion = Version.fromString(restTestResponse.evaluate("version.number").toString()); - if (version == null) { - version = currentVersion; - } else if (version.onOrAfter(currentVersion)) { - version = currentVersion; - } - } - return version; + this.esVersion = esVersion; } public Version getEsVersion() { diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestExecutionContext.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestExecutionContext.java index 2f1e42c12cb..5bc380c3c2d 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestExecutionContext.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ClientYamlTestExecutionContext.java @@ -18,13 +18,10 @@ */ package org.elasticsearch.test.rest.yaml; -import org.apache.http.HttpHost; import org.apache.logging.log4j.Logger; import org.elasticsearch.Version; -import org.elasticsearch.client.RestClient; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestSpec; 
import java.io.IOException; import java.util.HashMap; @@ -42,15 +39,12 @@ public class ClientYamlTestExecutionContext { private static final Logger logger = Loggers.getLogger(ClientYamlTestExecutionContext.class); private final Stash stash = new Stash(); - - private final ClientYamlSuiteRestSpec restSpec; - - private ClientYamlTestClient restTestClient; + private final ClientYamlTestClient clientYamlTestClient; private ClientYamlTestResponse response; - public ClientYamlTestExecutionContext(ClientYamlSuiteRestSpec restSpec) { - this.restSpec = restSpec; + public ClientYamlTestExecutionContext(ClientYamlTestClient clientYamlTestClient) { + this.clientYamlTestClient = clientYamlTestClient; } /** @@ -104,7 +98,7 @@ public class ClientYamlTestExecutionContext { private ClientYamlTestResponse callApiInternal(String apiName, Map params, String body, Map headers) throws IOException { - return restTestClient.callApi(apiName, params, body, headers); + return clientYamlTestClient.callApi(apiName, params, body, headers); } /** @@ -114,13 +108,6 @@ public class ClientYamlTestExecutionContext { return response.evaluate(path, stash); } - /** - * Creates the embedded REST client when needed. Needs to be called before each test. - */ - public void initClient(RestClient client, List hosts) throws IOException { - restTestClient = new ClientYamlTestClient(restSpec, client, hosts); - } - /** * Clears the last obtained response and the stashed fields */ @@ -138,7 +125,7 @@ public class ClientYamlTestExecutionContext { * Returns the current es version as a string */ public Version esVersion() { - return restTestClient.getEsVersion(); + return clientYamlTestClient.getEsVersion(); } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ESClientYamlSuiteTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ESClientYamlSuiteTestCase.java index d44ce71decb..a9b61aae54f 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ESClientYamlSuiteTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/rest/yaml/ESClientYamlSuiteTestCase.java @@ -20,24 +20,27 @@ package org.elasticsearch.test.rest.yaml; import com.carrotsearch.randomizedtesting.RandomizedTest; - +import org.apache.http.HttpHost; import org.apache.lucene.util.IOUtils; +import org.elasticsearch.Version; +import org.elasticsearch.client.Response; +import org.elasticsearch.client.ResponseException; +import org.elasticsearch.client.RestClient; import org.elasticsearch.common.Strings; import org.elasticsearch.common.SuppressForbidden; +import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.test.rest.ESRestTestCase; import org.elasticsearch.test.rest.yaml.parser.ClientYamlTestParseException; import org.elasticsearch.test.rest.yaml.parser.ClientYamlTestSuiteParser; import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestApi; import org.elasticsearch.test.rest.yaml.restspec.ClientYamlSuiteRestSpec; +import org.elasticsearch.test.rest.yaml.section.ClientYamlTestSection; import org.elasticsearch.test.rest.yaml.section.ClientYamlTestSuite; import org.elasticsearch.test.rest.yaml.section.DoSection; import org.elasticsearch.test.rest.yaml.section.ExecutableSection; -import org.elasticsearch.test.rest.yaml.section.SkipSection; -import org.elasticsearch.test.rest.yaml.section.ClientYamlTestSection; import org.junit.AfterClass; import org.junit.Before; -import org.junit.BeforeClass; import 
java.io.IOException; import java.io.InputStream; @@ -51,7 +54,6 @@ import java.nio.file.Path; import java.nio.file.StandardCopyOption; import java.util.ArrayList; import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import java.util.Set; @@ -74,13 +76,13 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase { /** * Property that allows to control whether spec validation is enabled or not (default true). */ - public static final String REST_TESTS_VALIDATE_SPEC = "tests.rest.validate_spec"; + private static final String REST_TESTS_VALIDATE_SPEC = "tests.rest.validate_spec"; /** * Property that allows to control where the REST spec files need to be loaded from */ public static final String REST_TESTS_SPEC = "tests.rest.spec"; - public static final String REST_LOAD_PACKAGED_TESTS = "tests.rest.load_packaged"; + private static final String REST_LOAD_PACKAGED_TESTS = "tests.rest.load_packaged"; private static final String DEFAULT_TESTS_PATH = "/rest-api-spec/test"; private static final String DEFAULT_SPEC_PATH = "/rest-api-spec/api"; @@ -96,19 +98,84 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase { */ private static final String PATHS_SEPARATOR = "(? blacklistPathMatchers = new ArrayList<>(); + private static List blacklistPathMatchers; private static ClientYamlTestExecutionContext restTestExecutionContext; private static ClientYamlTestExecutionContext adminExecutionContext; private final ClientYamlTestCandidate testCandidate; - public ESClientYamlSuiteTestCase(ClientYamlTestCandidate testCandidate) { + protected ESClientYamlSuiteTestCase(ClientYamlTestCandidate testCandidate) { this.testCandidate = testCandidate; - String[] blacklist = resolvePathsProperty(REST_TESTS_BLACKLIST, null); - for (String entry : blacklist) { - this.blacklistPathMatchers.add(new BlacklistedPathPatternMatcher(entry)); + } + + @Before + public void initAndResetContext() throws IOException { + if (restTestExecutionContext == null) { + assert adminExecutionContext == null; + assert blacklistPathMatchers == null; + String[] blacklist = resolvePathsProperty(REST_TESTS_BLACKLIST, null); + blacklistPathMatchers = new ArrayList<>(); + for (String entry : blacklist) { + blacklistPathMatchers.add(new BlacklistedPathPatternMatcher(entry)); + } + String[] specPaths = resolvePathsProperty(REST_TESTS_SPEC, DEFAULT_SPEC_PATH); + ClientYamlSuiteRestSpec restSpec = null; + FileSystem fileSystem = getFileSystem(); + // don't make a try-with, getFileSystem returns null + // ... 
and you can't close() the default filesystem
+            try {
+                restSpec = ClientYamlSuiteRestSpec.parseFrom(fileSystem, DEFAULT_SPEC_PATH, specPaths);
+            } finally {
+                IOUtils.close(fileSystem);
+            }
+            validateSpec(restSpec);
+            List hosts = getClusterHosts();
+            RestClient restClient = client();
+            Version esVersion;
+            try {
+                Tuple versionVersionTuple = readVersionsFromCatNodes(restClient);
+                esVersion = versionVersionTuple.v1();
+                Version masterVersion = versionVersionTuple.v2();
+                logger.info("initializing yaml client, minimum es version: [{}] master version: [{}] hosts: {}",
+                    esVersion, masterVersion, hosts);
+            } catch (ResponseException ex) {
+                if (ex.getResponse().getStatusLine().getStatusCode() == 403) {
+                    logger.warn("Fallback to simple info '/' request, _cat/nodes is not authorized");
+                    esVersion = readVersionsFromInfo(restClient, hosts.size());
+                    logger.info("initializing yaml client, minimum es version: [{}] hosts: {}", esVersion, hosts);
+                } else {
+                    throw ex;
+                }
+            }
+            ClientYamlTestClient clientYamlTestClient = new ClientYamlTestClient(restSpec, restClient, hosts, esVersion);
+            restTestExecutionContext = new ClientYamlTestExecutionContext(clientYamlTestClient);
+            adminExecutionContext = new ClientYamlTestExecutionContext(clientYamlTestClient);
+        }
+        assert restTestExecutionContext != null;
+        assert adminExecutionContext != null;
+        assert blacklistPathMatchers != null;
+
+        // admin context must be available for @After always, regardless of whether the test was blacklisted
+        adminExecutionContext.clear();
+
+        //skip test if it matches one of the blacklist globs
+        for (BlacklistedPathPatternMatcher blacklistedPathMatcher : blacklistPathMatchers) {
+            String testPath = testCandidate.getSuitePath() + "/" + testCandidate.getTestSection().getName();
+            assumeFalse("[" + testCandidate.getTestPath() + "] skipped, reason: blacklisted", blacklistedPathMatcher
+                .isSuffixMatch(testPath));
+        }
+        restTestExecutionContext.clear();
+
+        //skip test if the whole suite (yaml file) is disabled
+        assumeFalse(testCandidate.getSetupSection().getSkipSection().getSkipMessage(testCandidate.getSuitePath()),
+            testCandidate.getSetupSection().getSkipSection().skip(restTestExecutionContext.esVersion()));
+        //skip test if the whole suite (yaml file) is disabled
+        assumeFalse(testCandidate.getTeardownSection().getSkipSection().getSkipMessage(testCandidate.getSuitePath()),
+            testCandidate.getTeardownSection().getSkipSection().skip(restTestExecutionContext.esVersion()));
+        //skip test if test section is disabled
+        assumeFalse(testCandidate.getTestSection().getSkipSection().getSkipMessage(testCandidate.getTestPath()),
+            testCandidate.getTestSection().getSkipSection().skip(restTestExecutionContext.esVersion()));
+    }

     @Override
@@ -118,7 +185,6 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {
     }

     public static Iterable createParameters() throws IOException, ClientYamlTestParseException {
-        //parse tests only if rest test group is enabled, otherwise rest tests might not even be available on file system
         List restTestCandidates = collectTestCandidates();
         List objects = new ArrayList<>();
         for (ClientYamlTestCandidate restTestCandidate : restTestCandidates) {
@@ -151,12 +217,7 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {
         }

         //sort the candidates so they will always be in the same order before being shuffled, for repeatability
-        Collections.sort(testCandidates, new Comparator() {
-            @Override
-            public int compare(ClientYamlTestCandidate o1, ClientYamlTestCandidate o2) {
-                return o1.getTestPath().compareTo(o2.getTestPath());
-            }
-        });
+        Collections.sort(testCandidates, (o1, o2) -> o1.getTestPath().compareTo(o2.getTestPath()));
         return testCandidates;
     }
@@ -175,7 +236,7 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {
      * are available from classpath.
      */
     @SuppressForbidden(reason = "proper use of URL, hack around a JDK bug")
-    static FileSystem getFileSystem() throws IOException {
+    protected static FileSystem getFileSystem() throws IOException {
         // REST suite handling is currently complicated, with lots of filtering and so on
         // For now, to work embedded in a jar, return a ZipFileSystem over the jar contents.
         URL codeLocation = FileUtils.class.getProtectionDomain().getCodeSource().getLocation();
@@ -189,7 +250,7 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {
             try (InputStream in = codeLocation.openStream()) {
                 Files.copy(in, tmp, StandardCopyOption.REPLACE_EXISTING);
             }
-            return FileSystems.newFileSystem(new URI("jar:" + tmp.toUri()), Collections.emptyMap());
+            return FileSystems.newFileSystem(new URI("jar:" + tmp.toUri()), Collections.emptyMap());
         } catch (URISyntaxException e) {
             throw new IOException("couldn't open zipfilesystem: ", e);
         }
@@ -198,23 +259,6 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {
         }
     }

-    @BeforeClass
-    public static void initExecutionContext() throws IOException {
-        String[] specPaths = resolvePathsProperty(REST_TESTS_SPEC, DEFAULT_SPEC_PATH);
-        ClientYamlSuiteRestSpec restSpec = null;
-        FileSystem fileSystem = getFileSystem();
-        // don't make a try-with, getFileSystem returns null
-        // ... and you can't close() the default filesystem
-        try {
-            restSpec = ClientYamlSuiteRestSpec.parseFrom(fileSystem, DEFAULT_SPEC_PATH, specPaths);
-        } finally {
-            IOUtils.close(fileSystem);
-        }
-        validateSpec(restSpec);
-        restTestExecutionContext = new ClientYamlTestExecutionContext(restSpec);
-        adminExecutionContext = new ClientYamlTestExecutionContext(restSpec);
-    }
-
     protected ClientYamlTestExecutionContext getAdminExecutionContext() {
         return adminExecutionContext;
     }
@@ -238,35 +282,55 @@ public abstract class ESClientYamlSuiteTestCase extends ESRestTestCase {

     @AfterClass
     public static void clearStatic() {
+        blacklistPathMatchers = null;
         restTestExecutionContext = null;
         adminExecutionContext = null;
     }

-    @Before
-    public void reset() throws IOException {
-        // admin context must be available for @After always, regardless of whether the test was blacklisted
-        adminExecutionContext.initClient(adminClient(), getClusterHosts());
-        adminExecutionContext.clear();
-
-        //skip test if it matches one of the blacklist globs
-        for (BlacklistedPathPatternMatcher blacklistedPathMatcher : blacklistPathMatchers) {
-            String testPath = testCandidate.getSuitePath() + "/" + testCandidate.getTestSection().getName();
-            assumeFalse("[" + testCandidate.getTestPath() + "] skipped, reason: blacklisted", blacklistedPathMatcher
-                .isSuffixMatch(testPath));
+    private static Tuple readVersionsFromCatNodes(RestClient restClient) throws IOException {
+        // we simply go to the _cat/nodes API and parse all versions in the cluster
+        Response response = restClient.performRequest("GET", "/_cat/nodes", Collections.singletonMap("h", "version,master"));
+        ClientYamlTestResponse restTestResponse = new ClientYamlTestResponse(response);
+        String nodesCatResponse = restTestResponse.getBodyAsString();
+        String[] split = nodesCatResponse.split("\n");
+        Version version = null;
+        Version masterVersion = null;
+        for (String perNode : split) {
+            final String[] versionAndMaster = perNode.split("\\s+");
+            assert versionAndMaster.length == 2 : "invalid line: " + perNode + " length: " + versionAndMaster.length;
+            final Version currentVersion = Version.fromString(versionAndMaster[0]);
+            final boolean master = versionAndMaster[1].trim().equals("*");
+            if (master) {
+                assert masterVersion == null;
+                masterVersion = currentVersion;
+            }
+            if (version == null) {
+                version = currentVersion;
+            } else if (version.onOrAfter(currentVersion)) {
+                version = currentVersion;
+            }
         }
-
-        //The client needs non static info to get initialized, therefore it can't be initialized in the before class
-        restTestExecutionContext.initClient(client(), getClusterHosts());
-        restTestExecutionContext.clear();
+        return new Tuple<>(version, masterVersion);
+    }

-        //skip test if the whole suite (yaml file) is disabled
-        assumeFalse(testCandidate.getSetupSection().getSkipSection().getSkipMessage(testCandidate.getSuitePath()),
-            testCandidate.getSetupSection().getSkipSection().skip(restTestExecutionContext.esVersion()));
-        //skip test if the whole suite (yaml file) is disabled
-        assumeFalse(testCandidate.getTeardownSection().getSkipSection().getSkipMessage(testCandidate.getSuitePath()),
-            testCandidate.getTeardownSection().getSkipSection().skip(restTestExecutionContext.esVersion()));
-        //skip test if test section is disabled
-        assumeFalse(testCandidate.getTestSection().getSkipSection().getSkipMessage(testCandidate.getTestPath()),
-            testCandidate.getTestSection().getSkipSection().skip(restTestExecutionContext.esVersion()));
+    private static Version readVersionsFromInfo(RestClient restClient, int numHosts) throws IOException {
+        Version version = null;
+        for (int i = 0; i < numHosts; i++) {
+            //we don't really use the urls here, we rely on the client doing round-robin to touch all the nodes in the cluster
+            Response response = restClient.performRequest("GET", "/");
+            ClientYamlTestResponse restTestResponse = new ClientYamlTestResponse(response);
+            Object latestVersion = restTestResponse.evaluate("version.number");
+            if (latestVersion == null) {
+                throw new RuntimeException("elasticsearch version not found in the response");
+            }
+            final Version currentVersion = Version.fromString(latestVersion.toString());
+            if (version == null) {
+                version = currentVersion;
+            } else if (version.onOrAfter(currentVersion)) {
+                version = currentVersion;
+            }
+        }
+        return version;
     }

     public void test() throws IOException {
diff --git a/test/framework/src/main/java/org/elasticsearch/test/transport/CapturingTransport.java b/test/framework/src/main/java/org/elasticsearch/test/transport/CapturingTransport.java
index 6b3ed0bbad0..7dd4569dc56 100644
--- a/test/framework/src/main/java/org/elasticsearch/test/transport/CapturingTransport.java
+++ b/test/framework/src/main/java/org/elasticsearch/test/transport/CapturingTransport.java
@@ -30,6 +30,7 @@ import org.elasticsearch.common.transport.BoundTransportAddress;
 import org.elasticsearch.common.transport.TransportAddress;
 import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
 import org.elasticsearch.transport.ConnectTransportException;
+import org.elasticsearch.transport.ConnectionProfile;
 import org.elasticsearch.transport.RemoteTransportException;
 import org.elasticsearch.transport.SendRequestTransportException;
 import org.elasticsearch.transport.Transport;
@@ -213,23 +214,13 @@ public class CapturingTransport implements Transport {
         return new TransportAddress[0];
     }

-    @Override
-    public boolean addressSupported(Class address) {
-        return false;
-    }
-
     @Override
     public boolean nodeConnected(DiscoveryNode node) {
         return true;
     }

     @Override
-    public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
-
-    }
-
-    @Override
-    public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
+    public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException {

     }
diff --git a/test/framework/src/main/java/org/elasticsearch/test/transport/MockTransportService.java b/test/framework/src/main/java/org/elasticsearch/test/transport/MockTransportService.java
index ac400065386..2790b548b18 100644
--- a/test/framework/src/main/java/org/elasticsearch/test/transport/MockTransportService.java
+++ b/test/framework/src/main/java/org/elasticsearch/test/transport/MockTransportService.java
@@ -42,6 +42,7 @@ import org.elasticsearch.tasks.TaskManager;
 import org.elasticsearch.test.tasks.MockTaskManager;
 import org.elasticsearch.threadpool.ThreadPool;
 import org.elasticsearch.transport.ConnectTransportException;
+import org.elasticsearch.transport.ConnectionProfile;
 import org.elasticsearch.transport.MockTcpTransport;
 import org.elasticsearch.transport.RequestHandlerRegistry;
 import org.elasticsearch.transport.Transport;
@@ -175,13 +176,9 @@ public final class MockTransportService extends TransportService {
      */
     public void addFailToSendNoConnectRule(TransportAddress transportAddress) {
         addDelegate(transportAddress, new DelegateTransport(original) {
-            @Override
-            public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
-                throw new ConnectTransportException(node, "DISCONNECT: simulated");
-            }

             @Override
-            public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
+            public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException {
                 throw new ConnectTransportException(node, "DISCONNECT: simulated");
             }
@@ -222,14 +219,10 @@ public final class MockTransportService extends TransportService {
     public void addFailToSendNoConnectRule(TransportAddress transportAddress, final Set blockedActions) {
         addDelegate(transportAddress, new DelegateTransport(original) {
-            @Override
-            public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
-                original.connectToNode(node);
-            }

             @Override
-            public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
-                original.connectToNodeLight(node);
+            public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException {
+                original.connectToNode(node, connectionProfile);
             }

             @Override
@@ -260,13 +253,9 @@ public final class MockTransportService extends TransportService {
      */
     public void addUnresponsiveRule(TransportAddress transportAddress) {
         addDelegate(transportAddress, new DelegateTransport(original) {
-            @Override
-            public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
-                throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
-            }

             @Override
-            public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
+            public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException {
                 throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
             }
@@ -308,10 +297,10 @@ public final class MockTransportService extends TransportService {
             }

             @Override
-            public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
+            public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException {
                 TimeValue delay = getDelay();
                 if (delay.millis() <= 0) {
-                    original.connectToNode(node);
+                    original.connectToNode(node, connectionProfile);
                     return;
                 }
@@ -320,30 +309,7 @@ public final class MockTransportService extends TransportService {
                 try {
                     if (delay.millis() < connectingTimeout.millis()) {
                         Thread.sleep(delay.millis());
-                        original.connectToNode(node);
-                    } else {
-                        Thread.sleep(connectingTimeout.millis());
-                        throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
-                    }
-                } catch (InterruptedException e) {
-                    throw new ConnectTransportException(node, "UNRESPONSIVE: interrupted while sleeping", e);
-                }
-            }
-
-            @Override
-            public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
-                TimeValue delay = getDelay();
-                if (delay.millis() <= 0) {
-                    original.connectToNodeLight(node);
-                    return;
-                }
-
-                // TODO: Replace with proper setting
-                TimeValue connectingTimeout = NetworkService.TcpSettings.TCP_CONNECT_TIMEOUT.getDefault(Settings.EMPTY);
-                try {
-                    if (delay.millis() < connectingTimeout.millis()) {
-                        Thread.sleep(delay.millis());
-                        original.connectToNodeLight(node);
+                        original.connectToNode(node, connectionProfile);
                     } else {
                         Thread.sleep(connectingTimeout.millis());
                         throw new ConnectTransportException(node, "UNRESPONSIVE: simulated");
@@ -461,14 +427,10 @@ public final class MockTransportService extends TransportService {
             return getTransport(node).nodeConnected(node);
         }

-        @Override
-        public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
-            getTransport(node).connectToNode(node);
-        }

         @Override
-        public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
-            getTransport(node).connectToNodeLight(node);
+        public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException {
+            getTransport(node).connectToNode(node, connectionProfile);
         }

         @Override
@@ -511,24 +473,14 @@ public final class MockTransportService extends TransportService {
             return transport.addressesFromString(address, perAddressLimit);
         }

-        @Override
-        public boolean addressSupported(Class address) {
-            return transport.addressSupported(address);
-        }
-
         @Override
         public boolean nodeConnected(DiscoveryNode node) {
             return transport.nodeConnected(node);
         }

         @Override
-        public void connectToNode(DiscoveryNode node) throws ConnectTransportException {
-            transport.connectToNode(node);
-        }
-
-        @Override
-        public void connectToNodeLight(DiscoveryNode node) throws ConnectTransportException {
-            transport.connectToNodeLight(node);
+        public void connectToNode(DiscoveryNode node, ConnectionProfile connectionProfile) throws ConnectTransportException {
+            transport.connectToNode(node, connectionProfile);
         }

         @Override
diff --git a/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java b/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java
index ad37a7cacb3..cc32fb3f6ac 100644
--- a/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java
+++ b/test/framework/src/main/java/org/elasticsearch/transport/AbstractSimpleTransportTestCase.java
@@ -1310,7 +1310,7 @@ public abstract class AbstractSimpleTransportTestCase extends ESTestCase {
         }

         try {
-            serviceB.connectToNodeLightAndHandshake(nodeA, 100);
+            serviceB.connectToNodeAndHandshake(nodeA, 100);
             fail("exception should be thrown");
         } catch (ConnectTransportException e) {
             // all is well
@@ -1368,7 +1368,7 @@ public abstract class AbstractSimpleTransportTestCase extends ESTestCase {
         }

         try {
-            serviceB.connectToNodeLightAndHandshake(nodeA, 100);
+            serviceB.connectToNodeAndHandshake(nodeA, 100);
             fail("exception should be thrown");
         } catch (ConnectTransportException e) {
             // all is well
diff --git a/test/framework/src/main/java/org/elasticsearch/transport/MockTcpTransport.java b/test/framework/src/main/java/org/elasticsearch/transport/MockTcpTransport.java
index 84d0bed8c04..fc33ce3c635 100644
--- a/test/framework/src/main/java/org/elasticsearch/transport/MockTcpTransport.java
+++ b/test/framework/src/main/java/org/elasticsearch/transport/MockTcpTransport.java
@@ -157,17 +157,9 @@ public class MockTcpTransport extends TcpTransport
     }

     @Override
-    protected NodeChannels connectToChannelsLight(DiscoveryNode node) throws IOException {
-        return connectToChannels(node);
-    }
-
-    @Override
-    protected NodeChannels connectToChannels(DiscoveryNode node) throws IOException {
-        final NodeChannels nodeChannels = new NodeChannels(new MockChannel[1],
-            new MockChannel[1],
-            new MockChannel[1],
-            new MockChannel[1],
-            new MockChannel[1]);
+    protected NodeChannels connectToChannels(DiscoveryNode node, ConnectionProfile profile) throws IOException {
+        final MockChannel[] mockChannels = new MockChannel[1];
+        final NodeChannels nodeChannels = new NodeChannels(mockChannels, ConnectionProfile.LIGHT_PROFILE); // we always use light here
         boolean success = false;
         final Socket socket = new Socket();
         try {
@@ -189,11 +181,7 @@ public class MockTcpTransport extends TcpTransport
             socket.connect(address, (int) TCP_CONNECT_TIMEOUT.get(settings).millis());
             MockChannel channel = new MockChannel(socket, address, "none", onClose);
             channel.loopRead(executor);
-            for (MockChannel[] channels : nodeChannels.getChannelArrays()) {
-                for (int i = 0; i < channels.length; i++) {
-                    channels[i] = channel;
-                }
-            }
+            mockChannels[0] = channel;
             success = true;
         } finally {
             if (success == false) {