Remove script access to term statistics (#19462)

In scripts (at least some of the languages), the terms dictionary and
postings can be accessed with the special _index variable. This is for
very advanced use cases which want to do their own scoring. The problem
is segment level statistics must be recomputed for every document.
Additionally, this is not friendly to the terms index caching as the
order of looking up terms should be controlled by lucene.

This change removes _index from scripts. Anyone using it can and should
instead write a Similarity plugin, which is explicitly designed to allow
doing the calculations needed for a relevance score.

closes #19359
This commit is contained in:
Ryan Ernst 2017-05-16 09:10:09 -07:00 committed by GitHub
parent 1cae850cf5
commit 97d2657e18
13 changed files with 8 additions and 2032 deletions

View File

@ -23,7 +23,6 @@ import org.apache.lucene.search.Scorer;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.search.lookup.LeafDocLookup;
import org.elasticsearch.search.lookup.LeafFieldsLookup;
import org.elasticsearch.search.lookup.LeafIndexLookup;
import org.elasticsearch.search.lookup.LeafSearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;
@ -87,13 +86,6 @@ public abstract class AbstractSearchScript extends AbstractExecutableScript impl
return lookup.source();
}
/**
* Allows to access statistics on terms and fields.
*/
protected final LeafIndexLookup indexLookup() {
return lookup.indexLookup();
}
/**
* Allows to access the *stored* fields.
*/

View File

@ -1,132 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.IntsRefBuilder;
import java.io.IOException;
import java.util.Iterator;
/*
 * Can iterate over the positions of a term an arbitrary number of times:
 * the first pass (triggered from nextDoc()) records positions, offsets and
 * payloads for the current document so later passes replay the recording.
 */
public class CachedPositionIterator extends PositionIterator {

    public CachedPositionIterator(IndexFieldTerm indexFieldTerm) {
        super(indexFieldTerm);
    }

    // all payloads of the term in the current document in one bytes array.
    // payloadsStarts and payloadsLengths mark the start and length of one payload.
    final BytesRefBuilder payloads = new BytesRefBuilder();
    final IntsRefBuilder payloadsLengths = new IntsRefBuilder();
    final IntsRefBuilder payloadsStarts = new IntsRefBuilder();
    final IntsRefBuilder positions = new IntsRefBuilder();
    final IntsRefBuilder startOffsets = new IntsRefBuilder();
    final IntsRefBuilder endOffsets = new IntsRefBuilder();

    // Reused view into the payloads buffer handed out by the iterator below;
    // consumers must copy it if they need to keep it across next() calls.
    final BytesRef payload = new BytesRef();

    @Override
    public Iterator<TermPosition> reset() {
        return new Iterator<TermPosition>() {
            private int pos = 0;
            private final TermPosition termPosition = new TermPosition();

            @Override
            public boolean hasNext() {
                return pos < freq;
            }

            @Override
            public TermPosition next() {
                termPosition.position = positions.intAt(pos);
                termPosition.startOffset = startOffsets.intAt(pos);
                termPosition.endOffset = endOffsets.intAt(pos);
                termPosition.payload = payload;
                payload.bytes = payloads.bytes();
                payload.offset = payloadsStarts.intAt(pos);
                payload.length = payloadsLengths.intAt(pos);
                pos++;
                return termPosition;
            }

            @Override
            public void remove() {
                // Fix: this was a silent no-op, which violates the
                // java.util.Iterator contract (unsupported removal must throw)
                // and diverged from PositionIterator#remove(), which throws.
                throw new UnsupportedOperationException("Cannot remove anything from TermPosition iterator.");
            }
        };
    }

    // Drain the underlying single-pass iterator once, recording the
    // position/offset/payload of each occurrence for replay.
    private void record() throws IOException {
        TermPosition termPosition;
        for (int i = 0; i < freq; i++) {
            termPosition = super.next();
            positions.setIntAt(i, termPosition.position);
            addPayload(i, termPosition.payload);
            startOffsets.setIntAt(i, termPosition.startOffset);
            endOffsets.setIntAt(i, termPosition.endOffset);
        }
    }

    // Grow all per-occurrence buffers so they can hold freq entries.
    private void ensureSize(int freq) {
        if (freq == 0) {
            return;
        }
        startOffsets.grow(freq);
        endOffsets.grow(freq);
        positions.grow(freq);
        payloadsLengths.grow(freq);
        payloadsStarts.grow(freq);
        payloads.grow(freq * 8); // this is just a guess....
    }

    // Append the i-th payload to the shared payloads buffer. A null payload is
    // recorded as a zero-length slice so the start offsets stay consistent.
    private void addPayload(int i, BytesRef currPayload) {
        if (currPayload != null) {
            payloadsLengths.setIntAt(i, currPayload.length);
            payloadsStarts.setIntAt(i, i == 0 ? 0 : payloadsStarts.intAt(i - 1) + payloadsLengths.intAt(i - 1));
            payloads.grow(payloadsStarts.intAt(i) + currPayload.length);
            System.arraycopy(currPayload.bytes, currPayload.offset, payloads.bytes(), payloadsStarts.intAt(i), currPayload.length);
        } else {
            payloadsLengths.setIntAt(i, 0);
            payloadsStarts.setIntAt(i, i == 0 ? 0 : payloadsStarts.intAt(i - 1) + payloadsLengths.intAt(i - 1));
        }
    }

    @Override
    public void nextDoc() throws IOException {
        super.nextDoc();
        ensureSize(freq);
        record();
    }

    @Override
    public TermPosition next() {
        // Direct single-pass iteration is not supported here; callers must go
        // through reset() to iterate the recorded occurrences.
        throw new UnsupportedOperationException();
    }
}

View File

@ -1,128 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.lucene.search.CollectionStatistics;
import org.elasticsearch.common.util.MinimalMap;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Script interface to all information regarding a field: field-level
 * statistics, plus per-term statistics via {@link #get(Object, int)}.
 */
public class IndexField extends MinimalMap<String, IndexFieldTerm> {

    /*
     * Cache of per-term info objects, created on demand by get(). Frequency,
     * doc frequency and positions information can be retrieved from them.
     */
    private final Map<String, IndexFieldTerm> terms = new HashMap<>();

    // the name of this field
    private final String fieldName;

    /*
     * Holds the current reader; all field-statistics requests are delegated
     * there.
     */
    private final LeafIndexLookup indexLookup;

    /*
     * General field statistics such as the number of documents containing the
     * field.
     */
    private final CollectionStatistics fieldStats;

    /*
     * Represents a field in a document. Can be used to return information on
     * statistics of this field. Information on specific terms in this field can
     * be accessed by calling get(String term).
     */
    public IndexField(String fieldName, LeafIndexLookup indexLookup) throws IOException {
        assert fieldName != null;
        assert indexLookup != null;
        this.fieldName = fieldName;
        this.indexLookup = indexLookup;
        this.fieldStats = indexLookup.getIndexSearcher().collectionStatistics(fieldName);
    }

    /** Number of documents containing the field. */
    public long docCount() throws IOException {
        return fieldStats.docCount();
    }

    /** Sum of the number of words over all documents that were indexed. */
    public long sumttf() throws IOException {
        return fieldStats.sumTotalTermFreq();
    }

    /**
     * Sum of doc frequencies over all words that appear in any document
     * that has the field.
     */
    public long sumdf() throws IOException {
        return fieldStats.sumDocFreq();
    }

    // TODO: might be good to get the field lengths here somewhere?

    /*
     * Returns a TermInfo object that can be used to access information on
     * specific terms. flags can be set as described in TermInfo.
     *
     * TODO: here might be potential for running time improvement? If we knew in
     * advance which terms are requested, we could provide an array which the
     * user could then iterate over.
     */
    public IndexFieldTerm get(Object key, int flags) {
        String term = (String) key;
        // Initialize lazily; subsequent calls reuse the cached object.
        IndexFieldTerm termInfo = terms.computeIfAbsent(term,
                t -> new IndexFieldTerm(t, fieldName, indexLookup, flags));
        termInfo.validateFlags(flags);
        return termInfo;
    }

    /*
     * Returns a TermInfo object that can be used to access information on
     * specific terms. flags can be set as described in TermInfo.
     */
    @Override
    public IndexFieldTerm get(Object key) {
        // per default, do not initialize any positions info
        return get(key, IndexLookup.FLAG_FREQUENCIES);
    }

    public void setDocIdInTerms(int docId) {
        for (IndexFieldTerm termInfo : terms.values()) {
            termInfo.setDocument(docId);
        }
    }
}

View File

@ -1,298 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader.FilterPostingsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;
import java.io.IOException;
import java.util.Iterator;
/**
 * Holds all information on a particular term in a field: shard-level term
 * statistics (df/ttf), the per-document term frequency (tf), and iteration
 * over the term's positions/offsets/payloads in the current document.
 */
public class IndexFieldTerm implements Iterable<TermPosition> {

    // The posting list for this term. Is null if the term or field does not
    // exist.
    PostingsEnum postings;

    // Stores if positions, offsets and payloads are requested.
    private final int flags;

    private final String fieldName;

    private final String term;

    // Iterates positions for the current document; a CachedPositionIterator
    // when FLAG_CACHE was requested (see constructor).
    private final PositionIterator iterator;

    // for lucene calls
    private final Term identifier;

    // Shard-level statistics for this term, fetched once in the constructor.
    private final TermStatistics termStats;

    // get the document frequency of the term
    public long df() throws IOException {
        return termStats.docFreq();
    }

    // get the total term frequency of the term, that is, how often does the
    // term appear in any document?
    public long ttf() throws IOException {
        return termStats.totalTermFreq();
    }

    // when the reader changes, we have to get the posting list for this term
    // and reader
    private void setReader(LeafReader reader) {
        try {
            postings = getPostings(convertToLuceneFlags(flags), reader);

            if (postings == null) {
                // no term or field for this segment, fake out the postings...
                final DocIdSetIterator empty = DocIdSetIterator.empty();
                postings = new PostingsEnum() {
                    @Override
                    public int docID() {
                        return empty.docID();
                    }

                    @Override
                    public int nextDoc() throws IOException {
                        return empty.nextDoc();
                    }

                    @Override
                    public int advance(int target) throws IOException {
                        return empty.advance(target);
                    }

                    @Override
                    public long cost() {
                        return empty.cost();
                    }

                    @Override
                    public int freq() throws IOException {
                        // NOTE(review): returns 1 rather than 0 for the faked
                        // postings — presumably harmless since the empty
                        // iterator never matches a doc, so setDocument() sets
                        // freq = 0 anyway; confirm before relying on it.
                        return 1;
                    }

                    @Override
                    public int nextPosition() throws IOException {
                        return -1;
                    }

                    @Override
                    public int startOffset() throws IOException {
                        return -1;
                    }

                    @Override
                    public int endOffset() throws IOException {
                        return -1;
                    }

                    @Override
                    public BytesRef getPayload() throws IOException {
                        return null;
                    }
                };
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Unable to get postings for field " + fieldName + " and term " + term, e);
        }
    }

    // Translate the IndexLookup.FLAG_* bits into the PostingsEnum flag bits
    // that lucene expects.
    private int convertToLuceneFlags(int flags) {
        int lucenePositionsFlags = PostingsEnum.NONE;
        lucenePositionsFlags |= (flags & IndexLookup.FLAG_FREQUENCIES) > 0 ? PostingsEnum.FREQS : 0x0;
        lucenePositionsFlags |= (flags & IndexLookup.FLAG_POSITIONS) > 0 ? PostingsEnum.POSITIONS : 0x0;
        lucenePositionsFlags |= (flags & IndexLookup.FLAG_PAYLOADS) > 0 ? PostingsEnum.PAYLOADS : 0x0;
        lucenePositionsFlags |= (flags & IndexLookup.FLAG_OFFSETS) > 0 ? PostingsEnum.OFFSETS : 0x0;
        return lucenePositionsFlags;
    }

    // Look up the postings for this term in the given segment reader, or null
    // if the field or term is absent. When the segment has deletions, the
    // postings are wrapped so deleted documents are skipped transparently.
    private PostingsEnum getPostings(int luceneFlags, LeafReader reader) throws IOException {
        assert identifier.field() != null;
        assert identifier.bytes() != null;
        final Fields fields = reader.fields();
        PostingsEnum newPostings = null;
        if (fields != null) {
            final Terms terms = fields.terms(identifier.field());
            if (terms != null) {
                TermsEnum termsEnum = terms.iterator();
                if (termsEnum.seekExact(identifier.bytes())) {
                    newPostings = termsEnum.postings(postings, luceneFlags);
                    final Bits liveDocs = reader.getLiveDocs();
                    if (liveDocs != null) {
                        newPostings = new FilterPostingsEnum(newPostings) {
                            // Advance from doc d to the next live (non-deleted) doc.
                            private int doNext(int d) throws IOException {
                                for (; d != NO_MORE_DOCS; d = super.nextDoc()) {
                                    if (liveDocs.get(d)) {
                                        return d;
                                    }
                                }
                                return NO_MORE_DOCS;
                            }

                            @Override
                            public int nextDoc() throws IOException {
                                return doNext(super.nextDoc());
                            }

                            @Override
                            public int advance(int target) throws IOException {
                                return doNext(super.advance(target));
                            }
                        };
                    }
                }
            }
        }
        return newPostings;
    }

    // Term frequency within the current document; 0 if the term is absent.
    private int freq = 0;

    // Advance the postings to docId and refresh freq and the position
    // iterator. Assumes documents are visited in ascending id order (postings
    // can only be advanced forward).
    public void setDocument(int docId) {
        assert (postings != null);
        try {
            // we try to advance to the current document.
            int currentDocPos = postings.docID();
            if (currentDocPos < docId) {
                currentDocPos = postings.advance(docId);
            }
            if (currentDocPos == docId) {
                freq = postings.freq();
            } else {
                freq = 0;
            }
            iterator.nextDoc();
        } catch (IOException e) {
            // NOTE(review): the message references setNextDoc(), but this
            // method is setDocument() — stale name in the error text.
            throw new ElasticsearchException("While trying to initialize term positions in IndexFieldTerm.setNextDoc() ", e);
        }
    }

    /**
     * Builds the term info: chooses a cached or single-pass position iterator
     * based on FLAG_CACHE, binds the postings for the current segment/doc, and
     * fetches shard-level term statistics.
     */
    public IndexFieldTerm(String term, String fieldName, LeafIndexLookup indexLookup, int flags) {
        assert fieldName != null;
        this.fieldName = fieldName;
        assert term != null;
        this.term = term;
        assert indexLookup != null;
        identifier = new Term(fieldName, (String) term);
        this.flags = flags;
        boolean doRecord = ((flags & IndexLookup.FLAG_CACHE) > 0);
        if (!doRecord) {
            iterator = new PositionIterator(this);
        } else {
            iterator = new CachedPositionIterator(this);
        }
        setReader(indexLookup.getReader());
        setDocument(indexLookup.getDocId());
        try {
            termStats = indexLookup.getIndexSearcher().termStatistics(identifier,
                    TermContext.build(indexLookup.getReaderContext(), identifier));
        } catch (IOException e) {
            throw new ElasticsearchException("Cannot get term statistics: ", e);
        }
    }

    // Term frequency in the current document, as computed by setDocument().
    public int tf() throws IOException {
        return freq;
    }

    @Override
    public Iterator<TermPosition> iterator() {
        return iterator.reset();
    }

    /*
     * A user might decide inside a script to call get with _POSITIONS and then
     * a second time with _PAYLOADS. If the positions were recorded but the
     * payloads were not, the user will not have access to them. Therefore, throw
     * exception here explaining how to call get().
     */
    public void validateFlags(int flags2) {
        if ((this.flags & flags2) < flags2) {
            throw new ElasticsearchException("You must call get with all required flags! Instead of " + getCalledStatement(flags2)
                    + "call " + getCallStatement(flags2 | this.flags) + " once");
        }
    }

    // Render the two conflicting get() calls for the error message above.
    private String getCalledStatement(int flags2) {
        String calledFlagsCall1 = getFlagsString(flags);
        String calledFlagsCall2 = getFlagsString(flags2);
        String callStatement1 = getCallStatement(calledFlagsCall1);
        String callStatement2 = getCallStatement(calledFlagsCall2);
        return " " + callStatement1 + " and " + callStatement2 + " ";
    }

    // Render a script-style _index[...].get(...) call with the given flags.
    private String getCallStatement(String calledFlags) {
        return "_index['" + this.fieldName + "'].get('" + this.term + "', " + calledFlags + ")";
    }

    // Build a "_FLAG1 | _FLAG2" style string from a flags bitset; null if no
    // known flag bit is set.
    private String getFlagsString(int flags2) {
        String flagsString = null;
        if ((flags2 & IndexLookup.FLAG_FREQUENCIES) != 0) {
            flagsString = anddToFlagsString(flagsString, "_FREQUENCIES");
        }
        if ((flags2 & IndexLookup.FLAG_POSITIONS) != 0) {
            flagsString = anddToFlagsString(flagsString, "_POSITIONS");
        }
        if ((flags2 & IndexLookup.FLAG_OFFSETS) != 0) {
            flagsString = anddToFlagsString(flagsString, "_OFFSETS");
        }
        if ((flags2 & IndexLookup.FLAG_PAYLOADS) != 0) {
            flagsString = anddToFlagsString(flagsString, "_PAYLOADS");
        }
        if ((flags2 & IndexLookup.FLAG_CACHE) != 0) {
            flagsString = anddToFlagsString(flagsString, "_CACHE");
        }
        return flagsString;
    }

    // Append one flag name, separated by " | " when there is a previous one.
    private String anddToFlagsString(String flagsString, String flag) {
        if (flagsString != null) {
            flagsString += " | ";
        } else {
            flagsString = "";
        }
        flagsString += flag;
        return flagsString;
    }

    private String getCallStatement(int flags2) {
        String calledFlags = getFlagsString(flags2);
        String callStatement = getCallStatement(calledFlags);
        return " " + callStatement + " ";
    }
}

View File

@ -1,74 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.lucene.index.LeafReaderContext;
import java.util.HashMap;
import java.util.Map;
import static java.util.Collections.unmodifiableMap;
/**
 * Flag constants and factory for the {@code _index} script variable.
 */
public class IndexLookup {

    /**
     * Flag to pass to {@link IndexField#get(Object, int)} if you require
     * offsets in the returned {@link IndexFieldTerm}.
     */
    public static final int FLAG_OFFSETS = 2;

    /**
     * Flag to pass to {@link IndexField#get(Object, int)} if you require
     * payloads in the returned {@link IndexFieldTerm}.
     */
    public static final int FLAG_PAYLOADS = 4;

    /**
     * Flag to pass to {@link IndexField#get(Object, int)} if you require
     * frequencies in the returned {@link IndexFieldTerm}. Frequencies might be
     * returned anyway for some lucene codecs even if this flag is not set.
     */
    public static final int FLAG_FREQUENCIES = 8;

    /**
     * Flag to pass to {@link IndexField#get(Object, int)} if you require
     * positions in the returned {@link IndexFieldTerm}.
     */
    public static final int FLAG_POSITIONS = 16;

    /**
     * Flag to pass to {@link IndexField#get(Object, int)} if you want the
     * returned {@link IndexFieldTerm} to cache positions so they can be
     * iterated more than once.
     */
    public static final int FLAG_CACHE = 32;

    /** Script-visible names for the flags above, exposed to the lookup map. */
    public static final Map<String, Object> NAMES;
    static {
        Map<String, Object> flagNames = new HashMap<>();
        flagNames.put("_FREQUENCIES", FLAG_FREQUENCIES);
        flagNames.put("_POSITIONS", FLAG_POSITIONS);
        flagNames.put("_OFFSETS", FLAG_OFFSETS);
        flagNames.put("_PAYLOADS", FLAG_PAYLOADS);
        flagNames.put("_CACHE", FLAG_CACHE);
        NAMES = unmodifiableMap(flagNames);
    }

    public static LeafIndexLookup getLeafIndexLookup(LeafReaderContext context) {
        return new LeafIndexLookup(context);
    }
}

View File

@ -1,199 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.util.MinimalMap;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Per-segment lookup backing the deprecated {@code _index} script variable:
 * maps field names to {@link IndexField} objects and exposes shard-level
 * document counts.
 */
public class LeafIndexLookup extends MinimalMap<String, IndexField> {

    // Segment reader, from which we can get the term vectors. It carries no
    // shard-wide term or field statistics.
    private final LeafReader reader;

    // Top-level reader, from which proper field and term statistics come.
    private final IndexReader parentReader;

    // Needed later to fetch the field and term statistics of the shard.
    private final IndexSearcher indexSearcher;

    // Doc id currently pointed at; -1 until setDocument() is first called.
    private int docId = -1;

    // Cache of the IndexField objects used by the script, kept so a field is
    // not re-initialized every time it is accessed.
    private final Map<String, IndexField> indexFields = new HashMap<>();

    // Shard-level counts, lazily cached because their computation is expensive.
    private int numDocs = -1;
    private int maxDoc = -1;
    private int numDeletedDocs = -1;

    // Ensures the deprecation warning is only emitted once per lookup instance.
    private boolean deprecationEmitted = false;

    private void logDeprecation() {
        if (deprecationEmitted) {
            return;
        }
        Logger log = Loggers.getLogger(getClass());
        DeprecationLogger depLogger = new DeprecationLogger(log);
        depLogger.deprecated("Using _index is deprecated. Create a custom ScriptEngine to access index internals.");
        deprecationEmitted = true;
    }

    public LeafIndexLookup(LeafReaderContext ctx) {
        reader = ctx.reader();
        parentReader = ReaderUtil.getTopLevelContext(ctx).reader();
        indexSearcher = new IndexSearcher(parentReader);
        indexSearcher.setQueryCache(null);
    }

    public int numDocs() {
        logDeprecation();
        if (numDocs < 0) {
            numDocs = parentReader.numDocs();
        }
        return numDocs;
    }

    public int maxDoc() {
        logDeprecation();
        if (maxDoc < 0) {
            maxDoc = parentReader.maxDoc();
        }
        return maxDoc;
    }

    public int numDeletedDocs() {
        logDeprecation();
        if (numDeletedDocs < 0) {
            numDeletedDocs = parentReader.numDeletedDocs();
        }
        return numDeletedDocs;
    }

    public void setDocument(int docId) {
        // Called with the same docId as before: nothing to do.
        if (this.docId == docId) {
            return;
        }
        // Docs are assumed to be processed in ascending order of id. Going
        // backwards can happen when the same SearchLookup is used in different
        // phases (such as score and fetch). In that case we do not want to
        // eagerly re-initialize posting lists — we do not even know whether
        // term and field statistics will be needed in the new phase — so we
        // just drop all cached IndexFields and let them be rebuilt on demand.
        if (this.docId > docId) {
            indexFields.clear();
        }
        this.docId = docId;
        setNextDocIdInFields();
    }

    protected void setNextDocIdInFields() {
        for (IndexField field : indexFields.values()) {
            field.setDocIdInTerms(this.docId);
        }
    }

    /*
     * TODO: here might be potential for running time improvement? If we knew in
     * advance which terms are requested, we could provide an array which the
     * user could then iterate over.
     */
    @Override
    public IndexField get(Object key) {
        logDeprecation();
        String fieldName = (String) key;
        IndexField field = indexFields.get(key);
        if (field == null) {
            try {
                field = new IndexField(fieldName, this);
                indexFields.put(fieldName, field);
            } catch (IOException e) {
                throw new ElasticsearchException(e);
            }
        }
        return field;
    }

    /*
     * Get the lucene term vectors. See
     * https://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/Fields.html
     */
    public Fields termVectors() throws IOException {
        logDeprecation();
        assert reader != null;
        return reader.getTermVectors(docId);
    }

    LeafReader getReader() {
        logDeprecation();
        return reader;
    }

    public int getDocId() {
        logDeprecation();
        return docId;
    }

    public IndexReader getParentReader() {
        logDeprecation();
        return parentReader == null ? reader : parentReader;
    }

    public IndexSearcher getIndexSearcher() {
        logDeprecation();
        return indexSearcher;
    }

    public IndexReaderContext getReaderContext() {
        logDeprecation();
        return getParentReader().getContext();
    }
}

View File

@ -35,24 +35,20 @@ public class LeafSearchLookup {
final LeafDocLookup docMap;
final SourceLookup sourceLookup;
final LeafFieldsLookup fieldsLookup;
final LeafIndexLookup indexLookup;
final Map<String, Object> asMap;
public LeafSearchLookup(LeafReaderContext ctx, LeafDocLookup docMap, SourceLookup sourceLookup,
LeafFieldsLookup fieldsLookup, LeafIndexLookup indexLookup, Map<String, Object> topLevelMap) {
LeafFieldsLookup fieldsLookup) {
this.ctx = ctx;
this.docMap = docMap;
this.sourceLookup = sourceLookup;
this.fieldsLookup = fieldsLookup;
this.indexLookup = indexLookup;
Map<String, Object> asMap = new HashMap<>(topLevelMap.size() + 5);
asMap.putAll(topLevelMap);
Map<String, Object> asMap = new HashMap<>(4);
asMap.put("doc", docMap);
asMap.put("_doc", docMap);
asMap.put("_source", sourceLookup);
asMap.put("_fields", fieldsLookup);
asMap.put("_index", indexLookup);
this.asMap = unmodifiableMap(asMap);
}
@ -64,10 +60,6 @@ public class LeafSearchLookup {
return this.sourceLookup;
}
public LeafIndexLookup indexLookup() {
return this.indexLookup;
}
public LeafFieldsLookup fields() {
return this.fieldsLookup;
}
@ -80,6 +72,5 @@ public class LeafSearchLookup {
docMap.setDocument(docId);
sourceLookup.setSegmentAndDocument(ctx, docId);
fieldsLookup.setDocument(docId);
indexLookup.setDocument(docId);
}
}

View File

@ -1,87 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.lucene.index.PostingsEnum;
import org.elasticsearch.ElasticsearchException;
import java.io.IOException;
import java.util.Iterator;
/**
 * Single-pass iterator over the positions (and offsets/payloads) of a term in
 * the current document, reading directly from the term's {@code PostingsEnum}.
 * Can only be consumed once per document; the _CACHE flag swaps in
 * {@link CachedPositionIterator} for repeated iteration.
 */
public class PositionIterator implements Iterator<TermPosition> {

    // Guards against iterating the single-pass postings twice for one document.
    private boolean resetted = false;

    protected IndexFieldTerm indexFieldTerm;

    // Term frequency in the current document; upper bound for iteration.
    protected int freq = -1;

    // current position of iterator
    private int currentPos;

    // Reused container returned from next(); callers must not retain it.
    protected final TermPosition termPosition = new TermPosition();

    private PostingsEnum postings;

    public PositionIterator(IndexFieldTerm indexFieldTerm) {
        this.indexFieldTerm = indexFieldTerm;
    }

    @Override
    public void remove() {
        throw new UnsupportedOperationException("Cannot remove anything from TermPosition iterator.");
    }

    @Override
    public boolean hasNext() {
        return currentPos < freq;
    }

    @Override
    public TermPosition next() {
        try {
            termPosition.position = postings.nextPosition();
            termPosition.startOffset = postings.startOffset();
            termPosition.endOffset = postings.endOffset();
            termPosition.payload = postings.getPayload();
        } catch (IOException ex) {
            throw new ElasticsearchException("can not advance iterator", ex);
        }
        currentPos++;
        return termPosition;
    }

    // Re-arm the iterator for a new document after the owning IndexFieldTerm
    // has advanced its postings.
    public void nextDoc() throws IOException {
        resetted = false;
        currentPos = 0;
        freq = indexFieldTerm.tf();
        postings = indexFieldTerm.postings;
    }

    public Iterator<TermPosition> reset() {
        if (resetted) {
            // Fix: message typo — "more that once" corrected to "more than once".
            throw new ElasticsearchException(
                    "Cannot iterate twice! If you want to iterate more than once, add _CACHE explicitly.");
        }
        resetted = true;
        return this;
    }
}

View File

@ -42,9 +42,7 @@ public class SearchLookup {
return new LeafSearchLookup(context,
docMap.getLeafDocLookup(context),
sourceLookup,
fieldsLookup.getLeafFieldsLookup(context),
IndexLookup.getLeafIndexLookup(context),
IndexLookup.NAMES);
fieldsLookup.getLeafFieldsLookup(context));
}
public DocLookup doc() {

View File

@ -1,58 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
/**
 * A single occurrence of a term within a document: its position, character
 * offsets, and (optional) payload, with helpers to decode the payload.
 */
public class TermPosition {

    public int position = -1;
    public int startOffset = -1;
    public int endOffset = -1;
    public BytesRef payload;

    // Scratch buffer reused across payloadAsString() calls.
    private CharsRefBuilder spare = new CharsRefBuilder();

    /** Payload decoded as a UTF-8 string, or null when absent/empty. */
    public String payloadAsString() {
        if (payload == null || payload.length == 0) {
            return null;
        }
        spare.copyUTF8Bytes(payload);
        return spare.toString();
    }

    /** Payload decoded as a float, or defaultMissing when absent/empty. */
    public float payloadAsFloat(float defaultMissing) {
        if (payload == null || payload.length == 0) {
            return defaultMissing;
        }
        return PayloadHelper.decodeFloat(payload.bytes, payload.offset);
    }

    /** Payload decoded as an int, or defaultMissing when absent/empty. */
    public int payloadAsInt(int defaultMissing) {
        if (payload == null || payload.length == 0) {
            return defaultMissing;
        }
        return PayloadHelper.decodeInt(payload.bytes, payload.offset);
    }
}

File diff suppressed because it is too large Load Diff

View File

@ -13,7 +13,11 @@ milliseconds since epoch as a `long`. The same is true for
`doc.some_date_field[some_number]`. Use `doc.some_date_field.value.millis` to
fetch the milliseconds since epoch if you need it.
==== Removed access to index internal via the _index variable
The `_index` variable has been removed. If you used it for advanced scoring, consider writing a `Similarity` plugin.
==== Script Settings
All of the existing scripting security settings have been deprecated. Instead
they are replaced with `script.allowed_types` and `script.allowed_contexts`.

View File

@ -29,10 +29,6 @@ Field values can be accessed from a script using
<<modules-scripting-doc-vals,doc-values>>, or
<<modules-scripting-stored,stored fields or `_source` field>>, which are explained below.
Scripts may also have access to the document's relevance
<<scripting-score,`_score`>> and, via the experimental `_index` variable,
to term statistics for <<modules-advanced-scripting,advanced text scoring>>.
[[scripting-score]]
[float]
=== Accessing the score of a document within a script