Make term statistics accessible in scripts
Term statistics can be accessed via the _shard variable.
Below is a minimal example. See the documentation for details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
/ *
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements . See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership . ElasticSearch licenses this
* file to you under the Apache License , Version 2 . 0 ( the
* " License " ) ; you may not use this file except in compliance
* with the License . You may obtain a copy of the License at
*
* http : //www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing ,
* software distributed under the License is distributed on an
* " AS IS " BASIS , WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND , either express or implied . See the License for the
* specific language governing permissions and limitations
* under the License .
* /
package org.elasticsearch.script ;
import org.elasticsearch.action.search.SearchPhaseExecutionException ;
import org.elasticsearch.action.search.SearchResponse ;
import org.elasticsearch.action.search.ShardSearchFailure ;
import org.elasticsearch.common.settings.ImmutableSettings ;
import org.elasticsearch.common.xcontent.XContentBuilder ;
import org.elasticsearch.common.xcontent.XContentFactory ;
import org.elasticsearch.index.query.QueryBuilders ;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders ;
import org.elasticsearch.search.SearchHit ;
import org.elasticsearch.test.ElasticsearchIntegrationTest ;
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions ;
import org.hamcrest.Matchers ;
import org.junit.Test ;
import java.io.IOException ;
import java.util.ArrayList ;
import java.util.HashMap ;
import java.util.List ;
import java.util.Map ;
import java.util.concurrent.ExecutionException ;
import static org.hamcrest.Matchers.equalTo ;
2014-01-02 13:54:40 +01:00
public class IndexLookupTests extends ElasticsearchIntegrationTest {
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
// Flag expression that is concatenated verbatim into generated script text as the
// second argument of _index[...].get(term, flags). Includes _CACHE; testIteratorAndRecording
// expects iterating twice over a term fetched with these flags to work.
String includeAllFlag = " _FREQUENCIES | _OFFSETS | _PAYLOADS | _POSITIONS | _CACHE " ;
// Same flag expression without _CACHE; testIteratorAndRecording expects a second
// iteration over a term fetched with these flags to fail.
String includeAllWithoutRecordFlag = " _FREQUENCIES | _OFFSETS | _PAYLOADS | _POSITIONS " ;
// Expectation maps, all (re)built by initTestData(): each maps a document key to the
// list of two values expected for that document.
private HashMap < String , List < Object > > expectedEndOffsetsArray ;
private HashMap < String , List < Object > > expectedPayloadsArray ;
private HashMap < String , List < Object > > expectedPositionsArray ;
// Filled with -1 / -1 for every document key by initTestData().
private HashMap < String , List < Object > > emptyArray ;
private HashMap < String , List < Object > > expectedStartOffsetsArray ;
void initTestData ( ) throws InterruptedException , ExecutionException , IOException {
emptyArray = new HashMap < String , List < Object > > ( ) ;
List < Object > empty1 = new ArrayList < Object > ( ) ;
empty1 . add ( - 1 ) ;
empty1 . add ( - 1 ) ;
emptyArray . put ( " 1 " , empty1 ) ;
List < Object > empty2 = new ArrayList < Object > ( ) ;
empty2 . add ( - 1 ) ;
empty2 . add ( - 1 ) ;
emptyArray . put ( " 2 " , empty2 ) ;
List < Object > empty3 = new ArrayList < Object > ( ) ;
empty3 . add ( - 1 ) ;
empty3 . add ( - 1 ) ;
emptyArray . put ( " 3 " , empty3 ) ;
expectedPositionsArray = new HashMap < String , List < Object > > ( ) ;
List < Object > pos1 = new ArrayList < Object > ( ) ;
pos1 . add ( 1 ) ;
pos1 . add ( 2 ) ;
expectedPositionsArray . put ( " 1 " , pos1 ) ;
List < Object > pos2 = new ArrayList < Object > ( ) ;
pos2 . add ( 0 ) ;
pos2 . add ( 1 ) ;
expectedPositionsArray . put ( " 2 " , pos2 ) ;
List < Object > pos3 = new ArrayList < Object > ( ) ;
pos3 . add ( 0 ) ;
pos3 . add ( 4 ) ;
expectedPositionsArray . put ( " 3 " , pos3 ) ;
expectedPayloadsArray = new HashMap < String , List < Object > > ( ) ;
List < Object > pay1 = new ArrayList < Object > ( ) ;
pay1 . add ( 2 ) ;
pay1 . add ( 3 ) ;
expectedPayloadsArray . put ( " 1 " , pay1 ) ;
List < Object > pay2 = new ArrayList < Object > ( ) ;
pay2 . add ( 1 ) ;
pay2 . add ( 2 ) ;
expectedPayloadsArray . put ( " 2 " , pay2 ) ;
List < Object > pay3 = new ArrayList < Object > ( ) ;
pay3 . add ( 1 ) ;
pay3 . add ( - 1 ) ;
expectedPayloadsArray . put ( " 3 " , pay3 ) ;
/ *
* " a|1 b|2 b|3 c|4 d " " b|1 b|2 c|3 d|4 a " " b|1 c|2 d|3 a|4 b "
* /
expectedStartOffsetsArray = new HashMap < String , List < Object > > ( ) ;
List < Object > starts1 = new ArrayList < Object > ( ) ;
starts1 . add ( 4 ) ;
starts1 . add ( 8 ) ;
expectedStartOffsetsArray . put ( " 1 " , starts1 ) ;
List < Object > starts2 = new ArrayList < Object > ( ) ;
starts2 . add ( 0 ) ;
starts2 . add ( 4 ) ;
expectedStartOffsetsArray . put ( " 2 " , starts2 ) ;
List < Object > starts3 = new ArrayList < Object > ( ) ;
starts3 . add ( 0 ) ;
starts3 . add ( 16 ) ;
expectedStartOffsetsArray . put ( " 3 " , starts3 ) ;
expectedEndOffsetsArray = new HashMap < String , List < Object > > ( ) ;
List < Object > ends1 = new ArrayList < Object > ( ) ;
ends1 . add ( 7 ) ;
ends1 . add ( 11 ) ;
expectedEndOffsetsArray . put ( " 1 " , ends1 ) ;
List < Object > ends2 = new ArrayList < Object > ( ) ;
ends2 . add ( 3 ) ;
ends2 . add ( 7 ) ;
expectedEndOffsetsArray . put ( " 2 " , ends2 ) ;
List < Object > ends3 = new ArrayList < Object > ( ) ;
ends3 . add ( 3 ) ;
ends3 . add ( 17 ) ;
expectedEndOffsetsArray . put ( " 3 " , ends3 ) ;
XContentBuilder mapping = XContentFactory . jsonBuilder ( ) . startObject ( ) . startObject ( " type1 " ) . startObject ( " properties " )
. startObject ( " int_payload_field " ) . field ( " type " , " string " ) . field ( " index_options " , " offsets " )
. field ( " analyzer " , " payload_int " ) . endObject ( ) . endObject ( ) . endObject ( ) . endObject ( ) ;
ElasticsearchAssertions . assertAcked ( prepareCreate ( " test " ) . addMapping ( " type1 " , mapping ) . setSettings (
ImmutableSettings . settingsBuilder ( ) . put ( " index.analysis.analyzer.payload_int.tokenizer " , " whitespace " )
. putArray ( " index.analysis.analyzer.payload_int.filter " , " delimited_int " )
. put ( " index.analysis.filter.delimited_int.delimiter " , " | " )
. put ( " index.analysis.filter.delimited_int.encoding " , " int " )
. put ( " index.analysis.filter.delimited_int.type " , " delimited_payload_filter " )
. put ( " index.number_of_replicas " , 0 ) . put ( " index.number_of_shards " , randomIntBetween ( 1 , 6 ) ) ) ) ;
indexRandom ( true , client ( ) . prepareIndex ( " test " , " type1 " , " 1 " ) . setSource ( " int_payload_field " , " a|1 b|2 b|3 c|4 d " ) , client ( )
. prepareIndex ( " test " , " type1 " , " 2 " ) . setSource ( " int_payload_field " , " b|1 b|2 c|3 d|4 a " ) ,
client ( ) . prepareIndex ( " test " , " type1 " , " 3 " ) . setSource ( " int_payload_field " , " b|1 c|2 d|3 a|4 b " ) ) ;
ensureGreen ( ) ;
}
@Test
public void testTwoScripts ( ) throws Exception {
initTestData ( ) ;
// check term frequencies for 'a'
2014-01-02 13:54:40 +01:00
String scriptFieldScript = " term = _index['int_payload_field']['c']; term.tf() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
scriptFieldScript = " 1 " ;
2014-01-02 13:54:40 +01:00
String scoreScript = " term = _index['int_payload_field']['b']; term.tf() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
Map < String , Object > expectedResultsField = new HashMap < String , Object > ( ) ;
expectedResultsField . put ( " 1 " , 1 ) ;
expectedResultsField . put ( " 2 " , 1 ) ;
expectedResultsField . put ( " 3 " , 1 ) ;
Map < String , Object > expectedResultsScore = new HashMap < String , Object > ( ) ;
expectedResultsScore . put ( " 1 " , 2f ) ;
expectedResultsScore . put ( " 2 " , 2f ) ;
expectedResultsScore . put ( " 3 " , 2f ) ;
checkOnlyFunctionScore ( scoreScript , expectedResultsScore , 3 ) ;
checkValueInEachDocWithFunctionScore ( scriptFieldScript , expectedResultsField , scoreScript , expectedResultsScore , 3 ) ;
}
@Test
public void testCallWithDifferentFlagsFails ( ) throws Exception {
initTestData ( ) ;
// should throw an exception, we cannot call with different flags twice
// if the flags of the second call were not included in the first call.
2014-01-02 13:54:40 +01:00
String script = " term = _index['int_payload_field']['b']; return _index['int_payload_field'].get('b', _POSITIONS).tf(); " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
try {
client ( ) . prepareSearch ( " test " ) . setQuery ( QueryBuilders . matchAllQuery ( ) ) . addScriptField ( " tvtest " , script ) . execute ( ) . actionGet ( ) ;
} catch ( SearchPhaseExecutionException e ) {
assertThat (
e . getDetailedMessage ( )
. indexOf (
2014-01-02 13:54:40 +01:00
" You must call get with all required flags! Instead of _index['int_payload_field'].get('b', _FREQUENCIES) and _index['int_payload_field'].get('b', _POSITIONS) call _index['int_payload_field'].get('b', _FREQUENCIES | _POSITIONS) once]; " ) ,
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
Matchers . greaterThan ( - 1 ) ) ;
}
// Should not throw an exception this way round
2014-01-02 13:54:40 +01:00
script = " term = _index['int_payload_field'].get('b', _POSITIONS | _FREQUENCIES);return _index['int_payload_field']['b'].tf(); " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
client ( ) . prepareSearch ( " test " ) . setQuery ( QueryBuilders . matchAllQuery ( ) ) . addScriptField ( " tvtest " , script ) . execute ( ) . actionGet ( ) ;
}
/**
 * Runs a function_score query that uses only the given script as score function and
 * compares every hit's score with the expected value.
 *
 * @param scoreScript     script source used as the score function
 * @param expectedScore   expected score per hit id; each value is cast to {@link Float}
 * @param numExpectedDocs number of hits the search must report
 */
private void checkOnlyFunctionScore(String scoreScript, Map<String, Object> expectedScore, int numExpectedDocs) {
    SearchResponse response = client()
            .prepareSearch(" test ")
            .setQuery(QueryBuilders.functionScoreQuery(ScoreFunctionBuilders.scriptFunction(scoreScript)))
            .execute()
            .actionGet();
    ElasticsearchAssertions.assertHitCount(response, numExpectedDocs);

    for (SearchHit hit : response.getHits().getHits()) {
        String hitId = hit.getId();
        double expected = ((Float) expectedScore.get(hitId)).doubleValue();
        // Scores are floats; compare with a small tolerance.
        assertThat(" for doc " + hitId, expected, Matchers.closeTo(hit.score(), 1.e-4));
    }
}
@Test
public void testDocumentationExample ( ) throws Exception {
initTestData ( ) ;
2014-01-02 13:54:40 +01:00
String script = " term = _index['float_payload_field'].get('b', " + includeAllFlag
+ " ); payloadSum=0; for (pos : term) {payloadSum = pos.payloadAsInt(0);} return payloadSum; " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
// non existing field: sum should be 0
HashMap < String , Object > zeroArray = new HashMap < String , Object > ( ) ;
zeroArray . put ( " 1 " , 0 ) ;
zeroArray . put ( " 2 " , 0 ) ;
zeroArray . put ( " 3 " , 0 ) ;
checkValueInEachDoc ( script , zeroArray , 3 ) ;
2014-01-02 13:54:40 +01:00
script = " term = _index['int_payload_field'].get('b', " + includeAllFlag
+ " ); payloadSum=0; for (pos : term) {payloadSum = payloadSum + pos.payloadAsInt(0);} return payloadSum; " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
// existing field: sums should be as here:
zeroArray . put ( " 1 " , 5 ) ;
zeroArray . put ( " 2 " , 3 ) ;
zeroArray . put ( " 3 " , 1 ) ;
checkValueInEachDoc ( script , zeroArray , 3 ) ;
}
@Test
public void testIteratorAndRecording ( ) throws Exception {
initTestData ( ) ;
// call twice with record: should work as expected
String script = createPositionsArrayScriptIterateTwice ( " b " , includeAllFlag , " position " ) ;
checkArrayValsInEachDoc ( script , expectedPositionsArray , 3 ) ;
script = createPositionsArrayScriptIterateTwice ( " b " , includeAllFlag , " startOffset " ) ;
checkArrayValsInEachDoc ( script , expectedStartOffsetsArray , 3 ) ;
script = createPositionsArrayScriptIterateTwice ( " b " , includeAllFlag , " endOffset " ) ;
checkArrayValsInEachDoc ( script , expectedEndOffsetsArray , 3 ) ;
script = createPositionsArrayScriptIterateTwice ( " b " , includeAllFlag , " payloadAsInt(-1) " ) ;
checkArrayValsInEachDoc ( script , expectedPayloadsArray , 3 ) ;
// no record and get iterator twice: should fail
script = createPositionsArrayScriptIterateTwice ( " b " , includeAllWithoutRecordFlag , " position " ) ;
checkExceptions ( script ) ;
script = createPositionsArrayScriptIterateTwice ( " b " , includeAllWithoutRecordFlag , " startOffset " ) ;
checkExceptions ( script ) ;
script = createPositionsArrayScriptIterateTwice ( " b " , includeAllWithoutRecordFlag , " endOffset " ) ;
checkExceptions ( script ) ;
script = createPositionsArrayScriptIterateTwice ( " b " , includeAllWithoutRecordFlag , " payloadAsInt(-1) " ) ;
checkExceptions ( script ) ;
2014-01-02 13:54:40 +01:00
// no record and get termObject twice and iterate: should fail
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
script = createPositionsArrayScriptGetInfoObjectTwice ( " b " , includeAllWithoutRecordFlag , " position " ) ;
checkExceptions ( script ) ;
script = createPositionsArrayScriptGetInfoObjectTwice ( " b " , includeAllWithoutRecordFlag , " startOffset " ) ;
checkExceptions ( script ) ;
script = createPositionsArrayScriptGetInfoObjectTwice ( " b " , includeAllWithoutRecordFlag , " endOffset " ) ;
checkExceptions ( script ) ;
script = createPositionsArrayScriptGetInfoObjectTwice ( " b " , includeAllWithoutRecordFlag , " payloadAsInt(-1) " ) ;
checkExceptions ( script ) ;
}
/**
 * Builds a script that looks the term up on the payload field, collects one value per
 * position, then performs the same lookup a second time and iterates again.
 *
 * <p>The result of the second lookup is not assigned; the second loop iterates the
 * variable from the first lookup. The generated script has no return statement.
 *
 * @param term  term to look up
 * @param flags flag expression passed to both lookups
 * @param what  accessor appended after {@code pos.} inside the loops
 * @return the script source
 */
private String createPositionsArrayScriptGetInfoObjectTwice(String term, String flags, String what) {
    StringBuilder source = new StringBuilder();
    // first lookup + iteration
    source.append(" term = _index['int_payload_field'].get(' ").append(term).append(" ', ").append(flags);
    source.append(" ); array=[]; for (pos : term) {array.add(pos. ").append(what);
    // second lookup + iteration
    source.append(" )} ;_index['int_payload_field'].get(' ").append(term).append(" ', ").append(flags);
    source.append(" ); array=[]; for (pos : term) {array.add(pos. ").append(what);
    source.append(" )} ");
    return source.toString();
}
private String createPositionsArrayScriptIterateTwice ( String term , String flags , String what ) {
2014-01-02 13:54:40 +01:00
String script = " term = _index['int_payload_field'].get(' " + term + " ', " + flags
+ " ); array=[]; for (pos : term) {array.add(pos. " + what + " )} array=[]; for (pos : term) {array.add(pos. " + what
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
+ " )} return array; " ;
return script ;
}
private String createPositionsArrayScript ( String field , String term , String flags , String what ) {
2014-01-02 13:54:40 +01:00
String script = " term = _index[' " + field + " '].get(' " + term + " ', " + flags
+ " ); array=[]; for (pos : term) {array.add(pos. " + what + " )} return array; " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
return script ;
}
private String createPositionsArrayScriptDefaultGet ( String field , String term , String what ) {
2014-01-02 13:54:40 +01:00
String script = " term = _index[' " + field + " '][' " + term + " ']; array=[]; for (pos : term) {array.add(pos. " + what
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
+ " )} return array; " ;
return script ;
}
/**
 * Runs position-collecting scripts for one term of one field under every flag variant and
 * compares the per-document arrays with the expected fixtures. For each variant the four
 * members {@code position}, {@code startOffset}, {@code endOffset} and
 * {@code payloadAsInt(-1)} are checked in that order, always against three hits.
 */
@Test
public void testFlags() throws Exception {
    initTestData();
    final String field = " int_payload_field ";
    final String term = " b ";
    final int hitCount = 3;

    // default lookup without flags: no positions, offsets or payloads are expected
    final String[] members = { " position ", " startOffset ", " endOffset ", " payloadAsInt(-1) " };
    for (String member : members) {
        checkArrayValsInEachDoc(createPositionsArrayScriptDefaultGet(field, term, member), emptyArray, hitCount);
    }

    // FLAG_FREQUENCIES: still no positions, offsets or payloads
    checkPositionMembersForFlags(field, term, " _FREQUENCIES ", emptyArray, emptyArray, emptyArray, emptyArray, hitCount);

    // FLAG_POSITIONS: positions only, neither offsets nor payloads
    checkPositionMembersForFlags(field, term, " _POSITIONS ", expectedPositionsArray, emptyArray, emptyArray, emptyArray,
            hitCount);

    // FLAG_OFFSETS: positions and so forth are all expected
    checkPositionMembersForFlags(field, term, " _OFFSETS ", expectedPositionsArray, expectedStartOffsetsArray,
            expectedEndOffsetsArray, expectedPayloadsArray, hitCount);

    // FLAG_PAYLOADS: everything is expected as well
    checkPositionMembersForFlags(field, term, " _PAYLOADS ", expectedPositionsArray, expectedStartOffsetsArray,
            expectedEndOffsetsArray, expectedPayloadsArray, hitCount);

    // all flags combined
    final String allFlags = " _POSITIONS | _OFFSETS | _PAYLOADS ";
    checkPositionMembersForFlags(field, term, allFlags, expectedPositionsArray, expectedStartOffsetsArray,
            expectedEndOffsetsArray, expectedPayloadsArray, hitCount);

    // all flags without record
    checkPositionMembersForFlags(field, term, includeAllWithoutRecordFlag, expectedPositionsArray,
            expectedStartOffsetsArray, expectedEndOffsetsArray, expectedPayloadsArray, hitCount);
}

/**
 * Builds the flagged position script for each of the four members (position, start offset,
 * end offset, int payload -- in that order) and checks it against the matching expectation.
 */
private void checkPositionMembersForFlags(String field, String term, String flags,
        HashMap<String, List<Object>> positions, HashMap<String, List<Object>> startOffsets,
        HashMap<String, List<Object>> endOffsets, HashMap<String, List<Object>> payloads, int hitCount) {
    checkArrayValsInEachDoc(createPositionsArrayScript(field, term, flags, " position "), positions, hitCount);
    checkArrayValsInEachDoc(createPositionsArrayScript(field, term, flags, " startOffset "), startOffsets, hitCount);
    checkArrayValsInEachDoc(createPositionsArrayScript(field, term, flags, " endOffset "), endOffsets, hitCount);
    checkArrayValsInEachDoc(createPositionsArrayScript(field, term, flags, " payloadAsInt(-1) "), payloads, hitCount);
}
private void checkArrayValsInEachDoc ( String script , HashMap < String , List < Object > > expectedArray , int expectedHitSize ) {
SearchResponse sr = client ( ) . prepareSearch ( " test " ) . setQuery ( QueryBuilders . matchAllQuery ( ) ) . addScriptField ( " tvtest " , script )
. execute ( ) . actionGet ( ) ;
ElasticsearchAssertions . assertHitCount ( sr , expectedHitSize ) ;
int nullCounter = 0 ;
for ( SearchHit hit : sr . getHits ( ) . getHits ( ) ) {
Object result = hit . getFields ( ) . get ( " tvtest " ) . getValues ( ) . get ( 0 ) ;
Object expectedResult = expectedArray . get ( hit . getId ( ) ) ;
assertThat ( " for doc " + hit . getId ( ) , result , equalTo ( expectedResult ) ) ;
if ( expectedResult ! = null ) {
nullCounter + + ;
}
}
assertThat ( nullCounter , equalTo ( expectedArray . size ( ) ) ) ;
}
@Test
public void testAllExceptPosAndOffset ( ) throws Exception {
XContentBuilder mapping = XContentFactory . jsonBuilder ( ) . startObject ( ) . startObject ( " type1 " ) . startObject ( " properties " )
. startObject ( " float_payload_field " ) . field ( " type " , " string " ) . field ( " index_options " , " offsets " ) . field ( " term_vector " , " no " )
. field ( " analyzer " , " payload_float " ) . endObject ( ) . startObject ( " string_payload_field " ) . field ( " type " , " string " )
. field ( " index_options " , " offsets " ) . field ( " term_vector " , " no " ) . field ( " analyzer " , " payload_string " ) . endObject ( )
. startObject ( " int_payload_field " ) . field ( " type " , " string " ) . field ( " index_options " , " offsets " )
. field ( " analyzer " , " payload_int " ) . endObject ( ) . endObject ( ) . endObject ( ) . endObject ( ) ;
ElasticsearchAssertions . assertAcked ( prepareCreate ( " test " ) . addMapping ( " type1 " , mapping ) . setSettings (
ImmutableSettings . settingsBuilder ( ) . put ( " index.analysis.analyzer.payload_float.tokenizer " , " whitespace " )
. putArray ( " index.analysis.analyzer.payload_float.filter " , " delimited_float " )
. put ( " index.analysis.filter.delimited_float.delimiter " , " | " )
. put ( " index.analysis.filter.delimited_float.encoding " , " float " )
. put ( " index.analysis.filter.delimited_float.type " , " delimited_payload_filter " )
. put ( " index.analysis.analyzer.payload_string.tokenizer " , " whitespace " )
. putArray ( " index.analysis.analyzer.payload_string.filter " , " delimited_string " )
. put ( " index.analysis.filter.delimited_string.delimiter " , " | " )
. put ( " index.analysis.filter.delimited_string.encoding " , " identity " )
. put ( " index.analysis.filter.delimited_string.type " , " delimited_payload_filter " )
. put ( " index.analysis.analyzer.payload_int.tokenizer " , " whitespace " )
. putArray ( " index.analysis.analyzer.payload_int.filter " , " delimited_int " )
. put ( " index.analysis.filter.delimited_int.delimiter " , " | " )
. put ( " index.analysis.filter.delimited_int.encoding " , " int " )
. put ( " index.analysis.filter.delimited_int.type " , " delimited_payload_filter " ) . put ( " index.number_of_replicas " , 0 )
. put ( " index.number_of_shards " , 1 ) ) ) ;
ensureYellow ( ) ;
indexRandom ( true , client ( ) . prepareIndex ( " test " , " type1 " , " 1 " ) . setSource ( " float_payload_field " , " a|1 b|2 a|3 b " ) , client ( )
. prepareIndex ( " test " , " type1 " , " 2 " ) . setSource ( " string_payload_field " , " a|a b|b a|a b " ) ,
client ( ) . prepareIndex ( " test " , " type1 " , " 3 " ) . setSource ( " float_payload_field " , " a|4 b|5 a|6 b " ) ,
client ( ) . prepareIndex ( " test " , " type1 " , " 4 " ) . setSource ( " string_payload_field " , " a|b b|a a|b b " ) ,
client ( ) . prepareIndex ( " test " , " type1 " , " 5 " ) . setSource ( " float_payload_field " , " c " ) ,
client ( ) . prepareIndex ( " test " , " type1 " , " 6 " ) . setSource ( " int_payload_field " , " c|1 " ) ) ;
// get the number of all docs
2014-01-02 13:54:40 +01:00
String script = " _index.numDocs() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
checkValueInEachDoc ( 6 , script , 6 ) ;
// get the number of docs with field float_payload_field
2014-01-02 13:54:40 +01:00
script = " _index['float_payload_field'].docCount() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
checkValueInEachDoc ( 3 , script , 6 ) ;
// corner case: what if the field does not exist?
2014-01-02 13:54:40 +01:00
script = " _index['non_existent_field'].docCount() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
checkValueInEachDoc ( 0 , script , 6 ) ;
// get the number of all tokens in all docs
2014-01-02 13:54:40 +01:00
script = " _index['float_payload_field'].sumttf() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
checkValueInEachDoc ( 9 , script , 6 ) ;
// corner case get the number of all tokens in all docs for non existent
// field
2014-01-02 13:54:40 +01:00
script = " _index['non_existent_field'].sumttf() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
checkValueInEachDoc ( 0 , script , 6 ) ;
// get the sum of doc freqs in all docs
2014-01-02 13:54:40 +01:00
script = " _index['float_payload_field'].sumdf() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
checkValueInEachDoc ( 5 , script , 6 ) ;
// get the sum of doc freqs in all docs for non existent field
2014-01-02 13:54:40 +01:00
script = " _index['non_existent_field'].sumdf() " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
checkValueInEachDoc ( 0 , script , 6 ) ;
// check term frequencies for 'a'
2014-01-02 13:54:40 +01:00
script = " term = _index['float_payload_field']['a']; if (term != null) {term.tf()} " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
Map < String , Object > expectedResults = new HashMap < String , Object > ( ) ;
expectedResults . put ( " 1 " , 2 ) ;
expectedResults . put ( " 2 " , 0 ) ;
expectedResults . put ( " 3 " , 2 ) ;
expectedResults . put ( " 4 " , 0 ) ;
expectedResults . put ( " 5 " , 0 ) ;
expectedResults . put ( " 6 " , 0 ) ;
checkValueInEachDoc ( script , expectedResults , 6 ) ;
expectedResults . clear ( ) ;
// check doc frequencies for 'c'
2014-01-02 13:54:40 +01:00
script = " term = _index['float_payload_field']['c']; if (term != null) {term.df()} " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
expectedResults . put ( " 1 " , 1l ) ;
expectedResults . put ( " 2 " , 1l ) ;
expectedResults . put ( " 3 " , 1l ) ;
expectedResults . put ( " 4 " , 1l ) ;
expectedResults . put ( " 5 " , 1l ) ;
expectedResults . put ( " 6 " , 1l ) ;
checkValueInEachDoc ( script , expectedResults , 6 ) ;
expectedResults . clear ( ) ;
// check doc frequencies for term that does not exist
2014-01-02 13:54:40 +01:00
script = " term = _index['float_payload_field']['non_existent_term']; if (term != null) {term.df()} " ;
make term statistics accessible in scripts
term statistics can be accessed via the _shard variable.
Below is a minimal example. See documentation on details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
expectedResults . put ( " 1 " , 0l ) ;
expectedResults . put ( " 2 " , 0l ) ;
expectedResults . put ( " 3 " , 0l ) ;
expectedResults . put ( " 4 " , 0l ) ;
expectedResults . put ( " 5 " , 0l ) ;
expectedResults . put ( " 6 " , 0l ) ;
checkValueInEachDoc ( script , expectedResults , 6 ) ;
expectedResults . clear ( ) ;
// check term frequencies for a term in a field that does not exist
2014-01-02 13:54:40 +01:00
script = " term = _index['non_existent_field']['non_existent_term']; if (term != null) {term.tf()} " ;
Make term statistics accessible in scripts.
Term statistics can be accessed via the _shard variable.
Below is a minimal example; see the documentation for details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
expectedResults . put ( " 1 " , 0 ) ;
expectedResults . put ( " 2 " , 0 ) ;
expectedResults . put ( " 3 " , 0 ) ;
expectedResults . put ( " 4 " , 0 ) ;
expectedResults . put ( " 5 " , 0 ) ;
expectedResults . put ( " 6 " , 0 ) ;
checkValueInEachDoc ( script , expectedResults , 6 ) ;
expectedResults . clear ( ) ;
// check total term frequencies for 'a'
2014-01-02 13:54:40 +01:00
script = " term = _index['float_payload_field']['a']; if (term != null) {term.ttf()} " ;
Make term statistics accessible in scripts.
Term statistics can be accessed via the _shard variable.
Below is a minimal example; see the documentation for details.
```
DELETE paytest
PUT paytest
{
"mappings": {
"test": {
"_all": {
"auto_boost": true,
"enabled": true
},
"properties": {
"text": {
"index_analyzer": "fulltext_analyzer",
"store": "yes",
"type": "string"
}
}
}
},
"settings": {
"analysis": {
"analyzer": {
"fulltext_analyzer": {
"filter": [
"my_delimited_payload_filter"
],
"tokenizer": "whitespace",
"type": "custom"
}
},
"filter": {
"my_delimited_payload_filter": {
"delimiter": "+",
"encoding": "float",
"type": "delimited_payload_filter"
}
}
},
"index": {
"number_of_replicas": 0,
"number_of_shards": 1
}
}
}
POST paytest/test/1
{
"text": "the+1 quick+2 brown+3 fox+4 is quick+10"
}
POST paytest/test/2
{
"text": "the+1 quick+2 red+3 fox+4"
}
POST paytest/_refresh
POST paytest/_search
{
"script_fields": {
"ttf": {
"script": "_shard[\"text\"][\"quick\"].ttf()"
}
}
}
POST paytest/_search
{
"script_fields": {
"freq": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
}
POST paytest/test/2/_termvector
POST paytest/_search
{
"script_fields": {
"payloads": {
"script": "term = _shard[\"text\"].get(\"red\",_PAYLOADS);payloads = []; for(pos : term){payloads.add(pos.payloadAsFloat(-1));} return payloads;"
}
}
}
POST paytest/_search
{
"script_fields": {
"tv": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
},
"query": {
"function_score": {
"functions": [
{
"script_score": {
"script": "_shard[\"text\"][\"quick\"].freq()"
}
}
]
}
}
}
```
closes #3772
2014-01-02 11:17:33 +01:00
expectedResults . put ( " 1 " , 4l ) ;
expectedResults . put ( " 2 " , 4l ) ;
expectedResults . put ( " 3 " , 4l ) ;
expectedResults . put ( " 4 " , 4l ) ;
expectedResults . put ( " 5 " , 4l ) ;
expectedResults . put ( " 6 " , 4l ) ;
checkValueInEachDoc ( script , expectedResults , 6 ) ;
expectedResults . clear ( ) ;
// check float payload for 'b'
HashMap < String , List < Object > > expectedPayloadsArray = new HashMap < String , List < Object > > ( ) ;
script = createPositionsArrayScript ( " float_payload_field " , " b " , includeAllFlag , " payloadAsFloat(-1) " ) ;
float missingValue = - 1 ;
List < Object > payloadsFor1 = new ArrayList < Object > ( ) ;
payloadsFor1 . add ( 2f ) ;
payloadsFor1 . add ( missingValue ) ;
expectedPayloadsArray . put ( " 1 " , payloadsFor1 ) ;
List < Object > payloadsFor2 = new ArrayList < Object > ( ) ;
payloadsFor2 . add ( 5f ) ;
payloadsFor2 . add ( missingValue ) ;
expectedPayloadsArray . put ( " 3 " , payloadsFor2 ) ;
expectedPayloadsArray . put ( " 6 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 5 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 4 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 2 " , new ArrayList < Object > ( ) ) ;
checkArrayValsInEachDoc ( script , expectedPayloadsArray , 6 ) ;
// check string payload for 'b'
expectedPayloadsArray . clear ( ) ;
payloadsFor1 . clear ( ) ;
payloadsFor2 . clear ( ) ;
script = createPositionsArrayScript ( " string_payload_field " , " b " , includeAllFlag , " payloadAsString() " ) ;
payloadsFor1 . add ( " b " ) ;
payloadsFor1 . add ( null ) ;
expectedPayloadsArray . put ( " 2 " , payloadsFor1 ) ;
payloadsFor2 . add ( " a " ) ;
payloadsFor2 . add ( null ) ;
expectedPayloadsArray . put ( " 4 " , payloadsFor2 ) ;
expectedPayloadsArray . put ( " 6 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 5 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 3 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 1 " , new ArrayList < Object > ( ) ) ;
checkArrayValsInEachDoc ( script , expectedPayloadsArray , 6 ) ;
// check int payload for 'c'
expectedPayloadsArray . clear ( ) ;
payloadsFor1 . clear ( ) ;
payloadsFor2 . clear ( ) ;
script = createPositionsArrayScript ( " int_payload_field " , " c " , includeAllFlag , " payloadAsInt(-1) " ) ;
payloadsFor1 = new ArrayList < Object > ( ) ;
payloadsFor1 . add ( 1 ) ;
expectedPayloadsArray . put ( " 6 " , payloadsFor1 ) ;
expectedPayloadsArray . put ( " 5 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 4 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 3 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 2 " , new ArrayList < Object > ( ) ) ;
expectedPayloadsArray . put ( " 1 " , new ArrayList < Object > ( ) ) ;
checkArrayValsInEachDoc ( script , expectedPayloadsArray , 6 ) ;
}
/**
 * Runs {@code script} as a script field in a match-all search and verifies that it is rejected
 * with the "Cannot iterate twice" error.
 *
 * <p>The error may surface in two ways, and both are accepted: as a response with zero hits whose
 * shard failures each carry the message, or as a thrown {@link SearchPhaseExecutionException}
 * whose detailed message carries it. If the response has zero hits and reports no shard failures,
 * only the zero-hit assertion applies.
 *
 * @param script the script expected to trigger the error
 */
private void checkExceptions(String script) {
    // Matched verbatim against the error text, so the wording (typos included) must not be altered.
    final String expectedMessage = " Cannot iterate twice! If you want to iterate more that once, add _CACHE explicitely. ";
    try {
        SearchResponse response = client().prepareSearch(" test ")
                .setQuery(QueryBuilders.matchAllQuery())
                .addScriptField(" tvtest ", script)
                .execute()
                .actionGet();
        assertThat(response.getHits().hits().length, equalTo(0));
        for (ShardSearchFailure failure : response.getShardFailures()) {
            int messagePosition = failure.reason().indexOf(expectedMessage);
            assertThat(messagePosition, Matchers.greaterThan(-1));
        }
    } catch (SearchPhaseExecutionException e) {
        int messagePosition = e.getDetailedMessage().indexOf(expectedMessage);
        assertThat(messagePosition, Matchers.greaterThan(-1));
    }
}
/**
 * Runs a function-score query scored by {@code scoreScript}, with {@code fieldScript} added as a
 * script field, and checks both the script-field value and the score of every hit.
 *
 * @param fieldScript       script evaluated as the script field
 * @param expectedFieldVals expected script-field value, keyed by document id
 * @param scoreScript       script used as the scoring function
 * @param expectedScore     expected score, keyed by document id; values are cast to {@link Float}
 * @param numExpectedDocs   number of hits the search must return
 */
private void checkValueInEachDocWithFunctionScore(String fieldScript, Map<String, Object> expectedFieldVals, String scoreScript,
        Map<String, Object> expectedScore, int numExpectedDocs) {
    SearchResponse response = client().prepareSearch(" test ")
            .setQuery(QueryBuilders.functionScoreQuery(ScoreFunctionBuilders.scriptFunction(scoreScript)))
            .addScriptField(" tvtest ", fieldScript)
            .execute()
            .actionGet();
    ElasticsearchAssertions.assertHitCount(response, numExpectedDocs);

    SearchHit[] hits = response.getHits().getHits();
    for (int i = 0; i < hits.length; i++) {
        SearchHit hit = hits[i];

        // Only the first value of the script field is compared.
        Object actualFieldValue = hit.getFields().get(" tvtest ").getValues().get(0);
        Object wantedFieldValue = expectedFieldVals.get(hit.getId());
        assertThat(" for doc " + hit.getId(), actualFieldValue, equalTo(wantedFieldValue));

        // NOTE(review): the expected score sits in the "actual" slot here; closeTo is symmetric,
        // so pass/fail is unaffected, only the wording of a failure message.
        Float wantedScore = (Float) expectedScore.get(hit.getId());
        assertThat(" for doc " + hit.getId(), wantedScore.doubleValue(), Matchers.closeTo(hit.score(), 1.e-4));
    }
}
/**
 * Runs {@code script} as a script field in a match-all search and checks, for every hit, that the
 * first script-field value equals the entry stored in {@code expectedResults} under the hit's id.
 *
 * @param script          script evaluated as the script field
 * @param expectedResults expected script result, keyed by document id
 * @param numExpectedDocs number of hits the search must return
 */
private void checkValueInEachDoc(String script, Map<String, Object> expectedResults, int numExpectedDocs) {
    SearchResponse response = client().prepareSearch(" test ")
            .setQuery(QueryBuilders.matchAllQuery())
            .addScriptField(" tvtest ", script)
            .execute()
            .actionGet();
    ElasticsearchAssertions.assertHitCount(response, numExpectedDocs);

    SearchHit[] hits = response.getHits().getHits();
    for (int i = 0; i < hits.length; i++) {
        SearchHit hit = hits[i];
        Object actual = hit.getFields().get(" tvtest ").getValues().get(0);
        Object wanted = expectedResults.get(hit.getId());
        assertThat(" for doc " + hit.getId(), actual, equalTo(wanted));
    }
}
/**
 * Runs {@code script} as a script field in a match-all search and checks that the first
 * script-field value of every hit is an {@link Integer} or {@link Long} equal to {@code value}.
 *
 * <p>Any other result type (including {@code null}) fails the test with an explicit
 * {@link AssertionError}. A bare {@code assert false} would be a no-op when the JVM runs without
 * {@code -ea}, letting an unexpected type pass silently.
 *
 * @param value           the number every document is expected to yield
 * @param script          script evaluated as the script field
 * @param numExpectedDocs number of hits the search must return
 */
private void checkValueInEachDoc(int value, String script, int numExpectedDocs) {
    SearchResponse sr = client().prepareSearch(" test ")
            .setQuery(QueryBuilders.matchAllQuery())
            .addScriptField(" tvtest ", script)
            .execute()
            .actionGet();
    ElasticsearchAssertions.assertHitCount(sr, numExpectedDocs);
    for (SearchHit hit : sr.getHits().getHits()) {
        Object result = hit.getFields().get(" tvtest ").getValues().get(0);
        if (result instanceof Integer || result instanceof Long) {
            // Compare as long: narrowing a Long with intValue() could make a large value
            // accidentally equal to the expected int.
            assertThat(" for doc " + hit.getId(), ((Number) result).longValue(), equalTo((long) value));
        } else {
            String actualType = (result == null) ? "null" : result.getClass().getName();
            throw new AssertionError("Unexpected script result for doc " + hit.getId()
                    + ": expected Integer or Long but got " + actualType);
        }
    }
}
}