mirror of
https://github.com/spring-projects/spring-data-elasticsearch.git
synced 2025-10-15 23:08:54 +00:00
318 lines
11 KiB
Plaintext
318 lines
11 KiB
Plaintext
[[elasticsearch.misc]]
|
|
= Miscellaneous Elasticsearch Operation Support
|
|
|
|
This chapter covers additional support for Elasticsearch operations that cannot be directly accessed via the repository interface.
|
|
It is recommended to add those operations as a custom implementation as described in <<repositories.custom-implementations>>.
|
|
|
|
[[elasticsearc.misc.index.settings]]
|
|
== Index settings
|
|
|
|
When creating Elasticsearch indices with Spring Data Elasticsearch different index settings can be defined by using the `@Setting` annotation.
|
|
The following arguments are available:
|
|
|
|
* `useServerConfiguration` does not send any settings parameters, so the Elasticsearch server configuration determines them.
|
|
* `settingPath` refers to a JSON file defining the settings; the file must be resolvable in the classpath
|
|
* `shards` the number of shards to use, defaults to _1_
|
|
* `replicas` the number of replicas, defaults to _1_
|
|
* `refreshInterval`, defaults to _"1s"_
|
|
* `indexStoreType`, defaults to _"fs"_
|
|
|
|
|
|
It is also possible to define https://www.elastic.co/guide/en/elasticsearch/reference/7.11/index-modules-index-sorting.html[index sorting] (check the linked Elasticsearch documentation for the possible field types and values):
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
@Document(indexName = "entities")
|
|
@Setting(
|
|
sortFields = { "secondField", "firstField" }, <.>
|
|
sortModes = { Setting.SortMode.max, Setting.SortMode.min }, <.>
|
|
sortOrders = { Setting.SortOrder.desc, Setting.SortOrder.asc },
|
|
sortMissingValues = { Setting.SortMissing._last, Setting.SortMissing._first })
|
|
class Entity {
|
|
@Nullable
|
|
@Id private String id;
|
|
|
|
@Nullable
|
|
@Field(name = "first_field", type = FieldType.Keyword)
|
|
private String firstField;
|
|
|
|
@Nullable @Field(name = "second_field", type = FieldType.Keyword)
|
|
private String secondField;
|
|
|
|
// getter and setter...
|
|
}
|
|
----
|
|
|
|
<.> when defining sort fields, use the name of the Java property (_firstField_), not the name that might be defined for Elasticsearch (_first_field_)
|
|
<.> `sortModes`, `sortOrders` and `sortMissingValues` are optional, but if they are set, the number of entries must match the number of `sortFields` elements
|
|
====
|
|
|
|
[[elasticsearch.misc.mappings]]
|
|
== Index Mapping
|
|
|
|
When Spring Data Elasticsearch creates the index mapping with the `IndexOperations.createMapping()` methods, it uses the annotations described in <<elasticsearch.mapping.meta-model.annotations>>, especially the `@Field` annotation.
|
|
In addition to that it is possible to add the `@Mapping` annotation to a class.
|
|
This annotation has the following properties:
|
|
|
|
* `mappingPath` a classpath resource in JSON format; if this is not empty it is used as the mapping, no other mapping processing is done.
|
|
* `enabled` when set to false, this flag is written to the mapping and no further processing is done.
|
|
* `dateDetection` and `numericDetection` set the corresponding properties in the mapping when not set to `DEFAULT`.
|
|
* `dynamicDateFormats` when this String array is not empty, it defines the date formats used for automatic date detection.
|
|
* `runtimeFieldsPath` a classpath resource in JSON format containing the definition of runtime fields which is written to the index mappings, for example:
|
|
|
|
====
|
|
[source,json]
|
|
----
|
|
{
|
|
"day_of_week": {
|
|
"type": "keyword",
|
|
"script": {
|
|
"source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))"
|
|
}
|
|
}
|
|
}
|
|
----
|
|
====
|
|
|
|
[[elasticsearch.misc.filter]]
|
|
== Filter Builder
|
|
|
|
Filter Builder improves query speed.
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
private ElasticsearchOperations operations;
|
|
|
|
IndexCoordinates index = IndexCoordinates.of("sample-index");
|
|
|
|
Query query = NativeQuery.builder()
|
|
.withQuery(q -> q
|
|
.matchAll(ma -> ma))
|
|
.withFilter( q -> q
|
|
.bool(b -> b
|
|
.must(m -> m
|
|
.term(t -> t
|
|
.field("id")
|
|
.value(documentId))
|
|
)))
|
|
.build();
|
|
|
|
SearchHits<SampleEntity> sampleEntities = operations.search(query, SampleEntity.class, index);
|
|
----
|
|
====
|
|
|
|
[[elasticsearch.scroll]]
|
|
== Using Scroll For Big Result Set
|
|
|
|
Elasticsearch has a scroll API for getting big result sets in chunks.
|
|
This is internally used by Spring Data Elasticsearch to provide the implementations of the `<T> SearchHitsIterator<T> SearchOperations.searchForStream(Query query, Class<T> clazz, IndexCoordinates index)` method.
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
IndexCoordinates index = IndexCoordinates.of("sample-index");
|
|
|
|
Query searchQuery = NativeQuery.builder()
|
|
.withQuery(q -> q
|
|
.matchAll(ma -> ma))
|
|
.withFields("message")
|
|
.withPageable(PageRequest.of(0, 10))
|
|
.build();
|
|
|
|
SearchHitsIterator<SampleEntity> stream = elasticsearchOperations.searchForStream(searchQuery, SampleEntity.class,
|
|
index);
|
|
|
|
List<SampleEntity> sampleEntities = new ArrayList<>();
|
|
while (stream.hasNext()) {
|
|
sampleEntities.add(stream.next());
|
|
}
|
|
|
|
stream.close();
|
|
----
|
|
====
|
|
|
|
There are no methods in the `SearchOperations` API to access the scroll id; if it should be necessary to access this,
|
|
the following methods of the `AbstractElasticsearchTemplate` can be used (this is the base implementation for the
|
|
different `ElasticsearchOperations` implementations):
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
|
|
@Autowired ElasticsearchOperations operations;
|
|
|
|
AbstractElasticsearchTemplate template = (AbstractElasticsearchTemplate)operations;
|
|
|
|
IndexCoordinates index = IndexCoordinates.of("sample-index");
|
|
|
|
Query query = NativeQuery.builder()
|
|
.withQuery(q -> q
|
|
.matchAll(ma -> ma))
|
|
.withFields("message")
|
|
.withPageable(PageRequest.of(0, 10))
|
|
.build();
|
|
|
|
SearchScrollHits<SampleEntity> scroll = template.searchScrollStart(1000, query, SampleEntity.class, index);
|
|
|
|
String scrollId = scroll.getScrollId();
|
|
List<SampleEntity> sampleEntities = new ArrayList<>();
|
|
while (scroll.hasSearchHits()) {
|
|
sampleEntities.addAll(scroll.getSearchHits());
|
|
scrollId = scroll.getScrollId();
|
|
scroll = template.searchScrollContinue(scrollId, 1000, SampleEntity.class);
|
|
}
|
|
template.searchScrollClear(scrollId);
|
|
----
|
|
====
|
|
|
|
To use the Scroll API with repository methods, the return type must be defined as `Stream` in the Elasticsearch Repository.
|
|
The implementation of the method will then use the scroll methods from the ElasticsearchTemplate.
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
interface SampleEntityRepository extends Repository<SampleEntity, String> {
|
|
|
|
Stream<SampleEntity> findBy();
|
|
|
|
}
|
|
----
|
|
====
|
|
|
|
[[elasticsearch.misc.sorts]]
|
|
== Sort options
|
|
|
|
In addition to the default sort options described in <<repositories.paging-and-sorting>>, Spring Data Elasticsearch provides the class `org.springframework.data.elasticsearch.core.query.Order` which derives from `org.springframework.data.domain.Sort.Order`.
|
|
It offers additional parameters that can be sent to Elasticsearch when specifying the sorting of the result (see https://www.elastic.co/guide/en/elasticsearch/reference/7.15/sort-search-results.html).
|
|
|
|
There is also the `org.springframework.data.elasticsearch.core.query.GeoDistanceOrder` class which can be used to have the result of a search operation ordered by geographical distance.
|
|
|
|
If the class to be retrieved has a `GeoPoint` property named _location_, the following `Sort` would sort the results by distance to the given point:
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
Sort.by(new GeoDistanceOrder("location", new GeoPoint(48.137154, 11.5761247)))
|
|
----
|
|
====
|
|
|
|
[[elasticsearch.misc.runtime-fields]]
|
|
== Runtime Fields
|
|
|
|
From version 7.12 on Elasticsearch has added the feature of runtime fields (https://www.elastic.co/guide/en/elasticsearch/reference/7.12/runtime.html).
|
|
Spring Data Elasticsearch supports this in two ways:
|
|
|
|
[[elasticsearch.misc.runtime-fields.index-mappings]]
|
|
=== Runtime field definitions in the index mappings
|
|
|
|
The first way to define runtime fields is by adding the definitions to the index mappings (see https://www.elastic.co/guide/en/elasticsearch/reference/7.12/runtime-mapping-fields.html).
|
|
To use this approach in Spring Data Elasticsearch the user must provide a JSON file that contains the corresponding definition, for example:
|
|
|
|
.runtime-fields.json
|
|
====
|
|
[source,json]
|
|
----
|
|
{
|
|
"day_of_week": {
|
|
"type": "keyword",
|
|
"script": {
|
|
"source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))"
|
|
}
|
|
}
|
|
}
|
|
----
|
|
====
|
|
|
|
The path to this JSON file, which must be present on the classpath, must then be set in the `@Mapping` annotation of the entity:
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
@Document(indexName = "runtime-fields")
|
|
@Mapping(runtimeFieldsPath = "/runtime-fields.json")
|
|
public class RuntimeFieldEntity {
|
|
// properties, getter, setter,...
|
|
}
|
|
|
|
----
|
|
====
|
|
|
|
[[elasticsearch.misc.runtime-fields.query]]
|
|
=== Runtime fields definitions set on a Query
|
|
|
|
The second way to define runtime fields is by adding the definitions to a search query (see https://www.elastic.co/guide/en/elasticsearch/reference/7.12/runtime-search-request.html).
|
|
The following code example shows how to do this with Spring Data Elasticsearch:
|
|
|
|
The entity used is a simple object that has a `price` property:
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
@Document(indexName = "some_index_name")
|
|
public class SomethingToBuy {
|
|
|
|
private @Id @Nullable String id;
|
|
@Nullable @Field(type = FieldType.Text) private String description;
|
|
@Nullable @Field(type = FieldType.Double) private Double price;
|
|
|
|
// getter and setter
|
|
}
|
|
|
|
----
|
|
====
|
|
|
|
The following query uses a runtime field that calculates a `priceWithTax` value by adding 19% to the price and uses this value in the search query to find all entities where `priceWithTax` is greater than or equal to a given value:
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
RuntimeField runtimeField = new RuntimeField("priceWithTax", "double", "emit(doc['price'].value * 1.19)");
|
|
Query query = new CriteriaQuery(new Criteria("priceWithTax").greaterThanEqual(16.5));
|
|
query.addRuntimeField(runtimeField);
|
|
|
|
SearchHits<SomethingToBuy> searchHits = operations.search(query, SomethingToBuy.class);
|
|
----
|
|
====
|
|
|
|
This works with every implementation of the `Query` interface.
|
|
|
|
[[elasticsearch.misc.point-in-time]]
|
|
== Point In Time (PIT) API
|
|
|
|
`ElasticsearchOperations` supports the point in time API of Elasticsearch (see https://www.elastic.co/guide/en/elasticsearch/reference/8.3/point-in-time-api.html).
|
|
The following code snippet shows how to use this feature with a fictional `Person` class:
|
|
|
|
====
|
|
[source,java]
|
|
----
|
|
ElasticsearchOperations operations; // autowired
|
|
Duration tenSeconds = Duration.ofSeconds(10);
|
|
|
|
String pit = operations.openPointInTime(IndexCoordinates.of("person"), tenSeconds); <.>
|
|
|
|
// create query for the pit
|
|
Query query1 = new CriteriaQueryBuilder(Criteria.where("lastName").is("Smith"))
|
|
.withPointInTime(new Query.PointInTime(pit, tenSeconds)) <.>
|
|
.build();
|
|
SearchHits<Person> searchHits1 = operations.search(query1, Person.class);
|
|
// do something with the data
|
|
|
|
// create 2nd query for the pit, use the id returned in the previous result
|
|
Query query2 = new CriteriaQueryBuilder(Criteria.where("lastName").is("Miller"))
|
|
.withPointInTime(
|
|
new Query.PointInTime(searchHits1.getPointInTimeId(), tenSeconds)) <.>
|
|
.build();
|
|
SearchHits<Person> searchHits2 = operations.search(query2, Person.class);
|
|
// do something with the data
|
|
|
|
operations.closePointInTime(searchHits2.getPointInTimeId()); <.>
|
|
|
|
----
|
|
<.> create a point in time for an index (can be multiple names) and a keep-alive duration and retrieve its id
|
|
<.> pass that id into the query to search together with the next keep-alive value
|
|
<.> for the next query, use the id returned from the previous search
|
|
<.> when done, close the point in time using the last returned id
|
|
====
|