diff --git a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java index f7ff4dd1428..ea9b785b732 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java @@ -41,14 +41,31 @@ import org.elasticsearch.indices.fielddata.breaker.CircuitBreakerService; public interface IndexFieldData extends IndexComponent { public static class CommonSettings { + public static String SETTING_MEMORY_STORAGE_HINT = "memory_storage_hint"; + + public enum MemoryStorageFormat { + ORDINALS, PACKED, PAGED; + + public static MemoryStorageFormat fromString(String string) { + for (MemoryStorageFormat e : MemoryStorageFormat.values()) { + if (e.name().equalsIgnoreCase(string)) { + return e; + } + } + return null; + } + } /** - * Should single value cross documents case be optimized to remove ords. Note, this optimization - * might not be supported by all Field Data implementations, but the ones that do, should consult - * this method to check if it should be done or not. + * Gets a memory storage hint that should be honored if possible but is not mandatory */ - public static boolean removeOrdsOnSingleValue(FieldDataType fieldDataType) { - return !"always".equals(fieldDataType.getSettings().get("ordinals")); + public static MemoryStorageFormat getMemoryStorageHint(FieldDataType fieldDataType) { + // backwards compatibility + String s = fieldDataType.getSettings().get("ordinals"); + if (s != null) { + return "always".equals(s) ? MemoryStorageFormat.ORDINALS : null; + } + return MemoryStorageFormat.fromString(fieldDataType.getSettings().get(SETTING_MEMORY_STORAGE_HINT)); } } diff --git a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java index bb708a49f96..4e5b7252a17 100644 --- a/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java +++ b/src/main/java/org/elasticsearch/index/fielddata/plain/DoubleArrayIndexFieldData.java @@ -98,7 +98,9 @@ public class DoubleArrayIndexFieldData extends AbstractIndexFieldData Long.MIN_VALUE) { @@ -159,40 +160,66 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData 0) { + // last page estimation + pageIndex++; + if (pageMaxOrdinal == Long.MAX_VALUE) { + // empty page - will use the null reader which just stores size + pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT); + + } else { + long pageMinValue = values.get(pageMinOrdinal - 1); + long pageMaxValue = values.get(pageMaxOrdinal - 1); + long pageDelta = pageMaxValue - pageMinValue; + if (pageDelta != 0) { + bitsRequired = valuesDelta < 0 ? 64 : PackedInts.bitsRequired(pageDelta); + formatAndBits = PackedInts.fastestFormatAndBits(pageSize, bitsRequired, acceptableOverheadRatio); + pagedSingleValuesSize += formatAndBits.format.longCount(PackedInts.VERSION_CURRENT, pageSize, formatAndBits.bitsPerValue) * RamUsageEstimator.NUM_BYTES_LONG; + pagedSingleValuesSize += RamUsageEstimator.NUM_BYTES_LONG; // min value per page storage + } else { + // empty page + pagedSingleValuesSize += RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + RamUsageEstimator.NUM_BYTES_INT); + } + } + } + + if (ordinalsSize < singleValuesSize) { + if (ordinalsSize < pagedSingleValuesSize) { + format = CommonSettings.MemoryStorageFormat.ORDINALS; + } else { + format = CommonSettings.MemoryStorageFormat.PAGED; + } + } else { + if (pagedSingleValuesSize < singleValuesSize) { + format = CommonSettings.MemoryStorageFormat.PAGED; + } else { + format = CommonSettings.MemoryStorageFormat.PACKED; + } + } + return format; + } + @Override public XFieldComparatorSource comparatorSource(@Nullable Object missingValue, SortMode sortMode) { return new LongValuesComparatorSource(this, missingValue, sortMode); @@ -266,6 +380,7 @@ public class PackedArrayIndexFieldData extends AbstractIndexFieldData Indexing [" + COUNT + "] ..."); + long ITERS = COUNT / BATCH; + long i = 1; + int counter = 0; + long[] currentTimeInMillis1 = new long[]{System.currentTimeMillis()}; + long[] currentTimeInMillis2 = new long[]{System.currentTimeMillis()}; + long startTimeInMillis = currentTimeInMillis1[0]; + long averageMillisChange = TIME_PERIOD / COUNT * 2; + long backwardSkew = Math.max(1, (long) (averageMillisChange * 0.1)); + long bigOutOfOrder = 1; + for (; i <= ITERS; i++) { + BulkRequestBuilder request = client.prepareBulk(); + for (int j = 0; j < BATCH; j++) { + counter++; + + XContentBuilder builder = jsonBuilder().startObject(); + builder.field("id", Integer.toString(counter)); + // move forward in time and sometimes a little bit back (delayed delivery) + long diff = ThreadLocalRandom.current().nextLong(2 * averageMillisChange + 2 * backwardSkew) - backwardSkew; + long[] currentTime = counter % 2 == 0 ? currentTimeInMillis1 : currentTimeInMillis2; + currentTime[0] += diff; + if (ThreadLocalRandom.current().nextLong(100) <= bigOutOfOrder) { + builder.field("l_value", currentTime[0] - 60000); // 1m delays + } else { + builder.field("l_value", currentTime[0]); + } + + builder.endObject(); + + request.add(Requests.indexRequest("test").type("type1").id(Integer.toString(counter)) + .source(builder)); + } + BulkResponse response = request.execute().actionGet(); + if (response.hasFailures()) { + System.err.println("--> failures..."); + } + if (((i * BATCH) % 10000) == 0) { + System.out.println("--> Indexed " + (i * BATCH) + " took " + stopWatch.stop().lastTaskTime()); + stopWatch.start(); + } + } + System.out.println("--> Indexing took " + stopWatch.totalTime() + ", TPS " + (((double) (COUNT)) / stopWatch.totalTime().secondsFrac())); + System.out.println("Time range 1: " + (currentTimeInMillis1[0] - startTimeInMillis) / 1000.0 / 3600 + " hours"); + System.out.println("Time range 2: " + (currentTimeInMillis2[0] - startTimeInMillis) / 1000.0 / 3600 + " hours"); + System.out.println("--> optimizing index"); + client.admin().indices().prepareOptimize().setMaxNumSegments(1).get(); + } catch (IndexAlreadyExistsException e) { + System.out.println("--> Index already exists, ignoring indexing phase, waiting for green"); + ClusterHealthResponse clusterHealthResponse = client.admin().cluster().prepareHealth().setWaitForGreenStatus().setTimeout("10m").execute().actionGet(); + if (clusterHealthResponse.isTimedOut()) { + System.err.println("--> Timed out waiting for cluster health"); + } + } + client.admin().indices().prepareRefresh().execute().actionGet(); + COUNT = client.prepareCount().setQuery(matchAllQuery()).execute().actionGet().getCount(); + System.out.println("--> Number of docs in index: " + COUNT); + + // load with the reverse options to make sure jit doesn't optimize one away + setMapping(ACCEPTABLE_OVERHEAD_RATIO, MEMORY_FORMAT.equals(IndexFieldData.CommonSettings.MemoryStorageFormat.PACKED) ? IndexFieldData.CommonSettings.MemoryStorageFormat.PAGED : IndexFieldData.CommonSettings.MemoryStorageFormat.PACKED); + warmUp("hist_l", "l_value", MATCH_PERCENTAGE); + + setMapping(ACCEPTABLE_OVERHEAD_RATIO, MEMORY_FORMAT); + warmUp("hist_l", "l_value", MATCH_PERCENTAGE); + + List stats = Lists.newArrayList(); + stats.add(measureAgg("hist_l", "l_value", MATCH_PERCENTAGE)); + + NodesStatsResponse nodeStats = client.admin().cluster().prepareNodesStats(nodes[0].settings().get("name")).clear() + .setIndices(new CommonStatsFlags(CommonStatsFlags.Flag.FieldData)).get(); + + + System.out.println("------------------ SUMMARY -------------------------------"); + + System.out.println("docs: " + COUNT); + System.out.println("match percentage: " + MATCH_PERCENTAGE); + System.out.println("memory format hint: " + MEMORY_FORMAT); + System.out.println("acceptable_overhead_ratio: " + ACCEPTABLE_OVERHEAD_RATIO); + System.out.println("field data: " + nodeStats.getNodes()[0].getIndices().getFieldData().getMemorySize()); + System.out.format(Locale.ROOT, "%25s%10s%10s\n", "name", "took", "millis"); + for (StatsResult stat : stats) { + System.out.format(Locale.ROOT, "%25s%10s%10d\n", stat.name, TimeValue.timeValueMillis(stat.took), (stat.took / QUERY_COUNT)); + } + System.out.println("------------------ SUMMARY -------------------------------"); + + for (Node node : nodes) { + node.close(); + } + } + + protected static void setMapping(double acceptableOverheadRatio, IndexFieldData.CommonSettings.MemoryStorageFormat fielddataStorageFormat) throws IOException { + XContentBuilder mapping = JsonXContent.contentBuilder(); + mapping.startObject().startObject("type1").startObject("properties").startObject("l_value") + .field("type", "long") + .startObject("fielddata") + .field("acceptable_transient_overhead_ratio", acceptableOverheadRatio) + .field("acceptable_overhead_ratio", acceptableOverheadRatio) + .field(IndexFieldData.CommonSettings.SETTING_MEMORY_STORAGE_HINT, fielddataStorageFormat.name().toLowerCase(Locale.ROOT)) + .endObject() + .endObject().endObject().endObject().endObject(); + client.admin().indices().preparePutMapping("test").setType("type1").setSource(mapping).get(); + } + + static class StatsResult { + final String name; + final long took; + + StatsResult(String name, long took) { + this.name = name; + this.took = took; + } + } + + private static SearchResponse doTermsAggsSearch(String name, String field, float matchPercentage) { + SearchResponse response = client.prepareSearch() + .setSearchType(SearchType.COUNT) + .setQuery(QueryBuilders.constantScoreQuery(FilterBuilders.scriptFilter("random() warning - big deviation from expected count: " + response.getHits().totalHits() + " expected: " + COUNT * matchPercentage); + } + + return response; + } + + private static StatsResult measureAgg(String name, String field, float matchPercentage) { + long totalQueryTime;// LM VALUE + + System.out.println("--> Running (" + name + ")..."); + totalQueryTime = 0; + long previousCount = 0; + for (int j = 0; j < QUERY_COUNT; j++) { + SearchResponse searchResponse = doTermsAggsSearch(name, field, matchPercentage); + if (previousCount == 0) { + previousCount = searchResponse.getHits().getTotalHits(); + } else if (searchResponse.getHits().totalHits() != previousCount) { + System.err.println("*** HIT COUNT CHANGE -> CACHE EXPIRED? ***"); + } + totalQueryTime += searchResponse.getTookInMillis(); + } + System.out.println("--> Histogram aggregations (" + field + "): " + (totalQueryTime / QUERY_COUNT) + "ms"); + return new StatsResult(name, totalQueryTime); + } + + private static void warmUp(String name, String field, float matchPercentage) { + System.out.println("--> Warmup (" + name + ")..."); + client.admin().indices().prepareClearCache().setFieldDataCache(true).execute().actionGet(); + + // run just the child query, warm up first + for (int j = 0; j < QUERY_WARMUP; j++) { + SearchResponse searchResponse = doTermsAggsSearch(name, field, matchPercentage); + if (j == 0) { + System.out.println("--> Loading (" + field + "): took: " + searchResponse.getTook()); + } + } + System.out.println("--> Warmup (" + name + ") DONE"); + } +} diff --git a/src/test/java/org/elasticsearch/index/fielddata/AbstractNumericFieldDataTests.java b/src/test/java/org/elasticsearch/index/fielddata/AbstractNumericFieldDataTests.java index 817a7c1e710..d001fb93744 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/AbstractNumericFieldDataTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/AbstractNumericFieldDataTests.java @@ -24,9 +24,12 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.StringField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.search.*; +import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.index.fielddata.fieldcomparator.SortMode; import org.junit.Test; +import java.util.Locale; + import static org.hamcrest.Matchers.equalTo; /** @@ -35,6 +38,16 @@ public abstract class AbstractNumericFieldDataTests extends AbstractFieldDataImp protected abstract FieldDataType getFieldDataType(); + protected ImmutableSettings.Builder getFieldDataSettings() { + ImmutableSettings.Builder builder = ImmutableSettings.builder(); + IndexFieldData.CommonSettings.MemoryStorageFormat[] formats = IndexFieldData.CommonSettings.MemoryStorageFormat.values(); + int i = randomInt(formats.length); + if (i < formats.length) { + builder.put(IndexFieldData.CommonSettings.SETTING_MEMORY_STORAGE_HINT, formats[i].name().toLowerCase(Locale.ROOT)); + } + return builder; + } + @Test public void testSingleValueAllSetNumber() throws Exception { fillSingleValueAllSet(); @@ -118,7 +131,7 @@ public abstract class AbstractNumericFieldDataTests extends AbstractFieldDataImp assertThat(1, equalTo(doubleValues.setDocument(2))); assertThat(doubleValues.nextValue(), equalTo(3d)); - + IndexSearcher searcher = new IndexSearcher(readerContext.reader()); TopFieldDocs topDocs; @@ -236,7 +249,7 @@ public abstract class AbstractNumericFieldDataTests extends AbstractFieldDataImp assertThat(1, equalTo(doubleValues.setDocument(2))); assertThat(doubleValues.nextValue(), equalTo(3d)); - + } @Test @@ -268,7 +281,7 @@ public abstract class AbstractNumericFieldDataTests extends AbstractFieldDataImp assertThat(0, equalTo(doubleValues.setDocument(1))); assertThat(0, equalTo(doubleValues.setDocument(2))); - } + } protected void fillAllMissing() throws Exception { diff --git a/src/test/java/org/elasticsearch/index/fielddata/DoubleFieldDataTests.java b/src/test/java/org/elasticsearch/index/fielddata/DoubleFieldDataTests.java index 77a5bb22976..ce55cc5f73e 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/DoubleFieldDataTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/DoubleFieldDataTests.java @@ -31,7 +31,7 @@ public class DoubleFieldDataTests extends AbstractNumericFieldDataTests { @Override protected FieldDataType getFieldDataType() { - return new FieldDataType("double"); + return new FieldDataType("double", getFieldDataSettings()); } protected String one() { diff --git a/src/test/java/org/elasticsearch/index/fielddata/FloatFieldDataTests.java b/src/test/java/org/elasticsearch/index/fielddata/FloatFieldDataTests.java index a91797d8c07..36debc2a325 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/FloatFieldDataTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/FloatFieldDataTests.java @@ -30,7 +30,7 @@ public class FloatFieldDataTests extends AbstractNumericFieldDataTests { @Override protected FieldDataType getFieldDataType() { - return new FieldDataType("float"); + return new FieldDataType("float", getFieldDataSettings()); } protected String one() { diff --git a/src/test/java/org/elasticsearch/index/fielddata/LongFieldDataTests.java b/src/test/java/org/elasticsearch/index/fielddata/LongFieldDataTests.java index 9a75ed128ab..f9ae10167eb 100644 --- a/src/test/java/org/elasticsearch/index/fielddata/LongFieldDataTests.java +++ b/src/test/java/org/elasticsearch/index/fielddata/LongFieldDataTests.java @@ -26,7 +26,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.LongField; import org.apache.lucene.document.StringField; import org.apache.lucene.index.Term; -import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.index.fielddata.plain.PackedArrayAtomicFieldData; import org.elasticsearch.index.merge.Merges; import org.joda.time.DateTimeZone; @@ -46,7 +45,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { @Override protected FieldDataType getFieldDataType() { // we don't want to optimize the type so it will always be a long... - return new FieldDataType("long", ImmutableSettings.builder()); + return new FieldDataType("long", getFieldDataSettings()); } protected void add2SingleValuedDocumentsAndDeleteOneOfThem() throws Exception { @@ -86,13 +85,13 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { private static long getFirst(LongValues values, int docId) { final int numValues = values.setDocument(docId); - assertThat(numValues , is(1)); + assertThat(numValues, is(1)); return values.nextValue(); } private static double getFirst(DoubleValues values, int docId) { final int numValues = values.setDocument(docId); - assertThat(numValues , is(1)); + assertThat(numValues, is(1)); return values.nextValue(); } @@ -242,6 +241,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { public int numValues(Random r) { return 1; } + @Override public long nextValue(Random r) { return 1 + r.nextInt(16); @@ -251,6 +251,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { public int numValues(Random r) { return 1; } + @Override public long nextValue(Random r) { // somewhere in-between 2010 and 2012 @@ -261,6 +262,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { public int numValues(Random r) { return r.nextInt(3); } + @Override public long nextValue(Random r) { // somewhere in-between 2010 and 2012 @@ -271,6 +273,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { public int numValues(Random r) { return r.nextInt(3); } + @Override public long nextValue(Random r) { return 3 + r.nextInt(8); @@ -280,6 +283,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { public int numValues(Random r) { return r.nextFloat() < 0.1f ? 1 : 0; } + @Override public long nextValue(Random r) { return r.nextLong(); @@ -289,6 +293,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { public int numValues(Random r) { return r.nextFloat() < 0.1f ? 1 + r.nextInt(5) : 0; } + @Override public long nextValue(Random r) { return r.nextLong(); @@ -298,12 +303,15 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests { public int numValues(Random r) { return 1 + r.nextInt(3); } + @Override public long nextValue(Random r) { return r.nextLong(); } }; + public abstract int numValues(Random r); + public abstract long nextValue(Random r); }