better compressed input offset data structure
This commit is contained in:
parent
b009c9c652
commit
9e6cfa77a5
|
@ -20,6 +20,7 @@
|
|||
package org.elasticsearch.common.compress;
|
||||
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.elasticsearch.common.util.BigLongArray;
|
||||
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
|
@ -32,7 +33,7 @@ public abstract class CompressedIndexInput extends IndexInput {
|
|||
|
||||
private int version;
|
||||
private long uncompressedLength;
|
||||
private long[] offsets;
|
||||
private BigLongArray offsets;
|
||||
|
||||
private boolean closed;
|
||||
|
||||
|
@ -55,9 +56,9 @@ public abstract class CompressedIndexInput extends IndexInput {
|
|||
in.seek(metaDataPosition);
|
||||
this.uncompressedLength = in.readVLong();
|
||||
int size = in.readVInt();
|
||||
offsets = new long[size];
|
||||
for (int i = 0; i < offsets.length; i++) {
|
||||
offsets[i] = in.readVLong();
|
||||
offsets = new BigLongArray(size);
|
||||
for (int i = 0; i < size; i++) {
|
||||
offsets.set(i, in.readVLong());
|
||||
}
|
||||
this.currentOffsetIdx = -1;
|
||||
this.currentOffset = 0;
|
||||
|
@ -137,7 +138,7 @@ public abstract class CompressedIndexInput extends IndexInput {
|
|||
@Override
|
||||
public void seek(long pos) throws IOException {
|
||||
int idx = (int) (pos / uncompressed.length);
|
||||
if (idx >= offsets.length) {
|
||||
if (idx >= offsets.size) {
|
||||
// set the next "readyBuffer" to EOF
|
||||
currentOffsetIdx = idx;
|
||||
position = 0;
|
||||
|
@ -146,7 +147,7 @@ public abstract class CompressedIndexInput extends IndexInput {
|
|||
}
|
||||
|
||||
// TODO: optimize so we won't have to readyBuffer on seek, can keep the position around, and set it on readyBuffer in this case
|
||||
long pointer = offsets[idx];
|
||||
long pointer = offsets.get(idx);
|
||||
if (pointer != currentOffset) {
|
||||
in.seek(pointer);
|
||||
position = 0;
|
||||
|
@ -182,7 +183,7 @@ public abstract class CompressedIndexInput extends IndexInput {
|
|||
return false;
|
||||
}
|
||||
// we reached the end...
|
||||
if (currentOffsetIdx + 1 >= offsets.length) {
|
||||
if (currentOffsetIdx + 1 >= offsets.size) {
|
||||
return false;
|
||||
}
|
||||
valid = uncompress(in, uncompressed);
|
||||
|
@ -190,7 +191,7 @@ public abstract class CompressedIndexInput extends IndexInput {
|
|||
return false;
|
||||
}
|
||||
currentOffsetIdx++;
|
||||
currentOffset = offsets[currentOffsetIdx];
|
||||
currentOffset = offsets.get(currentOffsetIdx);
|
||||
currentOffsetFilePointer = currentOffset - headerLength;
|
||||
position = 0;
|
||||
return (position < valid);
|
||||
|
|
|
@ -40,6 +40,7 @@ public abstract class CompressedIndexOutput extends IndexOutput {
|
|||
private boolean closed;
|
||||
|
||||
private final long metaDataPointer;
|
||||
// need to have a growing segment long array list here...
|
||||
private TLongArrayList offsets = new TLongArrayList();
|
||||
|
||||
public CompressedIndexOutput(IndexOutput out) throws IOException {
|
||||
|
|
|
@ -0,0 +1,75 @@
|
|||
/*
|
||||
* Licensed to ElasticSearch and Shay Banon under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. ElasticSearch licenses this
|
||||
* file to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.common.util;
|
||||
|
||||
/**
 * A GC friendly {@code long[]}.
 * <p>
 * Allocating large arrays (that are not short-lived) generates fragmentation
 * in old-gen space. This class breaks such a large long array into fixed size
 * pages to avoid that problem.
 * <p>
 * The array has a fixed logical length ({@link #size}) chosen at construction
 * time; valid indices are {@code [0, size)}.
 */
public class BigLongArray {

    /** Page length used when the caller does not supply one. */
    private static final int DEFAULT_PAGE_SIZE = 4096;

    /** Backing storage: every page holds {@code pageSize} entries except possibly the last one. */
    private final long[][] pages;

    /** Number of addressable elements; valid indices are {@code [0, size)}. */
    public final int size;

    /** Number of elements held by each full page. */
    private final int pageSize;

    /** Total number of allocated pages (full pages plus an optional shorter trailing page). */
    private final int pageCount;

    /**
     * Creates an array of {@code size} elements using the default page size.
     *
     * @param size the number of elements, must not be negative
     * @throws IllegalArgumentException if {@code size} is negative
     */
    public BigLongArray(int size) {
        this(size, DEFAULT_PAGE_SIZE);
    }

    /**
     * Creates an array of {@code size} elements split into pages of {@code pageSize} elements.
     *
     * @param size     the number of elements, must not be negative
     * @param pageSize the number of elements per page, must be positive
     * @throws IllegalArgumentException if {@code size} is negative or {@code pageSize} is not positive
     */
    public BigLongArray(int size, int pageSize) {
        // Fail fast with a descriptive error instead of an incidental
        // NegativeArraySizeException / ArithmeticException further down.
        if (size < 0) {
            throw new IllegalArgumentException("size must not be negative, got " + size);
        }
        if (pageSize <= 0) {
            throw new IllegalArgumentException("pageSize must be positive, got " + pageSize);
        }

        this.size = size;
        this.pageSize = pageSize;

        int lastPageSize = size % pageSize;
        int fullPageCount = size / pageSize;
        pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1);
        pages = new long[pageCount][];

        for (int i = 0; i < fullPageCount; ++i) {
            pages[i] = new long[pageSize];
        }

        // The trailing page is only as large as needed to hold the remainder.
        if (lastPageSize != 0) {
            pages[pages.length - 1] = new long[lastPageSize];
        }
    }

    /**
     * Stores {@code value} at position {@code idx}.
     *
     * @param idx   the element index, in {@code [0, size)}
     * @param value the value to store
     * @throws IndexOutOfBoundsException if {@code idx} is outside {@code [0, size)}
     */
    public void set(int idx, long value) {
        checkIndex(idx);
        pages[idx / pageSize][idx % pageSize] = value;
    }

    /**
     * Returns the value stored at position {@code idx}.
     *
     * @param idx the element index, in {@code [0, size)}
     * @return the value at {@code idx}
     * @throws IndexOutOfBoundsException if {@code idx} is outside {@code [0, size)}
     */
    public long get(int idx) {
        checkIndex(idx);
        return pages[idx / pageSize][idx % pageSize];
    }

    /** Rejects any index outside the half-open range {@code [0, size)}. */
    private void checkIndex(int idx) {
        // idx == size is already out of range, hence ">=" and not ">".
        if (idx < 0 || idx >= size) {
            throw new IndexOutOfBoundsException(idx + " is not within [0, " + size + ")");
        }
    }
}
|
Loading…
Reference in New Issue