better compressed input offset data structure
commit 9e6cfa77a5
parent b009c9c652
org/elasticsearch/common/compress/CompressedIndexInput.java

@@ -20,6 +20,7 @@
 package org.elasticsearch.common.compress;
 
 import org.apache.lucene.store.IndexInput;
+import org.elasticsearch.common.util.BigLongArray;
 
 import java.io.EOFException;
 import java.io.IOException;
@@ -32,7 +33,7 @@ public abstract class CompressedIndexInput extends IndexInput {
 
     private int version;
     private long uncompressedLength;
-    private long[] offsets;
+    private BigLongArray offsets;
 
     private boolean closed;
 
@@ -55,9 +56,9 @@ public abstract class CompressedIndexInput extends IndexInput {
         in.seek(metaDataPosition);
         this.uncompressedLength = in.readVLong();
         int size = in.readVInt();
-        offsets = new long[size];
-        for (int i = 0; i < offsets.length; i++) {
-            offsets[i] = in.readVLong();
+        offsets = new BigLongArray(size);
+        for (int i = 0; i < size; i++) {
+            offsets.set(i, in.readVLong());
         }
         this.currentOffsetIdx = -1;
         this.currentOffset = 0;
@@ -137,7 +138,7 @@ public abstract class CompressedIndexInput extends IndexInput {
     @Override
     public void seek(long pos) throws IOException {
         int idx = (int) (pos / uncompressed.length);
-        if (idx >= offsets.length) {
+        if (idx >= offsets.size) {
             // set the next "readyBuffer" to EOF
             currentOffsetIdx = idx;
             position = 0;
@@ -146,7 +147,7 @@ public abstract class CompressedIndexInput extends IndexInput {
         }
 
         // TODO: optimize so we won't have to readyBuffer on seek, can keep the position around, and set it on readyBuffer in this case
-        long pointer = offsets[idx];
+        long pointer = offsets.get(idx);
         if (pointer != currentOffset) {
             in.seek(pointer);
             position = 0;
@@ -182,7 +183,7 @@ public abstract class CompressedIndexInput extends IndexInput {
             return false;
         }
         // we reached the end...
-        if (currentOffsetIdx + 1 >= offsets.length) {
+        if (currentOffsetIdx + 1 >= offsets.size) {
             return false;
         }
         valid = uncompress(in, uncompressed);
@@ -190,7 +191,7 @@ public abstract class CompressedIndexInput extends IndexInput {
             return false;
         }
         currentOffsetIdx++;
-        currentOffset = offsets[currentOffsetIdx];
+        currentOffset = offsets.get(currentOffsetIdx);
         currentOffsetFilePointer = currentOffset - headerLength;
         position = 0;
         return (position < valid);
org/elasticsearch/common/compress/CompressedIndexOutput.java

@@ -40,6 +40,7 @@ public abstract class CompressedIndexOutput extends IndexOutput {
     private boolean closed;
 
     private final long metaDataPointer;
+    // need to have a growing segment long array list here...
     private TLongArrayList offsets = new TLongArrayList();
 
     public CompressedIndexOutput(IndexOutput out) throws IOException {
org/elasticsearch/common/util/BigLongArray.java (new file)

@@ -0,0 +1,75 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.util;
+
+/**
+ * A GC friendly long[].
+ * Allocating large arrays (that are not short-lived) generates fragmentation
+ * in old-gen space. This class breaks such a large long array into fixed-size
+ * pages to avoid that problem.
+ */
+public class BigLongArray {
+
+    private static final int DEFAULT_PAGE_SIZE = 4096;
+
+    private final long[][] pages;
+    public final int size;
+
+    private final int pageSize;
+    private final int pageCount;
+
+    public BigLongArray(int size) {
+        this(size, DEFAULT_PAGE_SIZE);
+    }
+
+    public BigLongArray(int size, int pageSize) {
+        this.size = size;
+        this.pageSize = pageSize;
+
+        int lastPageSize = size % pageSize;
+        int fullPageCount = size / pageSize;
+        pageCount = fullPageCount + (lastPageSize == 0 ? 0 : 1);
+        pages = new long[pageCount][];
+
+        for (int i = 0; i < fullPageCount; ++i)
+            pages[i] = new long[pageSize];
+
+        if (lastPageSize != 0)
+            pages[pages.length - 1] = new long[lastPageSize];
+    }
+
+    public void set(int idx, long value) {
+        if (idx < 0 || idx >= size)
+            throw new IndexOutOfBoundsException(String.format("%d is not within [0, %d)", idx, size));
+
+        int page = idx / pageSize;
+        int pageIdx = idx % pageSize;
+        pages[page][pageIdx] = value;
+    }
+
+    public long get(int idx) {
+        if (idx < 0 || idx >= size)
+            throw new IndexOutOfBoundsException(String.format("%d is not within [0, %d)", idx, size));
+
+        int page = idx / pageSize;
+        int pageIdx = idx % pageSize;
+        return pages[page][pageIdx];
+    }
+}
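For illustration, a minimal usage sketch of the new class (this demo class and its sample values are not part of the commit): a flat index is split into a page number (idx / pageSize) and a slot within that page (idx % pageSize), so no single allocation ever exceeds one page of longs.

import org.elasticsearch.common.util.BigLongArray;

// Hypothetical demo, assuming the default 4096-entry page size above.
public class BigLongArrayDemo {
    public static void main(String[] args) {
        // 10000 entries span three pages: 4096 + 4096 + 1808.
        BigLongArray offsets = new BigLongArray(10000);

        offsets.set(4095, 42L); // last slot of page 0
        offsets.set(4096, 43L); // first slot of page 1

        System.out.println(offsets.get(4095)); // 42
        System.out.println(offsets.get(4096)); // 43
        System.out.println(offsets.size);      // 10000
    }
}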