88 lines
2.6 KiB
Java

/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.benchmark.compress;
import org.elasticsearch.common.compress.bzip2.CBZip2InputStream;
import org.elasticsearch.common.xcontent.XContentBuilder;
import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Date;
/**
 * Benchmark data source that streams text records from a bzip2-compressed
 * DBpedia dump fetched over HTTP.
 *
 * <p>Each line of the decompressed stream is split on its first two spaces
 * into three parts, exposed as {@code id}, {@code type} and {@code text}.
 * Call {@link #next()} to advance to the following record and then
 * {@link #currentText()} or {@link #current(XContentBuilder)} to read it.
 *
 * <p>The instance holds an open network stream; call {@link #close()} when
 * finished with it. Instances keep mutable cursor state and are not
 * synchronized.
 */
public class TestData {

    /** Location of the compressed dump that is streamed by this class. */
    private static final String SOURCE_URL = "http://downloads.dbpedia.org/3.0/en/longabstract_en.nt.bz2";

    /**
     * Charset used to decode the decompressed bytes. Named explicitly so the
     * decoded text does not depend on the platform default.
     * NOTE(review): UTF-8 is assumed for this dump - confirm against the data.
     */
    private static final String CHARSET = "UTF-8";

    private final BufferedReader reader;

    // Fields of the record most recently read by next().
    private String id;
    private String type;
    private String text;

    // Sum of String.length() over all lines read so far (chars, not bytes).
    private long totalSize;

    /**
     * Opens the remote dump and prepares it for line-by-line reading.
     *
     * @throws IOException if the URL cannot be opened or the stream cannot be
     *                     wrapped for decompression
     */
    public TestData() throws IOException {
        URL url = new URL(SOURCE_URL);
        BufferedInputStream stream = new BufferedInputStream(url.openStream());
        boolean success = false;
        try {
            // read two bytes for the header...
            stream.read();
            stream.read();
            reader = new BufferedReader(new InputStreamReader(new CBZip2InputStream(stream), CHARSET));
            success = true;
        } finally {
            if (!success) {
                // Do not leak the connection when setup fails part-way through.
                try {
                    stream.close();
                } catch (IOException ignored) {
                    // keep the original failure as the one that propagates
                }
            }
        }
    }

    /**
     * Returns the accumulated length, in chars, of every line read so far by
     * {@link #next()} (line terminators are not included).
     */
    public long getTotalSize() {
        return totalSize;
    }

    /**
     * Advances to the next record.
     *
     * @return {@code true} if a record was read, {@code false} once the end of
     *         the stream has been reached
     * @throws IOException if reading fails, or if the line does not contain the
     *                     two spaces needed to split it into id, type and text;
     *                     in the latter case the previously read record is left
     *                     untouched
     */
    public boolean next() throws Exception {
        String line = reader.readLine();
        if (line == null) {
            return false;
        }
        totalSize += line.length();

        // Locate both separators before touching any field so that a bad line
        // can never leave a half-updated record behind.
        int endId = line.indexOf(' ');
        int endType = endId < 0 ? -1 : line.indexOf(' ', endId + 1);
        if (endType < 0) {
            throw new IOException("malformed line, expected [id type text] separated by spaces but got [" + line + "]");
        }

        id = line.substring(0, endId);
        type = line.substring(endId + 1, endType);
        text = line.substring(endType + 1);
        return true;
    }

    /**
     * Returns the text part of the current record, or {@code null} if
     * {@link #next()} has not yet read a record.
     */
    public String currentText() {
        return text;
    }

    /**
     * Writes the current record into the given builder as one object with the
     * fields {@code id}, {@code type}, {@code text} and {@code time}, where
     * {@code time} is the moment this method is called.
     *
     * @param builder the builder to write into
     * @return the same builder, to allow call chaining
     */
    public XContentBuilder current(XContentBuilder builder) throws Exception {
        builder.startObject();
        builder.field("id", id);
        builder.field("type", type);
        builder.field("text", text);
        builder.field("time", new Date());
        builder.endObject();
        return builder;
    }

    /**
     * Closes the underlying reader, releasing the stream opened by the
     * constructor.
     *
     * @throws IOException if closing the reader fails
     */
    public void close() throws IOException {
        reader.close();
    }
}