HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the header
would cause the reader to read the sync marker as a record. Contributed by Jay Booth git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@813698 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ae6721a85a
commit
c9cc61e853
|
@ -1008,6 +1008,10 @@ Trunk (unreleased changes)
|
||||||
HADOOP-6181. Fix .eclipse.templates/.classpath for avro and jets3t jar
|
HADOOP-6181. Fix .eclipse.templates/.classpath for avro and jets3t jar
|
||||||
files. (Carlos Valiente via szetszwo)
|
files. (Carlos Valiente via szetszwo)
|
||||||
|
|
||||||
|
HADOOP-6196. Fix a bug in SequenceFile.Reader where syncing within the
|
||||||
|
header would cause the reader to read the sync marker as a record. (Jay
|
||||||
|
Booth via cdouglas)
|
||||||
|
|
||||||
Release 0.20.1 - Unreleased
|
Release 0.20.1 - Unreleased
|
||||||
|
|
||||||
INCOMPATIBLE CHANGES
|
INCOMPATIBLE CHANGES
|
||||||
|
|
|
@ -1397,6 +1397,7 @@ public class SequenceFile {
|
||||||
private byte[] syncCheck = new byte[SYNC_HASH_SIZE];
|
private byte[] syncCheck = new byte[SYNC_HASH_SIZE];
|
||||||
private boolean syncSeen;
|
private boolean syncSeen;
|
||||||
|
|
||||||
|
private long headerEnd;
|
||||||
private long end;
|
private long end;
|
||||||
private int keyLength;
|
private int keyLength;
|
||||||
private int recordLength;
|
private int recordLength;
|
||||||
|
@ -1546,6 +1547,7 @@ public class SequenceFile {
|
||||||
|
|
||||||
if (version > 1) { // if version > 1
|
if (version > 1) { // if version > 1
|
||||||
in.readFully(sync); // read sync bytes
|
in.readFully(sync); // read sync bytes
|
||||||
|
headerEnd = in.getPos(); // record end of header
|
||||||
}
|
}
|
||||||
|
|
||||||
// Initialize... *not* if this we are constructing a temporary Reader
|
// Initialize... *not* if this we are constructing a temporary Reader
|
||||||
|
@ -2210,6 +2212,14 @@ public class SequenceFile {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (position < headerEnd) {
|
||||||
|
// seek directly to first record
|
||||||
|
in.seek(headerEnd);
|
||||||
|
// note the sync marker "seen" in the header
|
||||||
|
syncSeen = true;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
seek(position+4); // skip escape
|
seek(position+4); // skip escape
|
||||||
in.readFully(syncCheck);
|
in.readFully(syncCheck);
|
||||||
|
|
|
@ -0,0 +1,107 @@
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.hadoop.io;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.util.Random;
|
||||||
|
import java.net.URI;
|
||||||
|
|
||||||
|
import org.apache.hadoop.conf.Configuration;
|
||||||
|
import org.apache.hadoop.fs.FileSystem;
|
||||||
|
import org.apache.hadoop.fs.Path;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
import org.apache.commons.logging.Log;
|
||||||
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
|
||||||
|
import static org.junit.Assert.*;
|
||||||
|
|
||||||
|
public class TestSequenceFileSync {
|
||||||
|
private static final int NUMRECORDS = 2000;
|
||||||
|
private static final int RECORDSIZE = 80;
|
||||||
|
private static final Random rand = new Random();
|
||||||
|
|
||||||
|
private final static String REC_FMT = "%d RECORDID %d : ";
|
||||||
|
|
||||||
|
|
||||||
|
private static void forOffset(SequenceFile.Reader reader,
|
||||||
|
IntWritable key, Text val, int iter, long off, int expectedRecord)
|
||||||
|
throws IOException {
|
||||||
|
val.clear();
|
||||||
|
reader.sync(off);
|
||||||
|
reader.next(key, val);
|
||||||
|
assertEquals(key.get(), expectedRecord);
|
||||||
|
final String test = String.format(REC_FMT, expectedRecord, expectedRecord);
|
||||||
|
assertEquals("Invalid value " + val, 0, val.find(test, 0));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testLowSyncpoint() throws IOException {
|
||||||
|
final Configuration conf = new Configuration();
|
||||||
|
final FileSystem fs = FileSystem.getLocal(conf);
|
||||||
|
final Path path = new Path(System.getProperty("test.build.data", "/tmp"),
|
||||||
|
"sequencefile.sync.test");
|
||||||
|
final IntWritable input = new IntWritable();
|
||||||
|
final Text val = new Text();
|
||||||
|
SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path,
|
||||||
|
IntWritable.class, Text.class);
|
||||||
|
try {
|
||||||
|
writeSequenceFile(writer, NUMRECORDS);
|
||||||
|
for (int i = 0; i < 5 ; i++) {
|
||||||
|
final SequenceFile.Reader reader =
|
||||||
|
new SequenceFile.Reader(fs, path, conf);
|
||||||
|
try {
|
||||||
|
forOffset(reader, input, val, i, 0, 0);
|
||||||
|
forOffset(reader, input, val, i, 65, 0);
|
||||||
|
forOffset(reader, input, val, i, 2000, 21);
|
||||||
|
forOffset(reader, input, val, i, 0, 0);
|
||||||
|
} finally {
|
||||||
|
reader.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
fs.delete(path, false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static void writeSequenceFile(SequenceFile.Writer writer,
|
||||||
|
int numRecords) throws IOException {
|
||||||
|
final IntWritable key = new IntWritable();
|
||||||
|
final Text val = new Text();
|
||||||
|
for (int numWritten = 0; numWritten < numRecords; ++numWritten) {
|
||||||
|
key.set(numWritten);
|
||||||
|
randomText(val, numWritten, RECORDSIZE);
|
||||||
|
writer.append(key, val);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
static void randomText(Text val, int id, int recordSize) {
|
||||||
|
val.clear();
|
||||||
|
final StringBuilder ret = new StringBuilder(recordSize);
|
||||||
|
ret.append(String.format(REC_FMT, id, id));
|
||||||
|
recordSize -= ret.length();
|
||||||
|
for (int i = 0; i < recordSize; ++i) {
|
||||||
|
ret.append(rand.nextInt(9));
|
||||||
|
}
|
||||||
|
val.set(ret.toString());
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue