Building a correct segment path for HDFS

commit 6f06d701d9
parent 0712941825
Author: Jan Rudert
Date:   2013-07-04 16:00:20 +02:00

3 changed files with 90 additions and 1 deletion

HdfsDataSegmentPusher.java

@@ -42,7 +42,7 @@ public class HdfsDataSegmentPusher implements DataSegmentPusher
   @Override
   public DataSegment push(File inDir, DataSegment segment) throws IOException
   {
-    final String storageDir = DataSegmentPusherUtil.getStorageDir(segment);
+    final String storageDir = HdfsDataSegmentPusherUtil.getStorageDir(segment);
     Path outFile = new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir));
     FileSystem fs = outFile.getFileSystem(hadoopConfig);
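
Why this one-line change matters: the generic DataSegmentPusherUtil layout embeds the segment interval using the default ISO-8601 toString(), which contains colons, and HDFS rejects colons in file names (see the HDFS-13 reference in the new util class below). A minimal probe of that behavior, assuming a reachable HDFS is configured as the default filesystem; the class name and paths here are illustrative, not part of the commit:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ColonPathProbe
{
  public static void main(String[] args) throws Exception
  {
    FileSystem fs = FileSystem.get(new Configuration());
    // Default ISO-8601 interval bounds put ':' into the directory name...
    Path bad = new Path("/druid/something/2011-10-01T00:00:00.000Z_2011-10-02T00:00:00.000Z/1/0");
    // ...and HDFS refuses to create such a path, reporting an invalid DFS filename.
    fs.mkdirs(bad);
  }
}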

HdfsDataSegmentPusherUtil.java (new file)

@@ -0,0 +1,50 @@
/*
* Druid - a distributed column store.
* Copyright (C) 2012 Metamarkets Group Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package com.metamx.druid.loading;

import com.google.common.base.Joiner;
import com.metamx.druid.client.DataSegment;
import org.joda.time.format.ISODateTimeFormat;

/**
 */
public class HdfsDataSegmentPusherUtil
{
  private static final Joiner JOINER = Joiner.on("/").skipNulls();

  /**
   * Due to https://issues.apache.org/jira/browse/HDFS-13, colons are not
   * allowed in path names, so we format paths differently for HDFS.
   */
  public static String getStorageDir(DataSegment segment)
  {
    return JOINER.join(
        segment.getDataSource(),
        String.format(
            "%s_%s",
            segment.getInterval().getStart().toString(ISODateTimeFormat.basicDateTime()),
            segment.getInterval().getEnd().toString(ISODateTimeFormat.basicDateTime())
        ),
        segment.getVersion().replaceAll(":", "_"),
        segment.getShardSpec().getPartitionNum()
    );
  }
}
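
The key device above is ISODateTimeFormat.basicDateTime(), whose pattern (yyyyMMdd'T'HHmmss.SSSZ) contains no colons, while the version string is sanitized separately via replaceAll(":", "_"). A standalone sketch of the formatter's output, matching the expectation asserted in the test below (the demo class is ours, not part of the commit):

import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.joda.time.format.ISODateTimeFormat;

public class BasicDateTimeDemo
{
  public static void main(String[] args)
  {
    DateTime start = new DateTime(2011, 10, 1, 0, 0, 0, 0, DateTimeZone.UTC);
    // Default toString() keeps the separators: 2011-10-01T00:00:00.000Z
    System.out.println(start);
    // basicDateTime() drops them: 20111001T000000.000Z
    System.out.println(start.toString(ISODateTimeFormat.basicDateTime()));
  }
}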

HdfsDataSegmentPusherUtilTest.java (new file)

@@ -0,0 +1,39 @@
package com.metamx.druid.loading;

import com.google.common.collect.ImmutableMap;
import com.metamx.druid.client.DataSegment;
import com.metamx.druid.index.v1.IndexIO;
import com.metamx.druid.shard.NoneShardSpec;
import org.joda.time.Interval;
import org.junit.Assert;
import org.junit.Test;

import java.util.Arrays;

/**
 * @author jan.rudert
 */
public class HdfsDataSegmentPusherUtilTest {
  @Test
  public void testGetStorageDir() throws Exception {
    Interval interval = new Interval("2011-10-01/2011-10-02");
    ImmutableMap<String, Object> loadSpec = ImmutableMap.<String, Object>of("something", "or_other");
    DataSegment segment = new DataSegment(
        "something",
        interval,
        "1",
        loadSpec,
        Arrays.asList("dim1", "dim2"),
        Arrays.asList("met1", "met2"),
        new NoneShardSpec(),
        IndexIO.CURRENT_VERSION_ID,
        1
    );
    String storageDir = HdfsDataSegmentPusherUtil.getStorageDir(segment);
    Assert.assertEquals("something/20111001T000000.000Z_20111002T000000.000Z/1/0", storageDir);
  }
}
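
For the end-to-end picture, HdfsDataSegmentPusher combines its configured storage directory with this layout via the String.format shown in the first hunk. A possible extra assertion along the same lines (not part of the commit; "/druid/segments" is a made-up storage directory) would check that the final index path is colon-free:

  @Test
  public void testFullIndexPathHasNoColons() throws Exception {
    DataSegment segment = new DataSegment(
        "something",
        new Interval("2011-10-01/2011-10-02"),
        "1",
        ImmutableMap.<String, Object>of("something", "or_other"),
        Arrays.asList("dim1", "dim2"),
        Arrays.asList("met1", "met2"),
        new NoneShardSpec(),
        IndexIO.CURRENT_VERSION_ID,
        1
    );
    // Mirrors: new Path(String.format("%s/%s/index.zip", config.getStorageDirectory(), storageDir))
    String fullPath = String.format(
        "%s/%s/index.zip", "/druid/segments", HdfsDataSegmentPusherUtil.getStorageDir(segment)
    );
    Assert.assertEquals(
        "/druid/segments/something/20111001T000000.000Z_20111002T000000.000Z/1/0/index.zip",
        fullPath
    );
    Assert.assertFalse(fullPath.contains(":"));
  }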