mirror of
https://github.com/apache/druid.git
synced 2025-02-25 12:35:33 +00:00
HadoopGlobPathSplitter implementation to split hadoop glob paths
This can be safely reverted once https://issues.apache.org/jira/browse/MAPREDUCE-5061 is fixed
This commit is contained in:
parent
1ecec1da5a
commit
a603bd9547
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.indexer.path;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
 * Splits a Hadoop glob path into the individual paths it denotes, expanding
 * top-level commas and <code>{x,y}</code> alternative groups.
 *
 * Note: This class has been created to workaround
 * https://issues.apache.org/jira/browse/MAPREDUCE-5061
 */
public class HadoopGlobPathSplitter
{

  /**
   * Splits given hadoop glob path by commas, expanding brace groups.
   * <pre>
   * splitGlob("/a,/b")    gives ["/a", "/b"]
   * splitGlob("/a/{c,d}") gives ["/a/c", "/a/d"]
   * splitGlob("")         gives [""]
   * </pre>
   * Groups may be nested. A closing brace that has no matching opening brace
   * is kept as a literal character instead of ending the parse, so nothing
   * after it is lost. An opening brace that is never closed is tolerated: the
   * group simply extends to the end of the input.
   *
   * @param path the glob path to split; must not be null
   * @return one freshly created StringBuilder per expanded path, in
   *         left-to-right order of the alternatives
   * @throws NullPointerException if path is null
   */
  public static List<StringBuilder> splitGlob(String path)
  {
    return splitGlob(new CharStream(path), false);
  }

  /**
   * Recursive worker. Consumes characters from the shared stream until the
   * input ends or, when parsing a group, until the closing brace of that group.
   *
   * @param path        shared cursor over the input; advanced by this call
   * @param insideGroup true when this invocation was started by an opening brace
   * @return the alternatives collected by this invocation
   */
  private static List<StringBuilder> splitGlob(CharStream path, boolean insideGroup)
  {
    // Alternatives that have already been terminated by a comma.
    List<StringBuilder> result = new ArrayList<>();
    // Prefixes of the alternative currently being read; more than one entry
    // once a nested group has been expanded into it.
    List<StringBuilder> current = newAlternative();

    while (path.hasMore()) {
      char c = path.next();
      switch (c) {
        case '{':
          // The recursive call consumes everything up to the matching '}'.
          current = crossProduct(current, splitGlob(path, true));
          break;
        case '}':
          if (insideGroup) {
            result.addAll(current);
            return result;
          }
          // Unmatched '}' at top level: keep it as a literal so the rest of
          // the input is still processed.
          appendToAll(current, c);
          break;
        case ',':
          result.addAll(current);
          current = newAlternative();
          break;
        default:
          appendToAll(current, c);
      }
    }

    result.addAll(current);
    return result;
  }

  /** Creates the starting state for a new alternative: a single empty prefix. */
  private static List<StringBuilder> newAlternative()
  {
    List<StringBuilder> fresh = new ArrayList<>();
    fresh.add(new StringBuilder());
    return fresh;
  }

  /** Appends the character to every prefix in the list. */
  private static void appendToAll(List<StringBuilder> prefixes, char c)
  {
    for (StringBuilder prefix : prefixes) {
      prefix.append(c);
    }
  }

  /** Returns every prefix followed by every suffix, prefix-major, as new builders. */
  private static List<StringBuilder> crossProduct(List<StringBuilder> prefixes, List<StringBuilder> suffixes)
  {
    List<StringBuilder> combined = new ArrayList<>(prefixes.size() * suffixes.size());
    for (StringBuilder prefix : prefixes) {
      for (StringBuilder suffix : suffixes) {
        combined.add(new StringBuilder().append(prefix).append(suffix));
      }
    }
    return combined;
  }
}

/**
 * Minimal forward-only cursor over the characters of a string. It lets the
 * recursive calls of the splitter share a single read position.
 */
class CharStream
{
  private final String string;
  private int offset;

  /**
   * @param string the text to read; must not be null
   * @throws NullPointerException if string is null
   */
  public CharStream(String string)
  {
    if (string == null) {
      // Fail here rather than on the first hasMore() call.
      throw new NullPointerException("string");
    }
    this.string = string;
    this.offset = 0;
  }

  /** Returns true while at least one unread character remains. */
  public boolean hasMore()
  {
    return offset < string.length();
  }

  /**
   * Returns the next character and advances the cursor.
   *
   * @throws StringIndexOutOfBoundsException if no characters remain
   */
  public char next()
  {
    return string.charAt(offset++);
  }
}
|
@ -0,0 +1,137 @@
|
||||
/*
|
||||
* Licensed to Metamarkets Group Inc. (Metamarkets) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. Metamarkets licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package io.druid.indexer.path;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
*/
|
||||
public class HadoopGlobPathSplitterTest
|
||||
{
|
||||
@Test
|
||||
public void testGlobSplitting() throws Exception {
|
||||
String path = "/a/b/c";
|
||||
List<String> expected = ImmutableList.of(
|
||||
"/a/b/c"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "/a/b/c,/d/e";
|
||||
expected = ImmutableList.of(
|
||||
"/a/b/c",
|
||||
"/d/e"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "/a/b/*.c,/d/*.e";
|
||||
expected = ImmutableList.of(
|
||||
"/a/b/*.c",
|
||||
"/d/*.e"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "/a/b/c,/d/e,/f/g";
|
||||
expected = ImmutableList.of(
|
||||
"/a/b/c",
|
||||
"/d/e",
|
||||
"/f/g"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "/a/b/{c,d}";
|
||||
expected = ImmutableList.of(
|
||||
"/a/b/c",
|
||||
"/a/b/d"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "/a/b/{c,d}/e";
|
||||
expected = ImmutableList.of(
|
||||
"/a/b/c/e",
|
||||
"/a/b/d/e"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "{c,d}";
|
||||
expected = ImmutableList.of(
|
||||
"c",
|
||||
"d"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "{c,d}/e";
|
||||
expected = ImmutableList.of(
|
||||
"c/e",
|
||||
"d/e"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "/a/b/{c,d},/a/b/{c,d}/e,{c,d},{c,d}/e";
|
||||
expected = ImmutableList.of(
|
||||
"/a/b/c",
|
||||
"/a/b/d",
|
||||
"/a/b/c/e",
|
||||
"/a/b/d/e",
|
||||
"c",
|
||||
"d",
|
||||
"c/e",
|
||||
"d/e"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "/a/b/{c/{d,e/{f,g},h},i}/{j,k}/l";
|
||||
expected = ImmutableList.of(
|
||||
"/a/b/c/d/j/l",
|
||||
"/a/b/c/d/k/l",
|
||||
"/a/b/c/e/f/j/l",
|
||||
"/a/b/c/e/f/k/l",
|
||||
"/a/b/c/e/g/j/l",
|
||||
"/a/b/c/e/g/k/l",
|
||||
"/a/b/c/h/j/l",
|
||||
"/a/b/c/h/k/l",
|
||||
"/a/b/i/j/l",
|
||||
"/a/b/i/k/l"
|
||||
);
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
|
||||
path = "";
|
||||
expected = ImmutableList.of("");
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
|
||||
path = "{}";
|
||||
expected = ImmutableList.of("");
|
||||
Assert.assertEquals(expected, splitGlob(path));
|
||||
}
|
||||
|
||||
private static List<String> splitGlob(String path) {
|
||||
List<StringBuilder> tmp = HadoopGlobPathSplitter.splitGlob(path);
|
||||
List<String> result = new ArrayList<>(tmp.size());
|
||||
for(StringBuilder sb : tmp) {
|
||||
result.add(sb.toString());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user