2018-08-26 22:07:55 -04:00

90 lines
3.0 KiB
Java

package com.baeldung.crunch;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertIterableEquals;
import java.io.File;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Calendar;
import org.apache.crunch.PCollection;
import org.apache.crunch.Pipeline;
import org.apache.crunch.Source;
import org.apache.crunch.Target;
import org.apache.crunch.impl.mem.MemPipeline;
import org.apache.crunch.io.From;
import org.apache.crunch.io.To;
import org.junit.Ignore;
import org.junit.Test;
public class MemPipelineUnitTest {
private static final String INPUT_FILE_PATH = "src/test/resources/crunch/input.txt";
@Test
public void givenPipeLineAndSource_whenSourceRead_thenExpectedNumberOfRecordsRead() {
Pipeline pipeline = MemPipeline.getInstance();
Source<String> source = From.textFile(INPUT_FILE_PATH);
PCollection<String> lines = pipeline.read(source);
assertEquals(21, lines.asCollection()
.getValue()
.size());
}
@Test
public void givenPipeLine_whenTextFileRead_thenExpectedNumberOfRecordsRead() {
Pipeline pipeline = MemPipeline.getInstance();
PCollection<String> lines = pipeline.readTextFile(INPUT_FILE_PATH);
assertEquals(21, lines.asCollection()
.getValue()
.size());
}
private String createOutputPath() throws IOException {
Path path = Files.createTempDirectory("test");
final String outputFilePath = path.toString() + File.separatorChar
+ "output.text";
return outputFilePath;
}
@Test
@Ignore("Requires Hadoop binaries")
public void givenCollection_whenWriteCalled_fileWrittenSuccessfully()
throws IOException {
PCollection<String> inputStrings = MemPipeline.collectionOf("Hello",
"Apache", "Crunch", Calendar.getInstance()
.toString());
final String outputFilePath = createOutputPath();
Target target = To.textFile(outputFilePath);
inputStrings.write(target);
Pipeline pipeline = MemPipeline.getInstance();
PCollection<String> lines = pipeline.readTextFile(outputFilePath);
assertIterableEquals(inputStrings.materialize(), lines.materialize());
}
@Test
@Ignore("Requires Hadoop binaries")
public void givenPipeLine_whenWriteTextFileCalled_fileWrittenSuccessfully()
throws IOException {
Pipeline pipeline = MemPipeline.getInstance();
PCollection<String> inputStrings = MemPipeline.collectionOf("Hello",
"Apache", "Crunch", Calendar.getInstance()
.toString());
final String outputFilePath = createOutputPath();
pipeline.writeTextFile(inputStrings, outputFilePath);
PCollection<String> lines = pipeline.readTextFile(outputFilePath);
assertIterableEquals(inputStrings.materialize(), lines.materialize());
}
}