HADOOP-11698. Remove DistCpV1 and Logalyzer. Contributed by Brahma Reddy Battula.

Akira Ajisaka 2015-05-20 19:25:45 +09:00
parent ce53c8eb0c
commit 4aa730ce85
5 changed files with 3 additions and 3213 deletions

CHANGES.txt

@@ -26,6 +26,9 @@ Trunk (Unreleased)
HADOOP-11627. Remove io.native.lib.available.
(Brahma Reddy Battula via aajisaka)

HADOOP-11698. Remove DistCpV1 and Logalyzer.
(Brahma Reddy Battula via aajisaka)

NEW FEATURES

HADOOP-6590. Add a username check for hadoop sub-commands (John Smith via

org/apache/hadoop/tools/Logalyzer.java

@@ -1,329 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import java.io.ByteArrayInputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configuration.DeprecationDelta;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableComparator;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.LongSumReducer;
import org.apache.hadoop.mapreduce.lib.map.RegexMapper;
/**
* Logalyzer: A utility tool for archiving and analyzing hadoop logs.
* <p>
* This tool supports archiving and analyzing (sort/grep) of log-files.
* It takes as input
* a) Input uri which will serve uris of the logs to be archived.
* b) Output directory (not mandatory).
* c) Directory on dfs to archive the logs.
* d) The sort/grep patterns for analyzing the files and separator for boundaries.
* Usage:
* Logalyzer -archive -archiveDir &lt;directory to archive logs&gt; -analysis
* &lt;directory&gt; -logs &lt;log-list uri&gt; -grep &lt;pattern&gt; -sort
* &lt;col1, col2&gt; -separator &lt;separator&gt;
* <p>
*/
@Deprecated
public class Logalyzer {
// Constants
private static Configuration fsConfig = new Configuration();
public static final String SORT_COLUMNS =
"logalizer.logcomparator.sort.columns";
public static final String COLUMN_SEPARATOR =
"logalizer.logcomparator.column.separator";
static {
Configuration.addDeprecations(new DeprecationDelta[] {
new DeprecationDelta("mapred.reducer.sort", SORT_COLUMNS),
new DeprecationDelta("mapred.reducer.separator", COLUMN_SEPARATOR)
});
}
/** A {@link Mapper} that extracts text matching a regular expression. */
public static class LogRegexMapper<K extends WritableComparable>
extends MapReduceBase
implements Mapper<K, Text, Text, LongWritable> {
private Pattern pattern;
public void configure(JobConf job) {
pattern = Pattern.compile(job.get(RegexMapper.PATTERN));
}
public void map(K key, Text value,
OutputCollector<Text, LongWritable> output,
Reporter reporter)
throws IOException {
String text = value.toString();
Matcher matcher = pattern.matcher(text);
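// Note: the whole input line (value), not the matched substring, is
// emitted as the key, once per regex match; the LongSumReducer
// downstream therefore counts matches per distinct line.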
while (matcher.find()) {
output.collect(value, new LongWritable(1));
}
}
}
/** A WritableComparator optimized for UTF8 keys of the logs. */
public static class LogComparator extends Text.Comparator implements Configurable {
private static Log LOG = LogFactory.getLog(Logalyzer.class);
private JobConf conf = null;
private String[] sortSpec = null;
private String columnSeparator = null;
public void setConf(Configuration conf) {
if (conf instanceof JobConf) {
this.conf = (JobConf) conf;
} else {
this.conf = new JobConf(conf);
}
//Initialize the specification for *comparison*
String sortColumns = this.conf.get(SORT_COLUMNS, null);
if (sortColumns != null) {
sortSpec = sortColumns.split(",");
}
//Column-separator
columnSeparator = this.conf.get(COLUMN_SEPARATOR, "");
}
public Configuration getConf() {
return conf;
}
public int compare(byte[] b1, int s1, int l1,
byte[] b2, int s2, int l2) {
if (sortSpec == null) {
return super.compare(b1, s1, l1, b2, s2, l2);
}
try {
Text logline1 = new Text();
logline1.readFields(new DataInputStream(new ByteArrayInputStream(b1, s1, l1)));
String line1 = logline1.toString();
String[] logColumns1 = line1.split(columnSeparator);
Text logline2 = new Text();
logline2.readFields(new DataInputStream(new ByteArrayInputStream(b2, s2, l2)));
String line2 = logline2.toString();
String[] logColumns2 = line2.split(columnSeparator);
if (logColumns1 == null || logColumns2 == null) {
return super.compare(b1, s1, l1, b2, s2, l2);
}
//Compare column-wise according to *sortSpec*
for(int i=0; i < sortSpec.length; ++i) {
int column = Integer.parseInt(sortSpec[i]);
String c1 = logColumns1[column];
String c2 = logColumns2[column];
//Compare columns as UTF-8 bytes; use the encoded byte lengths,
//since String.length() counts chars, not bytes
byte[] bytes1 = c1.getBytes(Charset.forName("UTF-8"));
byte[] bytes2 = c2.getBytes(Charset.forName("UTF-8"));
int comparison = super.compareBytes(bytes1, 0, bytes1.length,
bytes2, 0, bytes2.length);
//They differ!
if (comparison != 0) {
return comparison;
}
}
} catch (IOException ioe) {
LOG.fatal("Caught " + ioe);
return 0;
}
return 0;
}
static {
// register this comparator
WritableComparator.define(Text.class, new LogComparator());
}
}
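// Example: with COLUMN_SEPARATOR " " and SORT_COLUMNS "0", the keys
// "1 44" and "3 44" split into {"1","44"} and {"3","44"} and compare
// on column 0 alone ("1" vs "3"), so "1 44" sorts first.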
/**
* doArchive: Workhorse function to archive log-files.
* @param logListURI : The uri which will serve list of log-files to archive.
* @param archiveDirectory : The directory to store archived logfiles.
* @throws IOException
*/
@SuppressWarnings("deprecation")
public void
doArchive(String logListURI, String archiveDirectory)
throws IOException
{
String destURL = FileSystem.getDefaultUri(fsConfig) + archiveDirectory;
DistCpV1.copy(new JobConf(fsConfig), logListURI, destURL, null, true, false);
}
/**
* doAnalyze: Workhorse function to grep and sort the log-files.
* @param inputFilesDirectory : Directory containing the files to be analyzed.
* @param outputDirectory : Directory to store analysis (output).
* @param grepPattern : Pattern to *grep* for.
* @param sortColumns : Sort specification for output.
* @param columnSeparator : Column separator.
* @throws IOException
*/
public void
doAnalyze(String inputFilesDirectory, String outputDirectory,
String grepPattern, String sortColumns, String columnSeparator)
throws IOException
{
Path grepInput = new Path(inputFilesDirectory);
Path analysisOutput = null;
if (outputDirectory.equals("")) {
analysisOutput = new Path(inputFilesDirectory, "logalyzer_" +
Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
} else {
analysisOutput = new Path(outputDirectory);
}
JobConf grepJob = new JobConf(fsConfig);
grepJob.setJobName("logalyzer-grep-sort");
FileInputFormat.setInputPaths(grepJob, grepInput);
grepJob.setInputFormat(TextInputFormat.class);
grepJob.setMapperClass(LogRegexMapper.class);
grepJob.set(RegexMapper.PATTERN, grepPattern);
grepJob.set(SORT_COLUMNS, sortColumns);
grepJob.set(COLUMN_SEPARATOR, columnSeparator);
grepJob.setCombinerClass(LongSumReducer.class);
grepJob.setReducerClass(LongSumReducer.class);
FileOutputFormat.setOutputPath(grepJob, analysisOutput);
grepJob.setOutputFormat(TextOutputFormat.class);
grepJob.setOutputKeyClass(Text.class);
grepJob.setOutputValueClass(LongWritable.class);
grepJob.setOutputKeyComparatorClass(LogComparator.class);
grepJob.setNumReduceTasks(1); // write a single file
JobClient.runJob(grepJob);
}
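// doAnalyze wires TextInputFormat -> LogRegexMapper -> LongSumReducer
// (used as both combiner and reducer), with LogComparator ordering the
// single reducer's output by the configured sort columns.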
public static void main(String[] args) {
Log LOG = LogFactory.getLog(Logalyzer.class);
String version = "Logalyzer.0.0.1";
String usage = "Usage: Logalyzer [-archive -logs <urlsFile>] " +
"-archiveDir <archiveDirectory> " +
"-grep <pattern> -sort <column1,column2,...> -separator <separator> " +
"-analysis <outputDirectory>";
System.out.println(version);
if (args.length == 0) {
System.err.println(usage);
System.exit(-1);
}
//Command line arguments
boolean archive = false;
boolean grep = false;
boolean sort = false;
String archiveDir = "";
String logListURI = "";
String grepPattern = ".*";
String sortColumns = "";
String columnSeparator = " ";
String outputDirectory = "";
for (int i = 0; i < args.length; i++) { // parse command line
if (args[i].equals("-archive")) {
archive = true;
} else if (args[i].equals("-archiveDir")) {
archiveDir = args[++i];
} else if (args[i].equals("-grep")) {
grep = true;
grepPattern = args[++i];
} else if (args[i].equals("-logs")) {
logListURI = args[++i];
} else if (args[i].equals("-sort")) {
sort = true;
sortColumns = args[++i];
} else if (args[i].equals("-separator")) {
columnSeparator = args[++i];
} else if (args[i].equals("-analysis")) {
outputDirectory = args[++i];
}
}
LOG.info("analysisDir = " + outputDirectory);
LOG.info("archiveDir = " + archiveDir);
LOG.info("logListURI = " + logListURI);
LOG.info("grepPattern = " + grepPattern);
LOG.info("sortColumns = " + sortColumns);
LOG.info("separator = " + columnSeparator);
try {
Logalyzer logalyzer = new Logalyzer();
// Archive?
if (archive) {
logalyzer.doArchive(logListURI, archiveDir);
}
// Analyze?
if (grep || sort) {
logalyzer.doAnalyze(archiveDir, outputDirectory, grepPattern, sortColumns, columnSeparator);
}
} catch (IOException ioe) {
ioe.printStackTrace();
System.exit(-1);
}
} //main
} //class Logalyzer
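
For reference, the removed tool was driven entirely through main(). A minimal
sketch of an invocation that archives a list of logs and then greps and sorts
them; the flags mirror the usage string above, and all paths are placeholders:

// Hypothetical invocation of the removed tool (placeholder paths).
String[] args = {
"-archive",
"-logs", "hdfs://namenode/tmp/loglist.txt", // uri listing the log files
"-archiveDir", "/archives/logs", // dfs directory to archive into
"-grep", "Exception", // pattern to count per line
"-sort", "0,1", // sort spec: columns 0, then 1
"-separator", " ",
"-analysis", "/archives/analysis" // analysis output directory
};
Logalyzer.main(args);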

org/apache/hadoop/tools/TestLogalyzer.java

@@ -1,133 +0,0 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.tools;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.EnumSet;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.CreateFlag;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileContext;
import org.apache.hadoop.fs.Path;
import org.junit.Assert;
import org.junit.Test;
public class TestLogalyzer {
private static String EL = System.getProperty("line.separator");
private static String TAB = "\t";
private static final Log LOG = LogFactory.getLog(TestLogalyzer.class);
private static File workSpace = new File("target",
TestLogalyzer.class.getName() + "-workSpace");
private static File outdir = new File(workSpace.getAbsoluteFile()
+ File.separator + "out");
@Test
@SuppressWarnings("deprecation")
public void testLogalyzer() throws Exception {
Path f = createLogFile();
String[] args = new String[10];
args[0] = "-archiveDir";
args[1] = f.toString();
args[2] = "-grep";
args[3] = "44";
args[4] = "-sort";
args[5] = "0";
args[6] = "-analysis";
args[7] = outdir.getAbsolutePath();
args[8] = "-separator";
args[9] = " ";
Logalyzer.main(args);
checkResult();
}
private void checkResult() throws Exception {
File result = new File(outdir.getAbsolutePath() + File.separator
+ "part-00000");
File success = new File(outdir.getAbsolutePath() + File.separator
+ "_SUCCESS");
Assert.assertTrue(success.exists());
FileInputStream fis = new FileInputStream(result);
BufferedReader br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
String line = br.readLine();
Assert.assertEquals("1 44" + TAB + "2", line);
line = br.readLine();
Assert.assertEquals("3 44" + TAB + "1", line);
line = br.readLine();
Assert.assertEquals("4 44" + TAB + "1", line);
br.close();
}
/**
 * Create two simple log files to analyze.
 *
 * @return the path of the directory containing the log files
 * @throws IOException
 */
private Path createLogFile() throws IOException {
FileContext files = FileContext.getLocalFSFileContext();
Path ws = new Path(workSpace.getAbsoluteFile().getAbsolutePath());
files.delete(ws, true);
Path workSpacePath = new Path(workSpace.getAbsolutePath(), "log");
files.mkdir(workSpacePath, null, true);
LOG.info("create logfile.log");
Path logfile1 = new Path(workSpacePath, "logfile.log");
FSDataOutputStream os = files.create(logfile1,
EnumSet.of(CreateFlag.CREATE));
os.writeBytes("4 3" + EL + "1 3" + EL + "4 44" + EL);
os.writeBytes("2 3" + EL + "1 3" + EL + "0 45" + EL);
os.writeBytes("4 3" + EL + "1 3" + EL + "1 44" + EL);
os.flush();
os.close();
LOG.info("create logfile1.log");
Path logfile2 = new Path(workSpacePath, "logfile1.log");
os = files.create(logfile2, EnumSet.of(CreateFlag.CREATE));
os.writeBytes("4 3" + EL + "1 3" + EL + "3 44" + EL);
os.writeBytes("2 3" + EL + "1 3" + EL + "0 45" + EL);
os.writeBytes("4 3" + EL + "1 3" + EL + "1 44" + EL);
os.flush();
os.close();
return workSpacePath;
}
}
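
With DistCpV1 gone, the copy that Logalyzer.doArchive() delegated to
DistCpV1.copy() maps onto the current DistCp (v2). A minimal sketch, assuming
the v2 org.apache.hadoop.tools.DistCp Tool and its -f flag (which, like
logListURI above, reads a file listing the source URIs); all paths are
placeholders, and ToolRunner.run throws Exception, so real callers would
declare or wrap it:

// Hypothetical doArchive() equivalent using DistCp v2 via ToolRunner.
Configuration conf = new Configuration();
int rc = ToolRunner.run(conf, new DistCp(conf, null), new String[] {
"-f", "hdfs://namenode/tmp/loglist.txt", // file listing source uris
"hdfs://namenode/archives/logs" // target directory
});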