HADOOP-4687. Pulling trunk docs to branch

git-svn-id: https://svn.apache.org/repos/asf/hadoop/core/branches/HADOOP-4687/core@786720 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Owen O'Malley 2009-06-19 23:39:51 +00:00
parent ff11be5426
commit cfb9636781
165 changed files with 31020 additions and 0 deletions

View File

@ -0,0 +1,170 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* General
*/
img { border: 0; }
#content table {
border: 0;
width: 100%;
}
/*Hack to get IE to render the table at 100%*/
* html #content table { margin-left: -3px; }
#content th,
#content td {
margin: 0;
padding: 0;
vertical-align: top;
}
.clearboth {
clear: both;
}
.note, .warning, .fixme {
border: solid black 1px;
margin: 1em 3em;
}
.note .label {
background: #369;
color: white;
font-weight: bold;
padding: 5px 10px;
}
.note .content {
background: #F0F0FF;
color: black;
line-height: 120%;
font-size: 90%;
padding: 5px 10px;
}
.warning .label {
background: #C00;
color: white;
font-weight: bold;
padding: 5px 10px;
}
.warning .content {
background: #FFF0F0;
color: black;
line-height: 120%;
font-size: 90%;
padding: 5px 10px;
}
.fixme .label {
background: #C6C600;
color: black;
font-weight: bold;
padding: 5px 10px;
}
.fixme .content {
padding: 5px 10px;
}
/**
* Typography
*/
body {
font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif;
font-size: 100%;
}
#content {
font-family: Georgia, Palatino, Times, serif;
font-size: 95%;
}
#tabs {
font-size: 70%;
}
#menu {
font-size: 80%;
}
#footer {
font-size: 70%;
}
h1, h2, h3, h4, h5, h6 {
font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif;
font-weight: bold;
margin-top: 1em;
margin-bottom: .5em;
}
h1 {
  margin-top: 0;
  margin-bottom: 1em;
  font-size: 1.4em;
  /* A hex color needs its leading '#'; without it the value is invalid CSS
     and standards-mode browsers ignore the declaration. */
  background-color: #73CAFF;
}
#content h1 {
font-size: 160%;
margin-bottom: .5em;
}
#menu h1 {
margin: 0;
padding: 10px;
background: #336699;
color: white;
}
h2 {
  font-size: 120%;
  /* A hex color needs its leading '#'; without it the value is invalid CSS
     and standards-mode browsers ignore the declaration. */
  background-color: #73CAFF;
}
h3 { font-size: 100%; }
h4 { font-size: 90%; }
h5 { font-size: 80%; }
h6 { font-size: 75%; }
p {
line-height: 120%;
text-align: left;
margin-top: .5em;
margin-bottom: 1em;
}
#content li,
#content th,
#content td,
#content li ul,
#content li ol{
margin-top: .5em;
margin-bottom: .5em;
}
#content li li,
#minitoc-area li{
margin-top: 0em;
margin-bottom: 0em;
}
#content .attribution {
text-align: right;
font-style: italic;
font-size: 85%;
margin-top: 1em;
}
.codefrag {
font-family: "Courier New", Courier, monospace;
font-size: 110%;
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
body {
font-family: Courier New, monospace;
font-size: 10pt;
}
h1 {
font-family: Courier New, monospace;
font-size: 10pt;
}
h2 {
font-family: Courier New, monospace;
font-size: 10pt;
}
h3 {
font-family: Courier New, monospace;
font-size: 10pt;
}
a:link {
color: blue;
}
a:visited {
color: purple;
}
li {
margin-top: 1em;
margin-bottom: 1em;
}

282
src/docs/changes/changes2html.pl Executable file
View File

@ -0,0 +1,282 @@
#!/usr/bin/perl
#
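# Transforms CHANGES.txt into Changes.html. This copy links HADOOP-nnn
# issue keys to JIRA; the script was written for Lucene Java's CHANGES.txt.
#
# Input is read from STDIN (or from files named on the command line);
# output is written to STDOUT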
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use strict;
use warnings;
# File-scope state shared by the parsing pass and the HTML emission pass.

# URL that an issue key (HADOOP-nnn) is appended to when building links
my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
# Document title: the first non-blank input line, whitespace-trimmed
my $title = undef;
# Heading line of the release currently being parsed or printed
my $release = undef;
# Array ref of [ heading, items ] pairs belonging to the current release
my $sections = undef;
# Array ref for the current section: element 0 is the list type, the
# remaining elements are the item texts
my $items = undef;
# Ids derived from the first and second release headings; the generated
# collapse() JavaScript does not hide the <ul> elements carrying these ids
my $first_relid = undef;
my $second_relid = undef;
# One [ release heading, sections ] pair per release, in input order
my @releases = ();
my @lines = <>; # Get all input at once
#
# Parse input and build hierarchical release structure in @releases
#
# Resulting shape:
#
#   @releases = ( [ $release_heading, $sections ], ... )
#   $sections = [ [ $section_heading, $items ], ... ]
#   $items    = [ $list_type, $item_text, ... ]
#
# $section_heading is undef for releases whose items are not grouped into
# sections.  $list_type is whatever get_list_type() reported for the first
# item of the section: 'numbered', 'paragraph', or a bullet character.
#
for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
    my $text = $lines[$line_num];
    next unless $text =~ /\S/;    # Skip blank lines

    # The first non-blank line is the document title
    unless (defined $title) {
        $text =~ s/^\s+//;        # Trim leading whitespace
        $text =~ s/\s+$//;        # Trim trailing whitespace
        $title = $text;
        next;
    }

    # Release headings.  The alternation is grouped so that the ^ anchor
    # applies to both words; ungrouped, "Trunk" would match anywhere in
    # the line and turn ordinary item lines into release headings.
    if ($text =~ /^(?:Release|Trunk)/) {
        $release  = $text;
        $sections = [];
        push @releases, [ $release, $sections ];
        ($first_relid  = lc($release)) =~ s/\s+/_/g if ($#releases == 0);
        ($second_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 1);
        $items = undef;
        next;
    }

    # Section heading: 2 leading spaces, words all capitalized
    # NOTE(review): the pattern below matches a single leading space, not
    # two as the comment above says -- confirm which one is intended.
    if ($text =~ /^ ([A-Z]+)\s*/) {
        $items = [];
        push @$sections, [ $text, $items ];
        next;
    }

    # Handle earlier releases without sections - create a headless section
    unless ($items) {
        $items = [];
        push @$sections, [ undef, $items ];
    }

    # 0th position of the items array is the list type; it is decided by
    # the first item seen in the section and reused for all later items.
    my $type;
    if (@$items) {
        $type = $items->[0];
    } else {
        $type = get_list_type($text);
        push @$items, $type;
    }

    # Per list type: the prefix stripped from the item's first line (its
    # width is also stripped from continuation lines) and the pattern that
    # marks the line on which the item ends.
    my ($prefix_re, $boundary_re);
    if ($type eq 'numbered') {    # The modern items list style
        # Boundary is another numbered item or an unindented line
        $prefix_re   = qr/^(\s{0,2}\d+\.\s*)/;
        $boundary_re = qr/^(?:\s{0,2}\d+\.\s*\S|\S)/;
    } elsif ($type eq 'paragraph') {
        # Boundary is a blank line
        $prefix_re   = qr/^(\s+)/;
        $boundary_re = qr/^\s*\z/;
    } else {                      # $type is one of the bulleted types
        # Boundary is another bullet or a blank line.  The bullet is
        # quoted because '.' is a regex metacharacter and would otherwise
        # match any character.
        my $bullet = quotemeta($type);
        $prefix_re   = qr/^(\s*$bullet\s*)/;
        $boundary_re = qr/^\s*(?:$bullet|\Z)/;
    }

    my $item = $text;
    # Only trust $1 when the substitution actually matched
    my $leading_ws_width = ($item =~ s/$prefix_re//) ? length($1) : 0;
    $item =~ s/\s+$//;            # Trim trailing whitespace
    $item .= "\n";

    # Peek at the next line instead of consuming it, so a boundary line is
    # always left for the outer loop -- even when it is the last line.
    while ($line_num < $#lines and $lines[$line_num + 1] !~ $boundary_re) {
        my $continuation = $lines[++$line_num];
        $continuation =~ s/^\s{$leading_ws_width}//;    # Trim leading whitespace
        $continuation =~ s/\s+$//;                      # Trim trailing whitespace
        $item .= "$continuation\n";
    }

    # Numbered items may have swallowed blank lines; trim the trailing ones
    $item =~ s/\n+\Z/\n/ if ($type eq 'numbered');
    push @$items, $item;
}
#
# Print HTML-ified version to STDOUT
#
# This statement emits the page header: license banner, <head> with the two
# stylesheets and the toggle/collapse JavaScript, the logo and the <h1>.
# On load, collapse() hides every <ul> except those whose id equals one of
# the first two release ids, and hides every <ol>.
#

# With fewer than two release headings one or both ids are still undefined;
# use an empty id so interpolation does not raise "uninitialized" warnings.
$first_relid  = '' unless defined $first_relid;
$second_relid = '' unless defined $second_relid;

# The title is free text taken from the input, so escape HTML metacharacters
# before placing it inside <title> and <h1>.
my $title_html = defined $title ? $title : '';
$title_html =~ s/&/&amp;/g;
$title_html =~ s/</&lt;/g;
$title_html =~ s/>/&gt;/g;

print<<"__HTML_HEADER__";
<!--
**********************************************************
** WARNING: This file is generated from CHANGES.txt by the
** Perl script 'changes2html.pl'.
** Do *not* edit this file!
**********************************************************
****************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
****************************************************************************
-->
<html>
<head>
<title>$title_html</title>
<link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
<link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<SCRIPT>
function toggleList(e) {
element = document.getElementById(e).style;
element.display == 'none' ? element.display = 'block' : element.display='none';
}
function collapse() {
for (var i = 0; i < document.getElementsByTagName("ul").length; i++) {
var list = document.getElementsByTagName("ul")[i];
if (list.id != '$first_relid' && list.id != '$second_relid') {
list.style.display = "none";
}
}
for (var i = 0; i < document.getElementsByTagName("ol").length; i++) {
document.getElementsByTagName("ol")[i].style.display = "none";
}
}
window.onload = collapse;
</SCRIPT>
</head>
<body>
<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
<h1>$title_html</h1>
__HTML_HEADER__
#
# Emit one heading plus nested lists per release, then close the document.
# The first two releases get <h2> headings; from the third release on,
# everything is wrapped in an "Older Releases" <ul id="older"> and the
# release headings drop to <h3>.
#
my $heading;
my $relcnt = 0;
my $header = 'h2';
for my $rel (@releases) {
    if (++$relcnt == 3) {
        $header = 'h3';
        print "<h2><a href=\"javascript:toggleList('older')\">";
        print "Older Releases";
        print "</a></h2>\n";
        print "<ul id=\"older\">\n";
    }

    ($release, $sections) = @$rel;

    # The first section heading is undefined for the older sectionless releases
    my $has_release_sections = $sections->[0][0];

    (my $relid = lc($release)) =~ s/\s+/_/g;
    print "<$header><a href=\"javascript:toggleList('$relid')\">";
    print "$release";
    print "</a></$header>\n";
    print "<ul id=\"$relid\">\n"
        if ($has_release_sections);

    for my $section (@$sections) {
        ($heading, $items) = @$section;

        # Headless sections carry an undef heading; lc(undef) would warn
        (my $sectid = lc(defined $heading ? $heading : '')) =~ s/\s+/_/g;

        # Element 0 of @$items is the list type, so the last index is
        # also the number of items
        my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)";
        print " <li><a href=\"javascript:toggleList('$relid.$sectid')\">",
              ($heading || ''), "</a>&nbsp;&nbsp;&nbsp;$numItemsStr\n"
            if ($has_release_sections);

        my $list_type = $items->[0] || '';
        my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul');
        my $listid = $sectid ? "$relid.$sectid" : $relid;
        print " <$list id=\"$listid\">\n";

        for my $itemnum (1..$#{$items}) {
            my $item = $items->[$itemnum];
            $item =~ s:&:&amp;:g;                          # Escape HTML metachars
            $item =~ s:<:&lt;:g;
            $item =~ s:>:&gt;:g;
            $item =~ s:\s*(\([^)"]+?\))\s*$:<br />$1:;     # Separate attribution
            $item =~ s:\n{2,}:\n<p/>\n:g;                  # Keep paragraph breaks

            # Link to JIRA.  The URL prefix is quoted so that its dots
            # match literally when an item already spells out the full URL.
            $item =~ s{(?:\Q${jira_url_prefix}\E)?(HADOOP-\d+)}
                      {<a href="${jira_url_prefix}$1">$1</a>}g;
            print " <li>$item</li>\n";
        }

        print " </$list>\n";
        print " </li>\n" if ($has_release_sections);
    }
    print "</ul>\n" if ($has_release_sections);
}

# The "older" list is opened when the third release is reached, so it must
# be closed whenever there are at least three releases (not more than three).
print "</ul>\n" if ($relcnt >= 3);
print "</body>\n</html>\n";
#
# Subroutine: get_list_type
#
# Classifies the list style of a section from the first line of an item.
#
# Takes one parameter:
#
#    - The first line of a sub-section/point
#
# Returns one scalar:
#
#    - 'numbered' when the line begins, after at most two whitespace
#      characters, with digits and a period followed by whitespace and text;
#      the bullet character itself ('-' or '.') when the line begins with
#      that bullet followed by whitespace and text; 'paragraph' otherwise.
#
sub get_list_type {
    my ($candidate) = @_;

    return 'numbered' if $candidate =~ /^\s{0,2}\d+\.\s+\S+/;
    return $1         if $candidate =~ /^\s*([-.])\s+\S+/;
    return 'paragraph';    # Neither numbered nor bulleted
}
1;

View File

@ -0,0 +1,170 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* General
*/
img { border: 0; }
#content table {
border: 0;
width: 100%;
}
/*Hack to get IE to render the table at 100%*/
* html #content table { margin-left: -3px; }
#content th,
#content td {
margin: 0;
padding: 0;
vertical-align: top;
}
.clearboth {
clear: both;
}
.note, .warning, .fixme {
border: solid black 1px;
margin: 1em 3em;
}
.note .label {
background: #369;
color: white;
font-weight: bold;
padding: 5px 10px;
}
.note .content {
background: #F0F0FF;
color: black;
line-height: 120%;
font-size: 90%;
padding: 5px 10px;
}
.warning .label {
background: #C00;
color: white;
font-weight: bold;
padding: 5px 10px;
}
.warning .content {
background: #FFF0F0;
color: black;
line-height: 120%;
font-size: 90%;
padding: 5px 10px;
}
.fixme .label {
background: #C6C600;
color: black;
font-weight: bold;
padding: 5px 10px;
}
.fixme .content {
padding: 5px 10px;
}
/**
* Typography
*/
body {
font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif;
font-size: 100%;
}
#content {
font-family: Georgia, Palatino, Times, serif;
font-size: 95%;
}
#tabs {
font-size: 70%;
}
#menu {
font-size: 80%;
}
#footer {
font-size: 70%;
}
h1, h2, h3, h4, h5, h6 {
font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif;
font-weight: bold;
margin-top: 1em;
margin-bottom: .5em;
}
h1 {
  margin-top: 0;
  margin-bottom: 1em;
  font-size: 1.4em;
  /* A hex color needs its leading '#'; without it the value is invalid CSS
     and standards-mode browsers ignore the declaration. */
  background-color: #73CAFF;
}
#content h1 {
font-size: 160%;
margin-bottom: .5em;
}
#menu h1 {
margin: 0;
padding: 10px;
background: #336699;
color: white;
}
h2 {
  font-size: 120%;
  /* A hex color needs its leading '#'; without it the value is invalid CSS
     and standards-mode browsers ignore the declaration. */
  background-color: #73CAFF;
}
h3 { font-size: 100%; }
h4 { font-size: 90%; }
h5 { font-size: 80%; }
h6 { font-size: 75%; }
p {
line-height: 120%;
text-align: left;
margin-top: .5em;
margin-bottom: 1em;
}
#content li,
#content th,
#content td,
#content li ul,
#content li ol{
margin-top: .5em;
margin-bottom: .5em;
}
#content li li,
#minitoc-area li{
margin-top: 0em;
margin-bottom: 0em;
}
#content .attribution {
text-align: right;
font-style: italic;
font-size: 85%;
margin-top: 1em;
}
.codefrag {
font-family: "Courier New", Courier, monospace;
font-size: 110%;
}

View File

@ -0,0 +1,49 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
body {
font-family: Courier New, monospace;
font-size: 10pt;
}
h1 {
font-family: Courier New, monospace;
font-size: 10pt;
}
h2 {
font-family: Courier New, monospace;
font-size: 10pt;
}
h3 {
font-family: Courier New, monospace;
font-size: 10pt;
}
a:link {
color: blue;
}
a:visited {
color: purple;
}
li {
margin-top: 1em;
margin-bottom: 1em;
}

View File

@ -0,0 +1,282 @@
#!/usr/bin/perl
#
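# Transforms CHANGES.txt into Changes.html. This copy links HADOOP-nnn
# issue keys to JIRA; the script was written for Lucene Java's CHANGES.txt.
#
# Input is read from STDIN (or from files named on the command line);
# output is written to STDOUT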
#
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
use strict;
use warnings;
# File-scope state shared by the parsing pass and the HTML emission pass.

# URL that an issue key (HADOOP-nnn) is appended to when building links
my $jira_url_prefix = 'http://issues.apache.org/jira/browse/';
# Document title: the first non-blank input line, whitespace-trimmed
my $title = undef;
# Heading line of the release currently being parsed or printed
my $release = undef;
# Array ref of [ heading, items ] pairs belonging to the current release
my $sections = undef;
# Array ref for the current section: element 0 is the list type, the
# remaining elements are the item texts
my $items = undef;
# Ids derived from the first and second release headings; the generated
# collapse() JavaScript does not hide the <ul> elements carrying these ids
my $first_relid = undef;
my $second_relid = undef;
# One [ release heading, sections ] pair per release, in input order
my @releases = ();
my @lines = <>; # Get all input at once
#
# Parse input and build hierarchical release structure in @releases
#
# Resulting shape:
#
#   @releases = ( [ $release_heading, $sections ], ... )
#   $sections = [ [ $section_heading, $items ], ... ]
#   $items    = [ $list_type, $item_text, ... ]
#
# $section_heading is undef for releases whose items are not grouped into
# sections.  $list_type is whatever get_list_type() reported for the first
# item of the section: 'numbered', 'paragraph', or a bullet character.
#
for (my $line_num = 0 ; $line_num <= $#lines ; ++$line_num) {
    my $text = $lines[$line_num];
    next unless $text =~ /\S/;    # Skip blank lines

    # The first non-blank line is the document title
    unless (defined $title) {
        $text =~ s/^\s+//;        # Trim leading whitespace
        $text =~ s/\s+$//;        # Trim trailing whitespace
        $title = $text;
        next;
    }

    # Release headings.  The alternation is grouped so that the ^ anchor
    # applies to both words; ungrouped, "Trunk" would match anywhere in
    # the line and turn ordinary item lines into release headings.
    if ($text =~ /^(?:Release|Trunk)/) {
        $release  = $text;
        $sections = [];
        push @releases, [ $release, $sections ];
        ($first_relid  = lc($release)) =~ s/\s+/_/g if ($#releases == 0);
        ($second_relid = lc($release)) =~ s/\s+/_/g if ($#releases == 1);
        $items = undef;
        next;
    }

    # Section heading: 2 leading spaces, words all capitalized
    # NOTE(review): the pattern below matches a single leading space, not
    # two as the comment above says -- confirm which one is intended.
    if ($text =~ /^ ([A-Z]+)\s*/) {
        $items = [];
        push @$sections, [ $text, $items ];
        next;
    }

    # Handle earlier releases without sections - create a headless section
    unless ($items) {
        $items = [];
        push @$sections, [ undef, $items ];
    }

    # 0th position of the items array is the list type; it is decided by
    # the first item seen in the section and reused for all later items.
    my $type;
    if (@$items) {
        $type = $items->[0];
    } else {
        $type = get_list_type($text);
        push @$items, $type;
    }

    # Per list type: the prefix stripped from the item's first line (its
    # width is also stripped from continuation lines) and the pattern that
    # marks the line on which the item ends.
    my ($prefix_re, $boundary_re);
    if ($type eq 'numbered') {    # The modern items list style
        # Boundary is another numbered item or an unindented line
        $prefix_re   = qr/^(\s{0,2}\d+\.\s*)/;
        $boundary_re = qr/^(?:\s{0,2}\d+\.\s*\S|\S)/;
    } elsif ($type eq 'paragraph') {
        # Boundary is a blank line
        $prefix_re   = qr/^(\s+)/;
        $boundary_re = qr/^\s*\z/;
    } else {                      # $type is one of the bulleted types
        # Boundary is another bullet or a blank line.  The bullet is
        # quoted because '.' is a regex metacharacter and would otherwise
        # match any character.
        my $bullet = quotemeta($type);
        $prefix_re   = qr/^(\s*$bullet\s*)/;
        $boundary_re = qr/^\s*(?:$bullet|\Z)/;
    }

    my $item = $text;
    # Only trust $1 when the substitution actually matched
    my $leading_ws_width = ($item =~ s/$prefix_re//) ? length($1) : 0;
    $item =~ s/\s+$//;            # Trim trailing whitespace
    $item .= "\n";

    # Peek at the next line instead of consuming it, so a boundary line is
    # always left for the outer loop -- even when it is the last line.
    while ($line_num < $#lines and $lines[$line_num + 1] !~ $boundary_re) {
        my $continuation = $lines[++$line_num];
        $continuation =~ s/^\s{$leading_ws_width}//;    # Trim leading whitespace
        $continuation =~ s/\s+$//;                      # Trim trailing whitespace
        $item .= "$continuation\n";
    }

    # Numbered items may have swallowed blank lines; trim the trailing ones
    $item =~ s/\n+\Z/\n/ if ($type eq 'numbered');
    push @$items, $item;
}
#
# Print HTML-ified version to STDOUT
#
# This statement emits the page header: license banner, <head> with the two
# stylesheets and the toggle/collapse JavaScript, the logo and the <h1>.
# On load, collapse() hides every <ul> except those whose id equals one of
# the first two release ids, and hides every <ol>.
#

# With fewer than two release headings one or both ids are still undefined;
# use an empty id so interpolation does not raise "uninitialized" warnings.
$first_relid  = '' unless defined $first_relid;
$second_relid = '' unless defined $second_relid;

# The title is free text taken from the input, so escape HTML metacharacters
# before placing it inside <title> and <h1>.
my $title_html = defined $title ? $title : '';
$title_html =~ s/&/&amp;/g;
$title_html =~ s/</&lt;/g;
$title_html =~ s/>/&gt;/g;

print<<"__HTML_HEADER__";
<!--
**********************************************************
** WARNING: This file is generated from CHANGES.txt by the
** Perl script 'changes2html.pl'.
** Do *not* edit this file!
**********************************************************
****************************************************************************
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
****************************************************************************
-->
<html>
<head>
<title>$title_html</title>
<link rel="stylesheet" href="ChangesFancyStyle.css" title="Fancy">
<link rel="alternate stylesheet" href="ChangesSimpleStyle.css" title="Simple">
<META http-equiv="Content-Type" content="text/html; charset=UTF-8"/>
<SCRIPT>
function toggleList(e) {
element = document.getElementById(e).style;
element.display == 'none' ? element.display = 'block' : element.display='none';
}
function collapse() {
for (var i = 0; i < document.getElementsByTagName("ul").length; i++) {
var list = document.getElementsByTagName("ul")[i];
if (list.id != '$first_relid' && list.id != '$second_relid') {
list.style.display = "none";
}
}
for (var i = 0; i < document.getElementsByTagName("ol").length; i++) {
document.getElementsByTagName("ol")[i].style.display = "none";
}
}
window.onload = collapse;
</SCRIPT>
</head>
<body>
<a href="http://hadoop.apache.org/core/"><img class="logoImage" alt="Hadoop" src="images/hadoop-logo.jpg" title="Scalable Computing Platform"></a>
<h1>$title_html</h1>
__HTML_HEADER__
#
# Emit one heading plus nested lists per release, then close the document.
# The first two releases get <h2> headings; from the third release on,
# everything is wrapped in an "Older Releases" <ul id="older"> and the
# release headings drop to <h3>.
#
my $heading;
my $relcnt = 0;
my $header = 'h2';
for my $rel (@releases) {
    if (++$relcnt == 3) {
        $header = 'h3';
        print "<h2><a href=\"javascript:toggleList('older')\">";
        print "Older Releases";
        print "</a></h2>\n";
        print "<ul id=\"older\">\n";
    }

    ($release, $sections) = @$rel;

    # The first section heading is undefined for the older sectionless releases
    my $has_release_sections = $sections->[0][0];

    (my $relid = lc($release)) =~ s/\s+/_/g;
    print "<$header><a href=\"javascript:toggleList('$relid')\">";
    print "$release";
    print "</a></$header>\n";
    print "<ul id=\"$relid\">\n"
        if ($has_release_sections);

    for my $section (@$sections) {
        ($heading, $items) = @$section;

        # Headless sections carry an undef heading; lc(undef) would warn
        (my $sectid = lc(defined $heading ? $heading : '')) =~ s/\s+/_/g;

        # Element 0 of @$items is the list type, so the last index is
        # also the number of items
        my $numItemsStr = $#{$items} > 0 ? "($#{$items})" : "(none)";
        print " <li><a href=\"javascript:toggleList('$relid.$sectid')\">",
              ($heading || ''), "</a>&nbsp;&nbsp;&nbsp;$numItemsStr\n"
            if ($has_release_sections);

        my $list_type = $items->[0] || '';
        my $list = ($has_release_sections || $list_type eq 'numbered' ? 'ol' : 'ul');
        my $listid = $sectid ? "$relid.$sectid" : $relid;
        print " <$list id=\"$listid\">\n";

        for my $itemnum (1..$#{$items}) {
            my $item = $items->[$itemnum];
            $item =~ s:&:&amp;:g;                          # Escape HTML metachars
            $item =~ s:<:&lt;:g;
            $item =~ s:>:&gt;:g;
            $item =~ s:\s*(\([^)"]+?\))\s*$:<br />$1:;     # Separate attribution
            $item =~ s:\n{2,}:\n<p/>\n:g;                  # Keep paragraph breaks

            # Link to JIRA.  The URL prefix is quoted so that its dots
            # match literally when an item already spells out the full URL.
            $item =~ s{(?:\Q${jira_url_prefix}\E)?(HADOOP-\d+)}
                      {<a href="${jira_url_prefix}$1">$1</a>}g;
            print " <li>$item</li>\n";
        }

        print " </$list>\n";
        print " </li>\n" if ($has_release_sections);
    }
    print "</ul>\n" if ($has_release_sections);
}

# The "older" list is opened when the third release is reached, so it must
# be closed whenever there are at least three releases (not more than three).
print "</ul>\n" if ($relcnt >= 3);
print "</body>\n</html>\n";
#
# Subroutine: get_list_type
#
# Classifies the list style of a section from the first line of an item.
#
# Takes one parameter:
#
#    - The first line of a sub-section/point
#
# Returns one scalar:
#
#    - 'numbered' when the line begins, after at most two whitespace
#      characters, with digits and a period followed by whitespace and text;
#      the bullet character itself ('-' or '.') when the line begins with
#      that bullet followed by whitespace and text; 'paragraph' otherwise.
#
sub get_list_type {
    my ($candidate) = @_;

    return 'numbered' if $candidate =~ /^\s{0,2}\d+\.\s+\S+/;
    return $1         if $candidate =~ /^\s*([-.])\s+\S+/;
    return 'paragraph';    # Neither numbered nor bulleted
}
1;

View File

@ -0,0 +1,109 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
##############
# Properties used by forrest.build.xml for building the website
# These are the defaults, un-comment them if you need to change them.
##############
# Prints out a summary of Forrest settings for this project
#forrest.echo=true
# Project name (used to name .war file)
#project.name=my-project
# Specifies name of Forrest skin to use
#project.skin=tigris
#project.skin=pelt
# comma separated list, file:// is supported
#forrest.skins.descriptors=http://forrest.apache.org/skins/skins.xml,file:///c:/myskins/skins.xml
##############
# behavioural properties
#project.menu-scheme=tab_attributes
#project.menu-scheme=directories
##############
# layout properties
# Properties that can be set to override the default locations
#
# Parent properties must be set. This usually means uncommenting
# project.content-dir if any other property using it is uncommented
#project.status=status.xml
#project.content-dir=src/documentation
#project.raw-content-dir=${project.content-dir}/content
#project.conf-dir=${project.content-dir}/conf
#project.sitemap-dir=${project.content-dir}
#project.xdocs-dir=${project.content-dir}/content/xdocs
#project.resources-dir=${project.content-dir}/resources
#project.stylesheets-dir=${project.resources-dir}/stylesheets
#project.images-dir=${project.resources-dir}/images
#project.schema-dir=${project.resources-dir}/schema
#project.skins-dir=${project.content-dir}/skins
#project.skinconf=${project.content-dir}/skinconf.xml
#project.lib-dir=${project.content-dir}/lib
#project.classes-dir=${project.content-dir}/classes
#project.translations-dir=${project.content-dir}/translations
##############
# validation properties
# This set of properties determine if validation is performed
# Values are inherited unless overridden.
# e.g. if forrest.validate=false then all others are false unless set to true.
#forrest.validate=true
#forrest.validate.xdocs=${forrest.validate}
#forrest.validate.skinconf=${forrest.validate}
#forrest.validate.sitemap=${forrest.validate}
#forrest.validate.stylesheets=${forrest.validate}
#forrest.validate.skins=${forrest.validate}
#forrest.validate.skins.stylesheets=${forrest.validate.skins}
# *.failonerror=(true|false) - stop when an XML file is invalid
#forrest.validate.failonerror=true
# *.excludes=(pattern) - comma-separated list of path patterns to not validate
# e.g.
#forrest.validate.xdocs.excludes=samples/subdir/**, samples/faq.xml
#forrest.validate.xdocs.excludes=
##############
# General Forrest properties
# The URL to start crawling from
#project.start-uri=linkmap.html
# Set logging level for messages printed to the console
# (DEBUG, INFO, WARN, ERROR, FATAL_ERROR)
#project.debuglevel=ERROR
# Max memory to allocate to Java
forrest.maxmemory=512m
# Any other arguments to pass to the JVM. For example, to run on an X-less
# server, set to -Djava.awt.headless=true
#forrest.jvmargs=
# The bugtracking URL - the issue number will be appended
#project.bugtracking-url=http://issues.apache.org/bugzilla/show_bug.cgi?id=
#project.bugtracking-url=http://issues.apache.org/jira/browse/
# The issues list as rss
#project.issues-rss-url=
#I18n Property only works for the "forrest run" target.
#project.i18n=true
project.configfile=${project.home}/src/documentation/conf/cli.xconf

View File

@ -0,0 +1,7 @@
This is the base documentation directory.
skinconf.xml # This file customizes Forrest for your project. In it, you
# tell forrest the project name, logo, copyright info, etc
sitemap.xmap # Optional. This sitemap is consulted before all core sitemaps.
# See http://forrest.apache.org/docs/project-sitemap.html

View File

@ -0,0 +1,40 @@
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#=======================================================================
# CatalogManager.properties
#
# This is the default properties file for Apache Forrest.
# This facilitates local configuration of application-specific catalogs.
#
# See the Apache Forrest documentation:
# http://forrest.apache.org/docs/your-project.html
# http://forrest.apache.org/docs/validation.html
# verbosity ... level of messages for status/debug
# See forrest/src/core/context/WEB-INF/cocoon.xconf
# catalogs ... list of additional catalogs to load
# (Note that Apache Forrest will automatically load its own default catalog
# from src/core/context/resources/schema/catalog.xcat)
# use full pathnames
# pathname separator is always semi-colon (;) regardless of operating system
# directory separator is always slash (/) regardless of operating system
#
#catalogs=/home/me/forrest/my-site/src/documentation/resources/schema/catalog.xcat
catalogs=

View File

@ -0,0 +1,327 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--+
| This is the Apache Cocoon command line configuration file.
| Here you give the command line interface details of where
| to find various aspects of your Cocoon installation.
|
| If you wish, you can also use this file to specify the URIs
| that you wish to generate.
|
| The current configuration information in this file is for
| building the Cocoon documentation. Therefore, all links here
| are relative to the build context dir, which, in the build.xml
| file, is set to ${build.context}
|
| Options:
| verbose: increase amount of information presented
| to standard output (default: false)
| follow-links: whether linked pages should also be
| generated (default: true)
| precompile-only: precompile sitemaps and XSP pages, but
| do not generate any pages (default: false)
| confirm-extensions: check the mime type for the generated page
| and adjust filename and links extensions
| to match the mime type
| (e.g. text/html->.html)
|
| Note: Whilst using an xconf file to configure the Cocoon
| Command Line gives access to more features, the use of
| command line parameters is more stable, as there are
| currently plans to improve the xconf format to allow
| greater flexibility. If you require a stable and
| consistent method for accessing the CLI, it is recommended
| that you use the command line parameters to configure
| the CLI. See documentation at:
| http://cocoon.apache.org/2.1/userdocs/offline/
| http://wiki.apache.org/cocoon/CommandLine
|
+-->
<cocoon verbose="true"
follow-links="true"
precompile-only="false"
confirm-extensions="false">
<!--+
| The context directory is usually the webapp directory
| containing the sitemap.xmap file.
|
| The config file is the cocoon.xconf file.
|
| The work directory is used by Cocoon to store temporary
| files and cache files.
|
| The destination directory is where generated pages will
| be written (assuming the 'simple' mapper is used, see
| below)
+-->
<context-dir>.</context-dir>
<config-file>WEB-INF/cocoon.xconf</config-file>
<work-dir>../tmp/cocoon-work</work-dir>
<dest-dir>../site</dest-dir>
<!--+
| A checksum file can be used to store checksums for pages
| as they are generated. When the site is next generated,
| files will not be written if their checksum has not changed.
| This means that it will be easier to detect which files
| need to be uploaded to a server, using the timestamp.
|
| The default path is relative to the core webapp directory.
| An absolute path can be used.
+-->
<!-- <checksums-uri>build/work/checksums</checksums-uri>-->
<!--+
| Broken link reporting options:
| Report into a text file, one link per line:
| <broken-links type="text" file="filename"/>
| Report into an XML file:
| <broken-links type="xml" file="filename"/>
| Ignore broken links (default):
| <broken-links type="none"/>
|
| Two attributes to this node specify whether a page should
| be generated when an error has occurred. 'generate' specifies
| whether a page should be generated (default: true) and
| extension specifies an extension that should be appended
| to the generated page's filename (default: none)
|
| Using this, a quick scan through the destination directory
| will show broken links, by their filename extension.
+-->
<broken-links type="xml"
file="../brokenlinks.xml"
generate="false"
extension=".error"
show-referrers="true"/>
<!--+
| Load classes at startup. This is necessary for generating
| from sites that use SQL databases and JDBC.
| The <load-class> element can be repeated if multiple classes
| are needed.
+-->
<!--
<load-class>org.firebirdsql.jdbc.Driver</load-class>
-->
<!--+
| Configures logging.
| The 'log-kit' parameter specifies the location of the log kit
| configuration file (usually called logkit.xconf).
|
| Logger specifies the logging category (for all logging prior
| to other Cocoon logging categories taking over)
|
| Available log levels are:
| DEBUG: prints all level of log messages.
| INFO: prints all level of log messages except DEBUG
| ones.
| WARN: prints all level of log messages except DEBUG
| and INFO ones.
| ERROR: prints all level of log messages except DEBUG,
| INFO and WARN ones.
| FATAL_ERROR: prints only log messages of this level
+-->
<!-- <logging log-kit="WEB-INF/logkit.xconf" logger="cli" level="ERROR" /> -->
<!--+
| Specifies the filename to be appended to URIs that
| refer to a directory (i.e. end with a forward slash).
+-->
<default-filename>index.html</default-filename>
<!--+
| Specifies a user agent string to the sitemap when
| generating the site.
|
| A generic term for a web browser is "user agent". Any
| user agent, when connecting to a web server, will provide
| a string to identify itself (e.g. as Internet Explorer or
| Mozilla). It is possible to have Cocoon serve different
| content depending upon the user agent string provided by
| the browser. If your site does this, then you may want to
| use this <user-agent> entry to provide a 'fake' user agent
| to Cocoon, so that it generates the correct version of your
| site.
|
| For most sites, this can be ignored.
+-->
<!--
<user-agent>Cocoon Command Line Environment 2.1</user-agent>
-->
<!--+
| Specifies an accept string to the sitemap when generating
| the site.
| User agents can specify to an HTTP server what types of content
| (by mime-type) they are able to receive. E.g. a browser may be
| able to handle jpegs, but not pngs. The HTTP accept header
| allows the server to take the browser's capabilities into account,
| and only send back content that it can handle.
|
| For most sites, this can be ignored.
+-->
<accept>*/*</accept>
<!--+
| Specifies which URIs should be included or excluded, according
| to wildcard patterns.
|
| These includes/excludes are only relevant when you are following
| links. A link URI must match an include pattern (if one is given)
| and not match an exclude pattern, if it is to be followed by
| Cocoon. It can be useful, for example, where there are links in
| your site to pages that are not generated by Cocoon, such as
| references to api-documentation.
|
| By default, all URIs are included. If both include and exclude
| patterns are specified, a URI is first checked against the
| include patterns, and then against the exclude patterns.
|
| Multiple patterns can be given, using multiple include or exclude
| nodes.
|
| The order of the elements is not significant, as only the first
| successful match of each category is used.
|
| Currently, only the complete source URI can be matched (including
| any URI prefix). Future plans include destination URI matching
| and regexp matching. If you have requirements for these, contact
| dev@cocoon.apache.org.
+-->
<exclude pattern="**/"/>
<exclude pattern="api/**"/>
<exclude pattern="jdiff/**"/>
<exclude pattern="changes.html"/>
<exclude pattern="releasenotes.html"/>
<!--
This is a workaround for FOR-284 "link rewriting broken when
linking to xml source views which contain site: links".
See the explanation there and in declare-broken-site-links.xsl
-->
<exclude pattern="site:**"/>
<exclude pattern="ext:**"/>
<exclude pattern="lm:**"/>
<exclude pattern="**/site:**"/>
<exclude pattern="**/ext:**"/>
<exclude pattern="**/lm:**"/>
<!-- Exclude tokens used in URLs to ASF mirrors (interpreted by a CGI) -->
<exclude pattern="[preferred]/**"/>
<exclude pattern="[location]"/>
<!-- <include-links extension=".html"/>-->
<!--+
| <uri> nodes specify the URIs that should be generated, and
| where required, what should be done with the generated pages.
| They describe the way the URI of the generated file is created
| from the source page's URI. There are three ways that a generated
| file URI can be created: append, replace and insert.
|
| The "type" attribute specifies one of (append|replace|insert):
|
| append:
| Append the generated page's URI to the end of the source URI:
|
| <uri type="append" src-prefix="documents/" src="index.html"
| dest="build/dest/"/>
|
| This means that
| (1) the "documents/index.html" page is generated
| (2) the file will be written to "build/dest/documents/index.html"
|
| replace:
| Completely ignore the generated page's URI - just
| use the destination URI:
|
| <uri type="replace" src-prefix="documents/" src="index.html"
| dest="build/dest/docs.html"/>
|
| This means that
| (1) the "documents/index.html" page is generated
| (2) the result is written to "build/dest/docs.html"
| (3) this works only for "single" pages - and not when links
| are followed
|
| insert:
| Insert generated page's URI into the destination
| URI at the point marked with a * (example uses fictional
| zip protocol)
|
| <uri type="insert" src-prefix="documents/" src="index.html"
| dest="zip://*.zip/page.html"/>
|
| This means that
| (1)
|
| In any of these scenarios, if the dest attribute is omitted,
| the value provided globally using the <dest-dir> node will
| be used instead.
+-->
<!--
<uri type="replace"
src-prefix="samples/"
src="hello-world/hello.html"
dest="build/dest/hello-world.html"/>
-->
<!--+
| <uri> nodes can be grouped together in a <uris> node. This
| enables a group of URIs to share properties. The following
| properties can be set for a group of URIs:
| * follow-links: should pages be crawled for links
| * confirm-extensions: should file extensions be checked
| for the correct mime type
| * src-prefix: all source URIs should be
| pre-pended with this prefix before
| generation. The prefix is not
| included when calculating the
| destination URI
| * dest: the base destination URI to be
| shared by all pages in this group
| * type: the method to be used to calculate
| the destination URI. See above
| section on <uri> node for details.
|
| Each <uris> node can have a name attribute. When a name
| attribute has been specified, the -n switch on the command
| line can be used to tell Cocoon to only process the URIs
| within this URI group. When no -n switch is given, all
| <uris> nodes are processed. Thus, one xconf file can be
| used to manage multiple sites.
+-->
<!--
<uris name="mirrors" follow-links="false">
<uri type="append" src="mirrors.html"/>
</uris>
-->
<!--+
| File containing URIs (plain text, one per line).
+-->
<!--
<uri-file>uris.txt</uri-file>
-->
</cocoon>

View File

@ -0,0 +1,386 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop集群搭建</title>
</header>
<body>
<section>
<title>目的</title>
<p>本文描述了如何安装、配置和管理有实际意义的Hadoop集群，其规模可从几个节点的小集群到几千个节点的超大集群。</p>
<p>如果你希望在单机上安装Hadoop玩玩，从<a href="quickstart.html">这里</a>能找到相关细节。</p>
</section>
<section>
<title>先决条件</title>
<ol>
<li>
确保在你集群中的每个节点上都安装了所有<a href="quickstart.html#PreReqs">必需</a>软件。
</li>
<li>
<a href="quickstart.html#下载">获取</a>Hadoop软件包。
</li>
</ol>
</section>
<section>
<title>安装</title>
<p>安装Hadoop集群通常要将安装软件解压到集群内的所有机器上。</p>
<p>通常,集群里的一台机器被指定为
<code>NameNode</code>，另一台不同的机器被指定为<code>JobTracker</code>。这些机器是<em>masters</em>。余下的机器既作为<code>DataNode</code><em>也</em>作为<code>TaskTracker</code>。这些机器是<em>slaves</em>。</p>
<p>我们用<code>HADOOP_HOME</code>指代安装的根路径。通常,集群里的所有机器的<code>HADOOP_HOME</code>路径相同。</p>
</section>
<section>
<title>配置</title>
<p>接下来的几节描述了如何配置Hadoop集群。</p>
<section>
<title>配置文件</title>
<p>对Hadoop的配置通过<code>conf/</code>目录下的两个重要配置文件完成:</p>
<ol>
<li>
<a href="ext:hadoop-default">hadoop-default.xml</a> - 只读的默认配置。
</li>
<li>
<em>hadoop-site.xml</em> - 集群特有的配置。
</li>
</ol>
<p>要了解更多关于这些配置文件如何影响Hadoop框架的细节请看<a href="ext:api/org/apache/hadoop/conf/configuration">这里</a></p>
<p>此外,通过设置<code>conf/hadoop-env.sh</code>中的变量为集群特有的值,你可以对<code>bin/</code>目录下的Hadoop脚本进行控制。</p>
</section>
<section>
<title>集群配置</title>
<p>要配置Hadoop集群你需要设置Hadoop守护进程的<em>运行环境</em>和Hadoop守护进程的<em>运行参数</em></p>
<p>Hadoop守护进程指<code>NameNode</code>/<code>DataNode</code>和
<code>JobTracker</code>/<code>TaskTracker</code>。</p>
<section>
<title>配置Hadoop守护进程的运行环境</title>
<p>管理员可在<code>conf/hadoop-env.sh</code>脚本内对Hadoop守护进程的运行环境做特别指定。</p>
<p>至少,你得设定<code>JAVA_HOME</code>使之在每一远端节点上都被正确设置。</p>
<p>管理员可以通过配置选项<code>HADOOP_*_OPTS</code>来分别配置各个守护进程。
下表是可以配置的选项。
</p>
<table>
<tr><th>守护进程</th><th>配置选项</th></tr>
<tr><td>NameNode</td><td>HADOOP_NAMENODE_OPTS</td></tr>
<tr><td>DataNode</td><td>HADOOP_DATANODE_OPTS</td></tr>
<tr><td>SecondaryNamenode</td>
<td>HADOOP_SECONDARYNAMENODE_OPTS</td></tr>
<tr><td>JobTracker</td><td>HADOOP_JOBTRACKER_OPTS</td></tr>
<tr><td>TaskTracker</td><td>HADOOP_TASKTRACKER_OPTS</td></tr>
</table>
<p>例如，配置Namenode时，为了使其能够并行回收垃圾（parallelGC），
要把下面的代码加入到<code>hadoop-env.sh</code> :
<br/><code>
export HADOOP_NAMENODE_OPTS="-XX:+UseParallelGC ${HADOOP_NAMENODE_OPTS}"
</code><br/></p>
<p>其它可定制的常用参数还包括:</p>
<ul>
<li>
<code>HADOOP_LOG_DIR</code> - 守护进程日志文件的存放目录。如果不存在会被自动创建。
</li>
<li>
<code>HADOOP_HEAPSIZE</code> - 最大可用的堆大小，单位为MB。比如：<code>1000MB</code>。
这个参数用于设置hadoop守护进程的堆大小。缺省大小是<code>1000MB</code>。
</li>
</ul>
</section>
<section>
<title>配置Hadoop守护进程的运行参数</title>
<p>这部分涉及Hadoop集群的重要参数这些参数在<code>conf/hadoop-site.xml</code>中指定。</p>
<table>
<tr>
<th>参数</th>
<th>取值</th>
<th>备注</th>
</tr>
<tr>
<td>fs.default.name</td>
<td><code>NameNode</code>的URI。</td>
<td><em>hdfs://主机名/</em></td>
</tr>
<tr>
<td>mapred.job.tracker</td>
<td><code>JobTracker</code>的主机或者IP和端口。</td>
<td><em>主机:端口</em></td>
</tr>
<tr>
<td>dfs.name.dir</td>
<td>
<code>NameNode</code>持久存储名字空间及事务日志的本地文件系统路径。</td>
<td>当这个值是一个逗号分割的目录列表时，nametable数据将会被复制到所有目录中做冗余备份。
</td>
</tr>
<tr>
<td>dfs.data.dir</td>
<td>
<code>DataNode</code>存放块数据的本地文件系统路径,逗号分割的列表。
</td>
<td>
当这个值是逗号分割的目录列表时,数据将被存储在所有目录下,通常分布在不同设备上。
</td>
</tr>
<tr>
<td>mapred.system.dir</td>
<td>Map/Reduce框架存储系统文件的HDFS路径。比如<code>/hadoop/mapred/system/</code>
</td>
<td>这个路径是默认文件系统（HDFS）下的路径，须从服务器和客户端上均可访问。
</td>
</tr>
<tr>
<td>mapred.local.dir</td>
<td>本地文件系统下逗号分割的路径列表Map/Reduce临时数据存放的地方。
</td>
<td>多路径有助于利用磁盘i/o。</td>
</tr>
<tr>
<td>mapred.tasktracker.{map|reduce}.tasks.maximum</td>
<td>某一<code>TaskTracker</code>上可运行的最大Map/Reduce任务数这些任务将同时各自运行。
</td>
<td>
默认为2（2个map和2个reduce），可依据硬件情况更改。
</td>
</tr>
<tr>
<td>dfs.hosts/dfs.hosts.exclude</td>
<td>许可/拒绝DataNode列表。</td>
<td>
如有必要用这个文件控制许可的datanode列表。
</td>
</tr>
<tr>
<td>mapred.hosts/mapred.hosts.exclude</td>
<td>许可/拒绝TaskTracker列表。</td>
<td>
如有必要用这个文件控制许可的TaskTracker列表。
</td>
</tr>
</table>
<p>通常,上述参数被标记为
<a href="ext:api/org/apache/hadoop/conf/configuration/final_parameters">
final</a> 以确保它们不被用户应用更改。
</p>
<section>
<title>现实世界的集群配置</title>
<p>这节罗列在大规模集群上运行<em>sort</em>基准测试(benchmark)时使用到的一些非缺省配置。</p>
<ul>
<li>
<p>运行sort900的一些非缺省配置值，sort900即在900个节点的集群上对9TB的数据进行排序：</p>
<table>
<tr>
<th>参数</th>
<th>取值</th>
<th>备注</th>
</tr>
<tr>
<td>dfs.block.size</td>
<td>134217728</td>
<td>针对大文件系统HDFS的块大小取128MB。</td>
</tr>
<tr>
<td>dfs.namenode.handler.count</td>
<td>40</td>
<td>
启动更多的NameNode服务线程去处理来自大量DataNode的RPC请求。
</td>
</tr>
<tr>
<td>mapred.reduce.parallel.copies</td>
<td>20</td>
<td>
reduce启动更多的并行拷贝器以获取大量map的输出。
</td>
</tr>
<tr>
<td>mapred.child.java.opts</td>
<td>-Xmx512M</td>
<td>
为map/reduce子虚拟机使用更大的堆。
</td>
</tr>
<tr>
<td>fs.inmemory.size.mb</td>
<td>200</td>
<td>
为reduce阶段合并map输出所需的内存文件系统分配更多的内存。
</td>
</tr>
<tr>
<td>io.sort.factor</td>
<td>100</td>
<td>文件排序时更多的流将同时被归并。</td>
</tr>
<tr>
<td>io.sort.mb</td>
<td>200</td>
<td>提高排序时的内存上限。</td>
</tr>
<tr>
<td>io.file.buffer.size</td>
<td>131072</td>
<td>SequenceFile中用到的读/写缓存大小。</td>
</tr>
</table>
</li>
<li>
<p>运行sort1400和sort2000时需要更新的配置即在1400个节点上对14TB的数据进行排序和在2000个节点上对20TB的数据进行排序</p>
<table>
<tr>
<th>参数</th>
<th>取值</th>
<th>备注</th>
</tr>
<tr>
<td>mapred.job.tracker.handler.count</td>
<td>60</td>
<td>
启用更多的JobTracker服务线程去处理来自大量TaskTracker的RPC请求。
</td>
</tr>
<tr>
<td>mapred.reduce.parallel.copies</td>
<td>50</td>
<td></td>
</tr>
<tr>
<td>tasktracker.http.threads</td>
<td>50</td>
<td>
为TaskTracker的Http服务启用更多的工作线程。reduce通过Http服务获取map的中间输出。
</td>
</tr>
<tr>
<td>mapred.child.java.opts</td>
<td>-Xmx1024M</td>
<td>使用更大的堆用于maps/reduces的子虚拟机</td>
</tr>
</table>
</li>
</ul>
</section>
</section>
<section>
<title>Slaves</title>
<p>通常，你选择集群中的一台机器作为<code>NameNode</code>，另外一台不同的机器作为<code>JobTracker</code>。余下的机器既作为<code>DataNode</code>又作为<code>TaskTracker</code>，这些被称之为<em>slaves</em>。</p>
<p><code>conf/slaves</code>文件中列出所有slave的主机名或者IP地址一行一个。</p>
</section>
<section>
<title>日志</title>
<p>Hadoop使用<a href="http://logging.apache.org/log4j/">Apache log4j</a>来记录日志,它由<a href="http://commons.apache.org/logging/">Apache Commons Logging</a>框架来实现。编辑<code>conf/log4j.properties</code>文件可以改变Hadoop守护进程的日志配置日志格式等</p>
<section>
<title>历史日志</title>
<p>作业的历史文件集中存放在<code>hadoop.job.history.location</code>,这个也可以是在分布式文件系统下的路径,其默认值为<code>${HADOOP_LOG_DIR}/history</code>。jobtracker的web UI上有历史日志的web UI链接。</p>
<p>历史文件在用户指定的目录<code>hadoop.job.history.user.location</code>也会记录一份这个配置的缺省值为作业的输出目录。这些文件被存放在指定路径下的“_logs/history/”目录中。因此默认情况下日志文件会在“mapred.output.dir/_logs/history/”下。如果将<code>hadoop.job.history.user.location</code>指定为值<code>none</code>,系统将不再记录此日志。</p>
<p>用户可使用以下命令在指定路径下查看历史日志汇总<br/>
<code>$ bin/hadoop job -history output-dir</code><br/>
这条命令会显示作业的细节信息,失败和终止的任务细节。 <br/>
关于作业的更多细节，比如成功的任务，以及对每个任务所做的尝试次数等可以用下面的命令查看<br/>
<code>$ bin/hadoop job -history all output-dir</code><br/></p>
</section>
</section>
</section>
<p>一旦全部必要的配置完成，将这些文件分发到所有机器的<code>HADOOP_CONF_DIR</code>路径下，通常是<code>${HADOOP_HOME}/conf</code>。</p>
</section>
<section>
<title>Hadoop的机架感知</title>
<p>HDFS和Map/Reduce的组件是能够感知机架的。</p>
<p><code>NameNode</code>和<code>JobTracker</code>通过调用管理员配置模块中的API<a href="ext:api/org/apache/hadoop/net/dnstoswitchmapping/resolve">resolve</a>来获取集群里每个slave的<code>机架id</code>。该API将slave的DNS名称（或者IP地址）转换成机架id。使用哪个模块是通过配置项<code>topology.node.switch.mapping.impl</code>来指定的。模块的默认实现会调用<code>topology.script.file.name</code>配置项指定的一个脚本/命令。如果topology.script.file.name未被设置，对于所有传入的IP地址，模块会返回<code>/default-rack</code>作为机架id。在Map/Reduce部分还有一个额外的配置项<code>mapred.cache.task.levels</code>，该参数决定cache的级数（在网络拓扑中）。例如，如果默认值是2，会建立两级的cache：一级针对主机（主机 -> 任务的映射），另一级针对机架（机架 -> 任务的映射）。
</p>
</section>
<section>
<title>启动Hadoop</title>
<p>启动Hadoop集群需要启动HDFS集群和Map/Reduce集群。</p>
<p>
格式化一个新的分布式文件系统:<br/>
<code>$ bin/hadoop namenode -format</code>
</p>
<p>
在分配的<code>NameNode</code>运行下面的命令启动HDFS<br/>
<code>$ bin/start-dfs.sh</code>
</p>
<p><code>bin/start-dfs.sh</code>脚本会参照<code>NameNode</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容，在所有列出的slave上启动<code>DataNode</code>守护进程。</p>
<p>
在分配的<code>JobTracker</code>运行下面的命令启动Map/Reduce<br/>
<code>$ bin/start-mapred.sh</code>
</p>
<p><code>bin/start-mapred.sh</code>脚本会参照<code>JobTracker</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容，在所有列出的slave上启动<code>TaskTracker</code>守护进程。</p>
</section>
<section>
<title>停止Hadoop</title>
<p>
在分配的<code>NameNode</code>执行下面的命令停止HDFS<br/>
<code>$ bin/stop-dfs.sh</code>
</p>
<p><code>bin/stop-dfs.sh</code>脚本会参照<code>NameNode</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容，在所有列出的slave上停止<code>DataNode</code>守护进程。</p>
<p>
在分配的<code>JobTracker</code>运行下面的命令停止Map/Reduce<br/>
<code>$ bin/stop-mapred.sh</code><br/>
</p>
<p><code>bin/stop-mapred.sh</code>脚本会参照<code>JobTracker</code>上<code>${HADOOP_CONF_DIR}/slaves</code>文件的内容，在所有列出的slave上停止<code>TaskTracker</code>守护进程。</p>
</section>
</body>
</document>

View File

@ -0,0 +1,596 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>命令手册</title>
</header>
<body>
<section>
<title>概述</title>
<p>
所有的hadoop命令均由bin/hadoop脚本引发。不指定参数运行hadoop脚本会打印所有命令的描述。
</p>
<p>
<code>用法hadoop [--config confdir] [COMMAND] [GENERIC_OPTIONS] [COMMAND_OPTIONS]</code>
</p>
<p>
Hadoop有一个选项解析框架用于解析一般的选项和运行类。
</p>
<table>
<tr><th> 命令选项 </th><th> 描述 </th></tr>
<tr>
<td><code>--config confdir</code></td>
<td>覆盖缺省配置目录。缺省是${HADOOP_HOME}/conf。</td>
</tr>
<tr>
<td><code>GENERIC_OPTIONS</code></td>
<td>多个命令都支持的通用选项。</td>
</tr>
<tr>
<td><code>COMMAND</code><br/><code>COMMAND_OPTIONS</code></td>
<td>各种各样的命令和它们的选项会在下面提到。这些命令被分为
<a href="commands_manual.html#用户命令">用户命令</a>
<a href="commands_manual.html#管理命令">管理命令</a>两组。</td>
</tr>
</table>
<section>
<title>常规选项</title>
<p>
下面的选项被
<a href="commands_manual.html#dfsadmin">dfsadmin</a>,
<a href="commands_manual.html#fs">fs</a>, <a href="commands_manual.html#fsck">fsck</a>
<a href="commands_manual.html#job">job</a>支持。
应用程序要实现
<a href="ext:api/org/apache/hadoop/util/tool">Tool</a>来支持
<a href="ext:api/org/apache/hadoop/util/genericoptionsparser">
常规选项</a>
</p>
<table>
<tr><th> GENERIC_OPTION </th><th> 描述 </th></tr>
<tr>
<td><code>-conf &lt;configuration file&gt;</code></td>
<td>指定应用程序的配置文件。</td>
</tr>
<tr>
<td><code>-D &lt;property=value&gt;</code></td>
<td>为指定property指定值value。</td>
</tr>
<tr>
<td><code>-fs &lt;local|namenode:port&gt;</code></td>
<td>指定namenode。</td>
</tr>
<tr>
<td><code>-jt &lt;local|jobtracker:port&gt;</code></td>
<td>指定job tracker。只适用于<a href="commands_manual.html#job">job</a></td>
</tr>
<tr>
<td><code>-files &lt;逗号分隔的文件列表&gt;</code></td>
<td>指定要拷贝到map reduce集群的文件的逗号分隔的列表。
只适用于<a href="commands_manual.html#job">job</a></td>
</tr>
<tr>
<td><code>-libjars &lt;逗号分隔的jar列表&gt;</code></td>
<td>指定要包含到classpath中的jar文件的逗号分隔的列表。
只适用于<a href="commands_manual.html#job">job</a></td>
</tr>
<tr>
<td><code>-archives &lt;逗号分隔的archive列表&gt;</code></td>
<td>指定要被解压到计算节点上的档案文件的逗号分割的列表。
只适用于<a href="commands_manual.html#job">job</a></td>
</tr>
</table>
</section>
</section>
<section>
<title> 用户命令 </title>
<p>hadoop集群用户的常用命令。</p>
<section>
<title> archive </title>
<p>
创建一个hadoop档案文件。参考 <a href="hadoop_archives.html">Hadoop Archives</a>.
</p>
<p>
<code>用法hadoop archive -archiveName NAME &lt;src&gt;* &lt;dest&gt;</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-archiveName NAME</code></td>
<td>要创建的档案的名字。</td>
</tr>
<tr>
<td><code>src</code></td>
<td>文件系统的路径名,和通常含正则表达的一样。</td>
</tr>
<tr>
<td><code>dest</code></td>
<td>保存档案文件的目标目录。</td>
</tr>
</table>
</section>
<section>
<title> distcp </title>
<p>
递归地拷贝文件或目录。参考<a href="distcp.html">DistCp指南</a>以获取更多信息。
</p>
<p>
<code>用法hadoop distcp &lt;srcurl&gt; &lt;desturl&gt;</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>srcurl</code></td>
<td>源Url</td>
</tr>
<tr>
<td><code>desturl</code></td>
<td>目标Url</td>
</tr>
</table>
</section>
<section>
<title> fs </title>
<p>
<code>用法hadoop fs [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>]
[COMMAND_OPTIONS]</code>
</p>
<p>
运行一个常规的文件系统客户端。
</p>
<p>
各种命令选项可以参考<a href="hdfs_shell.html">HDFS Shell指南</a>
</p>
</section>
<section>
<title> fsck </title>
<p>
运行HDFS文件系统检查工具。参考<a href="hdfs_user_guide.html#fsck">Fsck</a>了解更多。
</p>
<p><code>用法hadoop fsck [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>]
&lt;path&gt; [-move | -delete | -openforwrite] [-files [-blocks
[-locations | -racks]]]</code></p>
<table>
<tr><th> 命令选项 </th><th> 描述 </th></tr>
<tr>
<td><code>&lt;path&gt;</code></td>
<td>检查的起始目录。</td>
</tr>
<tr>
<td><code>-move</code></td>
<td>移动受损文件到/lost+found</td>
</tr>
<tr>
<td><code>-delete</code></td>
<td>删除受损文件。</td>
</tr>
<tr>
<td><code>-openforwrite</code></td>
<td>打印出写打开的文件。</td>
</tr>
<tr>
<td><code>-files</code></td>
<td>打印出正被检查的文件。</td>
</tr>
<tr>
<td><code>-blocks</code></td>
<td>打印出块信息报告。</td>
</tr>
<tr>
<td><code>-locations</code></td>
<td>打印出每个块的位置信息。</td>
</tr>
<tr>
<td><code>-racks</code></td>
<td>打印出data-node的网络拓扑结构。</td>
</tr>
</table>
</section>
<section>
<title> jar </title>
<p>
运行jar文件。用户可以把他们的Map Reduce代码捆绑到jar文件中使用这个命令执行。
</p>
<p>
<code>用法hadoop jar &lt;jar&gt; [mainClass] args...</code>
</p>
<p>
streaming作业是通过这个命令执行的。参考<a href="streaming.html#其他例子">Streaming examples</a>中的例子。
</p>
<p>
Word count例子也是通过jar命令运行的。参考<a href="mapred_tutorial.html#用法">Wordcount example</a>
</p>
</section>
<section>
<title> job </title>
<p>
用于和Map Reduce作业交互的命令。
</p>
<p>
<code>用法hadoop job [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>]
[-submit &lt;job-file&gt;] | [-status &lt;job-id&gt;] |
[-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;] | [-kill &lt;job-id&gt;] |
[-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;] | [-history [all] &lt;jobOutputDir&gt;] |
[-list [all]] | [-kill-task &lt;task-id&gt;] | [-fail-task &lt;task-id&gt;]</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-submit &lt;job-file&gt;</code></td>
<td>提交作业</td>
</tr>
<tr>
<td><code>-status &lt;job-id&gt;</code></td>
<td>打印map和reduce完成百分比和所有计数器。</td>
</tr>
<tr>
<td><code>-counter &lt;job-id&gt; &lt;group-name&gt; &lt;counter-name&gt;</code></td>
<td>打印计数器的值。</td>
</tr>
<tr>
<td><code>-kill &lt;job-id&gt;</code></td>
<td>杀死指定作业。</td>
</tr>
<tr>
<td><code>-events &lt;job-id&gt; &lt;from-event-#&gt; &lt;#-of-events&gt;</code></td>
<td>打印给定范围内jobtracker接收到的事件细节。</td>
</tr>
<tr>
<td><code>-history [all] &lt;jobOutputDir&gt;</code></td>
<td>-history &lt;jobOutputDir&gt; 打印作业的细节、失败及被杀死原因的细节。更多的关于一个作业的细节比如成功的任务,做过的任务尝试等信息可以通过指定[all]选项查看。
</td>
</tr>
<tr>
<td><code>-list [all]</code></td>
<td>-list all显示所有作业。-list只显示尚未完成的作业。</td>
</tr>
<tr>
<td><code>-kill-task &lt;task-id&gt;</code></td>
<td>杀死任务。被杀死的任务不会计入失败尝试次数。</td>
</tr>
<tr>
<td><code>-fail-task &lt;task-id&gt;</code></td>
<td>使任务失败。被置为失败的任务会计入失败尝试次数。</td>
</tr>
</table>
</section>
<section>
<title> pipes </title>
<p>
运行pipes作业。
</p>
<p>
<code>用法hadoop pipes [-conf &lt;path&gt;] [-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...]
[-input &lt;path&gt;] [-output &lt;path&gt;] [-jar &lt;jar file&gt;] [-inputformat &lt;class&gt;]
[-map &lt;class&gt;] [-partitioner &lt;class&gt;] [-reduce &lt;class&gt;] [-writer &lt;class&gt;]
[-program &lt;executable&gt;] [-reduces &lt;num&gt;] </code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-conf &lt;path&gt;</code></td>
<td>作业的配置</td>
</tr>
<tr>
<td><code>-jobconf &lt;key=value&gt;, &lt;key=value&gt;, ...</code></td>
<td>增加/覆盖作业的配置项</td>
</tr>
<tr>
<td><code>-input &lt;path&gt;</code></td>
<td>输入目录</td>
</tr>
<tr>
<td><code>-output &lt;path&gt;</code></td>
<td>输出目录</td>
</tr>
<tr>
<td><code>-jar &lt;jar file&gt;</code></td>
<td>Jar文件名</td>
</tr>
<tr>
<td><code>-inputformat &lt;class&gt;</code></td>
<td>InputFormat类</td>
</tr>
<tr>
<td><code>-map &lt;class&gt;</code></td>
<td>Java Map类</td>
</tr>
<tr>
<td><code>-partitioner &lt;class&gt;</code></td>
<td>Java Partitioner</td>
</tr>
<tr>
<td><code>-reduce &lt;class&gt;</code></td>
<td>Java Reduce类</td>
</tr>
<tr>
<td><code>-writer &lt;class&gt;</code></td>
<td>Java RecordWriter</td>
</tr>
<tr>
<td><code>-program &lt;executable&gt;</code></td>
<td>可执行程序的URI</td>
</tr>
<tr>
<td><code>-reduces &lt;num&gt;</code></td>
<td>reduce个数</td>
</tr>
</table>
</section>
<section>
<title> version </title>
<p>
打印版本信息。
</p>
<p>
<code>用法hadoop version</code>
</p>
</section>
<section>
<title> CLASSNAME </title>
<p>
hadoop脚本可用于调用任何类。
</p>
<p>
<code>用法hadoop CLASSNAME</code>
</p>
<p>
运行名字为CLASSNAME的类。
</p>
</section>
</section>
<section>
<title>管理命令</title>
<p>hadoop集群管理员常用的命令。</p>
<section>
<title> balancer </title>
<p>
运行集群平衡工具。管理员可以简单的按Ctrl-C来停止平衡过程。参考<a href="hdfs_user_guide.html#Rebalancer">Rebalancer</a>了解更多。
</p>
<p>
<code>用法hadoop balancer [-threshold &lt;threshold&gt;]</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-threshold &lt;threshold&gt;</code></td>
<td>磁盘容量的百分比。这会覆盖缺省的阈值。</td>
</tr>
</table>
</section>
<section>
<title> daemonlog </title>
<p>
获取或设置每个守护进程的日志级别。
</p>
<p>
<code>用法hadoop daemonlog -getlevel &lt;host:port&gt; &lt;name&gt;</code><br/>
<code>用法hadoop daemonlog -setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-getlevel &lt;host:port&gt; &lt;name&gt;</code></td>
<td>打印运行在&lt;host:port&gt;的守护进程的日志级别。这个命令内部会连接http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
</tr>
<tr>
<td><code>-setlevel &lt;host:port&gt; &lt;name&gt; &lt;level&gt;</code></td>
<td>设置运行在&lt;host:port&gt;的守护进程的日志级别。这个命令内部会连接http://&lt;host:port&gt;/logLevel?log=&lt;name&gt;</td>
</tr>
</table>
</section>
<section>
<title> datanode</title>
<p>
运行一个HDFS的datanode。
</p>
<p>
<code>用法hadoop datanode [-rollback]</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-rollback</code></td>
<td>将datanode回滚到前一个版本。这需要在停止datanode分发老的hadoop版本之后使用。
</td>
</tr>
</table>
</section>
<section>
<title> dfsadmin </title>
<p>
运行一个HDFS的dfsadmin客户端。
</p>
<p>
<code>用法hadoop dfsadmin [</code><a href="commands_manual.html#常规选项">GENERIC_OPTIONS</a><code>] [-report] [-safemode enter | leave | get | wait] [-refreshNodes]
[-finalizeUpgrade] [-upgradeProgress status | details | force] [-metasave filename]
[-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;] [-clrQuota &lt;dirname&gt;...&lt;dirname&gt;]
[-help [cmd]]</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-report</code></td>
<td>报告文件系统的基本信息和统计信息。</td>
</tr>
<tr>
<td><code>-safemode enter | leave | get | wait</code></td>
<td>安全模式维护命令。安全模式是Namenode的一个状态，这种状态下，Namenode <br/>
1. 不接受对名字空间的更改(只读)<br/>
2. 不复制或删除块<br/>
Namenode会在启动时自动进入安全模式，当配置的块最小百分比数满足最小的副本数条件时，会自动离开安全模式。安全模式可以手动进入，但是这样的话也必须手动关闭安全模式。
</td>
</tr>
<tr>
<td><code>-refreshNodes</code></td>
<td>重新读取hosts和exclude文件更新允许连到Namenode的或那些需要退出或入编的Datanode的集合。
</td>
</tr>
<tr>
<td><code>-finalizeUpgrade</code></td>
<td>终结HDFS的升级操作。Datanode删除前一个版本的工作目录之后Namenode也这样做。这个操作完结整个升级过程。
</td>
</tr>
<tr>
<td><code>-upgradeProgress status | details | force</code></td>
<td>请求当前系统的升级状态,状态的细节,或者强制升级操作进行。
</td>
</tr>
<tr>
<td><code>-metasave filename</code></td>
<td>保存Namenode的主要数据结构到hadoop.log.dir属性指定的目录下的&lt;filename&gt;文件。对于下面的每一项，&lt;filename&gt;中都会有一行内容与之对应<br/>
1. Namenode收到的Datanode的心跳信号<br/>
2. 等待被复制的块<br/>
3. 正在被复制的块<br/>
4. 等待被删除的块</td>
</tr>
<tr>
<td><code>-setQuota &lt;quota&gt; &lt;dirname&gt;...&lt;dirname&gt;</code></td>
<td>为每个目录 &lt;dirname&gt;设定配额&lt;quota&gt;。目录配额是一个长整型整数,强制限定了目录树下的名字个数。<br/>
命令会在这个目录上工作良好,以下情况会报错:<br/>
1. N不是一个正整数或者<br/>
2. 用户不是管理员,或者<br/>
3. 这个目录不存在或是文件,或者<br/>
4. 目录会马上超出新设定的配额。</td>
</tr>
<tr>
<td><code>-clrQuota &lt;dirname&gt;...&lt;dirname&gt;</code></td>
<td>为每一个目录&lt;dirname&gt;清除配额设定。<br/>
命令会在这个目录上工作良好,以下情况会报错:<br/>
1. 这个目录不存在或是文件,或者<br/>
2. 用户不是管理员。<br/>
如果目录原来没有配额不会报错。</td>
</tr>
<tr>
<td><code>-help [cmd]</code></td>
<td>显示给定命令的帮助信息,如果没有给定命令,则显示所有命令的帮助信息。</td>
</tr>
</table>
</section>
<section>
<title> jobtracker </title>
<p>
运行MapReduce job Tracker节点。
</p>
<p>
<code>用法hadoop jobtracker</code>
</p>
</section>
<section>
<title> namenode </title>
<p>
运行namenode。有关升级回滚升级终结的更多信息请参考<a href="hdfs_user_guide.html#升级和回滚">升级和回滚</a>
</p>
<p>
<code>用法hadoop namenode [-format] | [-upgrade] | [-rollback] | [-finalize] | [-importCheckpoint]</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-format</code></td>
<td>格式化namenode。它启动namenode格式化namenode之后关闭namenode。</td>
</tr>
<tr>
<td><code>-upgrade</code></td>
<td>分发新版本的hadoop后namenode应以upgrade选项启动。</td>
</tr>
<tr>
<td><code>-rollback</code></td>
<td>将namenode回滚到前一版本。这个选项要在停止集群分发老的hadoop版本后使用。
</td>
</tr>
<tr>
<td><code>-finalize</code></td>
<td>finalize会删除文件系统的前一状态。最近的升级会被持久化，rollback选项将再不可用，升级终结操作之后，它会停掉namenode。</td>
</tr>
<tr>
<td><code>-importCheckpoint</code></td>
<td>从检查点目录装载镜像并保存到当前检查点目录检查点目录由fs.checkpoint.dir指定。
</td>
</tr>
</table>
</section>
<section>
<title> secondarynamenode </title>
<p>
运行HDFS的secondary namenode。参考<a href="hdfs_user_guide.html#Secondary+NameNode">Secondary Namenode</a>了解更多。
</p>
<p>
<code>用法hadoop secondarynamenode [-checkpoint [force]] | [-geteditsize]</code>
</p>
<table>
<tr><th> 命令选项 </th><th> 描述</th></tr>
<tr>
<td><code>-checkpoint [force]</code></td>
<td>如果EditLog的大小 >= fs.checkpoint.size启动Secondary namenode的检查点过程。
如果使用了-force将不考虑EditLog的大小。</td>
</tr>
<tr>
<td><code>-geteditsize</code></td>
<td>打印EditLog大小。</td>
</tr>
</table>
</section>
<section>
<title> tasktracker </title>
<p>
运行MapReduce的task Tracker节点。
</p>
<p>
<code>用法hadoop tasktracker</code>
</p>
</section>
</section>
</body>
</document>

View File

@ -0,0 +1,294 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>DistCp</title>
</header>
<body>
<section>
<title>概述</title>
<p>DistCp(分布式拷贝)是用于大规模集群内部和集群之间拷贝的工具。
它使用Map/Reduce实现文件分发,错误处理和恢复,以及报告生成。
它把文件和目录的列表作为map任务的输入,每个任务会完成源列表中部分文件的拷贝。
由于使用了Map/Reduce方法,这个工具在语义和执行上都会有特殊的地方。
这篇文档会为常用DistCp操作提供指南并阐述它的工作模型。
</p>
</section>
<section>
<title>使用方法</title>
<section>
<title>基本使用方法</title>
<p>DistCp最常用在集群之间的拷贝</p>
<p><code>bash$ hadoop distcp hdfs://nn1:8020/foo/bar \</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
hdfs://nn2:8020/bar/foo</code></p>
<p>这条命令会把nn1集群的<code>/foo/bar</code>目录下的所有文件或目录名展开并存储到一个临时文件中这些文件内容的拷贝工作被分配给多个map任务
然后每个TaskTracker分别执行从nn1到nn2的拷贝操作。注意DistCp使用绝对路径进行操作。
</p>
<p>命令行中可以指定多个源目录:</p>
<p><code>bash$ hadoop distcp hdfs://nn1:8020/foo/a \</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
hdfs://nn1:8020/foo/b \</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
hdfs://nn2:8020/bar/foo</code></p>
<p>或者使用<code>-f</code>选项,从文件里获得多个源:<br/>
<code>bash$ hadoop distcp -f hdfs://nn1:8020/srclist \</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;hdfs://nn2:8020/bar/foo</code><br/></p>
<p>其中<code>srclist</code> 的内容是<br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</code></p>
<p>当从多个源拷贝时如果两个源冲突DistCp会停止拷贝并提示出错信息
如果在目的位置发生冲突,会根据<a href="#options">选项设置</a>解决。
默认情况会跳过已经存在的目标文件(比如不用源文件做替换操作)。每次操作结束时
都会报告跳过的文件数目,但是如果某些拷贝操作失败了,但在之后的尝试成功了,
那么报告的信息可能不够精确(请参考<a href="#etc">附录</a>)。
</p>
<p>每个TaskTracker必须都能够与源端和目的端文件系统进行访问和交互。
对于HDFS来说源和目的端要运行相同版本的协议或者使用向下兼容的协议。
(请参考<a href="#cpver">不同版本间的拷贝</a> )。
</p>
<p>拷贝完成后,建议生成源端和目的端文件的列表,并交叉检查,来确认拷贝真正成功。
因为DistCp使用Map/Reduce和文件系统API进行操作所以这三者或它们之间有任何问题
都会影响拷贝操作。一些Distcp命令的成功执行可以通过再次执行带-update参数的该命令来完成
但用户在如此操作之前应该对该命令的语法很熟悉。
</p>
<p>值得注意的是,当另一个客户端同时在向源文件写入时,拷贝很有可能会失败。
尝试覆盖HDFS上正在被写入的文件的操作也会失败。
如果一个源文件在拷贝之前被移动或删除了,拷贝失败同时输出异常
FileNotFoundException。</p>
</section> <!-- Basic -->
<section id="options">
<title>选项</title>
<section>
<title>选项索引</title>
<table>
<tr><th> 标识 </th><th> 描述 </th><th> 备注 </th></tr>
<tr><td><code>-p[rbugp]</code></td>
<td>Preserve<br/>
&nbsp;&nbsp;r: replication number<br/>
&nbsp;&nbsp;b: block size<br/>
&nbsp;&nbsp;u: user<br/>
&nbsp;&nbsp;g: group<br/>
&nbsp;&nbsp;p: permission<br/></td>
<td>修改时间不会被保留。并且当指定
<code>-update</code> 时,更新的状态<strong>不会</strong>
被同步,除非文件大小不同(比如文件被重新创建)。
</td></tr>
<tr><td><code>-i</code></td>
<td>忽略失败</td>
<td>就像在 <a href="#etc">附录</a>中提到的,这个选项会比默认情况提供关于拷贝的更精确的统计, 同时它还将保留失败拷贝操作的日志,这些日志信息可以用于调试。最后,在所有分块任务都被尝试之前,一个map的失败不会导致整个作业失败。
</td></tr>
<tr><td><code>-log &lt;logdir&gt;</code></td>
<td>记录日志到 &lt;logdir&gt;</td>
<td>DistCp为每个文件的每次尝试拷贝操作都记录日志并把日志作为map的输出。
如果一个map失败了当重新执行时这个日志不会被保留。
</td></tr>
<tr><td><code>-m &lt;num_maps&gt;</code></td>
<td>同时拷贝的最大数目</td>
<td>指定了拷贝数据时map的数目。请注意并不是map数越多吞吐量越大。
</td></tr>
<tr><td><code>-overwrite</code></td>
<td>覆盖目标</td>
<td>如果一个map失败并且没有使用<code>-i</code>选项,不仅仅那些拷贝失败的文件,这个分块任务中的所有文件都会被重新拷贝。
就像<a href="#uo">下面</a>提到的,它会改变生成目标路径的语义,所以
用户要小心使用这个选项。
</td></tr>
<tr><td><code>-update</code></td>
<td>如果源和目标的大小不一样则进行覆盖</td>
<td>像之前提到的,这不是&quot;同步&quot;操作。
执行覆盖的唯一标准是源文件和目标文件大小是否相同;如果不同,则源文件替换目标文件。
像<a href="#uo">下面</a>提到的,它也改变生成目标路径的语义,
用户使用要小心。
</td></tr>
<tr><td><code>-f &lt;urilist_uri&gt;</code></td>
<td>使用&lt;urilist_uri&gt; 作为源文件列表</td>
<td>这等价于把所有文件名列在命令行中。
<code>urilist_uri</code> 列表应该是完整合法的URI。
</td></tr>
</table>
</section>
<section id="uo">
<title>更新和覆盖</title>
<p>这里给出一些 <code>-update</code>和<code>-overwrite</code>的例子。
考虑一个从<code>/foo/a</code>和
<code>/foo/b</code>到<code>/bar/foo</code>的拷贝,源路径包括:
</p>
<p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/aa</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/ab</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ba</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ab</code></p>
<p>如果设置了<code>-update</code>或<code>-overwrite</code>选项,
那么两个源都会映射到目标端的
<code>/bar/foo/ab</code>。
对于这两个选项,每个源目录的内容都会和目标目录的
<strong>内容</strong> 做比较。DistCp碰到这类冲突的情况会终止操作并退出。</p>
<p>默认情况下,<code>/bar/foo/a</code>和
<code>/bar/foo/b</code> 目录都会被创建,所以并不会有冲突。</p>
<p>现在考虑一个使用<code>-update</code>合法的操作:<br/>
<code>distcp -update hdfs://nn1:8020/foo/a \</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
hdfs://nn1:8020/foo/b \</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
hdfs://nn2:8020/bar</code></p>
<p>其中源路径/大小:</p>
<p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/aa 32</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/a/ab 32</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/ba 64</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn1:8020/foo/b/bb 32</code></p>
<p>和目的路径/大小:</p>
<p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/aa 32</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ba 32</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/bb 64</code></p>
<p>会产生:</p>
<p><code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/aa 32</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ab 32</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/ba 64</code><br/>
<code>&nbsp;&nbsp;&nbsp;&nbsp;hdfs://nn2:8020/bar/bb 32</code></p>
<p>只有nn2的<code>aa</code>文件没有被覆盖。如果指定了
<code>-overwrite</code>选项,所有文件都会被覆盖。
</p>
</section> <!-- Update and Overwrite -->
</section> <!-- Options -->
</section> <!-- Usage -->
<section id="etc">
<title>附录</title>
<section>
<title>Map数目</title>
<p>DistCp会尝试着均分需要拷贝的内容这样每个map拷贝差不多相等大小的内容。
但因为文件是最小的拷贝粒度所以配置增加同时拷贝如map的数目不一定会增加实际同时拷贝的数目以及总吞吐量。
</p>
<p>如果没使用<code>-m</code>选项,DistCp会尝试在调度工作时指定map的数目为
<code>min (total_bytes / bytes.per.map, 20 * num_task_trackers)</code>,
其中<code>bytes.per.map</code>默认是256MB。</p>
<p>建议对于长时间运行或定期运行的作业根据源和目标集群大小、拷贝数量大小以及带宽调整map的数目。
</p>
</section>
<section id="cpver">
<title>不同HDFS版本间的拷贝</title>
<p>对于不同Hadoop版本间的拷贝,用户应该使用HftpFileSystem。
这是一个只读文件系统,所以DistCp必须运行在目标端集群上(更确切的说是在能够写入目标集群的TaskTracker上)。
源的格式是
<code>hftp://&lt;dfs.http.address&gt;/&lt;path&gt;</code>
(默认情况<code>dfs.http.address</code>是
&lt;namenode&gt;:50070)。</p>
</section>
<section>
<title>Map/Reduce和副效应</title>
<p>像前面提到的map拷贝输入文件失败时会带来一些副效应。
</p>
<ul>
<li>除非使用了<code>-i</code>,任务产生的日志会被新的尝试替换掉。
</li>
<li>除非使用了<code>-overwrite</code>文件被之前的map成功拷贝后当又一次执行拷贝时会被标记为
&quot;被忽略&quot;</li>
<li>如果map失败了<code>mapred.map.max.attempts</code>次,剩下的map任务会被终止(除非使用了<code>-i</code>)。
</li>
<li>如果<code>mapred.speculative.execution</code>被设置为
<code>final</code>和<code>true</code>,则拷贝的结果是未定义的。</li>
</ul>
</section>
<!--
<section>
<title>Firewalls and SSL</title>
<p>To copy over HTTP, use the HftpFileSystem as described in the
preceding <a href="#cpver">section</a>, and ensure that the required
port(s) are open.</p>
<p>TODO</p>
</section>
-->
</section> <!-- Appendix -->
</body>
</document>

View File

@ -0,0 +1,69 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop Archives</title>
</header>
<body>
<section>
<title> 什么是Hadoop archives? </title>
<p>
Hadoop archives是特殊的档案格式。一个Hadoop archive对应一个文件系统目录。
Hadoop archive的扩展名是*.har。Hadoop archive包含元数据(形式是_index和_masterindex)和数据(part-*)文件。_index文件包含了档案中的文件的文件名和位置信息。
</p>
</section>
<section>
<title> 如何创建archive? </title>
<p>
<code>用法: hadoop archive -archiveName name &lt;src&gt;* &lt;dest&gt;</code>
</p>
<p>
由-archiveName选项指定你要创建的archive的名字。比如foo.har。archive的名字的扩展名应该是*.har。输入是文件系统的路径名路径名的格式和平时的表达方式一样。创建的archive会保存到目标目录下。注意创建archives是一个Map/Reduce job。你应该在map reduce集群上运行这个命令。下面是一个例子
</p>
<p>
<code>hadoop archive -archiveName foo.har /user/hadoop/dir1 /user/hadoop/dir2 /user/zoo/</code>
</p><p>
在上面的例子中,
/user/hadoop/dir1 和 /user/hadoop/dir2 会被归档到这个文件系统目录下
-- /user/zoo/foo.har。当创建archive时源文件不会被更改或删除。
</p>
</section>
<section>
<title> 如何查看archives中的文件? </title>
<p>
archive作为文件系统层暴露给外界。所以所有的fs shell命令都能在archive上运行但是要使用不同的URI。
另外archive是不可改变的。所以重命名删除和创建都会返回错误。Hadoop Archives 的URI是
</p><p><code>har://scheme-hostname:port/archivepath/fileinarchive</code></p><p>
如果没提供scheme-hostname它会使用默认的文件系统。这种情况下URI是这种形式
</p><p><code>
har:///archivepath/fileinarchive</code></p>
<p>
这是一个archive的例子。archive的输入是/dir。这个dir目录包含文件filea和fileb。
把/dir归档到/user/hadoop/foo.har的命令是
</p>
<p><code>hadoop archive -archiveName foo.har /dir /user/hadoop</code>
</p><p>
获得创建的archive中的文件列表使用命令
</p>
<p><code>hadoop dfs -lsr har:///user/hadoop/foo.har</code></p>
<p>查看archive中的filea文件的命令-
</p><p><code>hadoop dfs -cat har:///user/hadoop/foo.har/dir/filea</code></p>
</section>
</body>
</document>

View File

@ -0,0 +1,376 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
Hadoop分布式文件系统架构和设计
</title>
<authors>
<person name="Dhruba Borthakur" email="dhruba@yahoo-inc.com"/>
</authors>
</header>
<body>
<section>
<title> 引言 </title>
<p>
Hadoop分布式文件系统(<acronym title="Hadoop分布式文件系统">HDFS</acronym>)被设计成适合运行在通用硬件(commodity hardware)上的分布式文件系统。它和现有的分布式文件系统有很多共同点。但同时它和其他的分布式文件系统的区别也是很明显的。HDFS是一个高度容错性的系统适合部署在廉价的机器上。HDFS能提供高吞吐量的数据访问非常适合大规模数据集上的应用。HDFS放宽了一部分POSIX约束来实现流式读取文件系统数据的目的。HDFS在最开始是作为Apache Nutch搜索引擎项目的基础架构而开发的。HDFS是Apache Hadoop Core项目的一部分。这个项目的地址是<a href="http://hadoop.apache.org/core/">http://hadoop.apache.org/core/</a>
</p>
</section>
<section>
<title> 前提和设计目标 </title>
<section>
<title> 硬件错误 </title>
<p>
硬件错误是常态而不是异常。HDFS可能由成百上千的服务器所构成每个服务器上存储着文件系统的部分数据。我们面对的现实是构成系统的组件数目是巨大的而且任一组件都有可能失效这意味着总是有一部分HDFS的组件是不工作的。因此错误检测和快速、自动的恢复是HDFS最核心的架构目标。
</p>
</section>
<section>
<title> 流式数据访问 </title>
<p>
运行在HDFS上的应用和普通的应用不同需要流式访问它们的数据集。HDFS的设计中更多的考虑到了数据批处理而不是用户交互处理。比之数据访问的低延迟问题更关键的在于数据访问的高吞吐量。POSIX标准设置的很多硬性约束对HDFS应用系统不是必需的。为了提高数据的吞吐量在一些关键方面对POSIX的语义做了一些修改。
</p>
</section>
<section>
<title> 大规模数据集 </title>
<p>
运行在HDFS上的应用具有很大的数据集。HDFS上的一个典型文件大小一般都在G字节至T字节。因此HDFS被调节以支持大文件存储。它应该能提供整体上高的数据传输带宽能在一个集群里扩展到数百个节点。一个单一的HDFS实例应该能支撑数以千万计的文件。
</p>
</section>
<section>
<title> 简单的一致性模型 </title>
<!--DCCOMMENT:diff begin-->
<p>
HDFS应用需要一个“一次写入多次读取”的文件访问模型。一个文件经过创建、写入和关闭之后就不需要改变。这一假设简化了数据一致性问题并且使高吞吐量的数据访问成为可能。Map/Reduce应用或者网络爬虫应用都非常适合这个模型。目前还有计划在将来扩充这个模型使之支持文件的附加写操作。
</p>
<!--DCCOMMENT:diff end
note: "MapReduce" has been replaced by "Map/Reduce" in this doc
@@ -67,7 +67,7 @@
<section>
<title> Simple Coherency Model </title>
<p>
- HDFS applications need a write-once-read-many access model for files. A file once created, written, and closed need not be changed. This assumption simplifies data coherency issues and enables high throughput data access. A MapReduce application or a web crawler application fits perfectly with this model. There is a plan to support appending-writes to files in the future.
+ HDFS applications need a write-once-read-many access model for files. A file once created, written, and closed need not be changed. This assumption simplifies data coherency issues and enables high throughput data access. A Map/Reduce application or a web crawler application fits perfectly with this model. There is a plan to support appending-writes to files in the future.
</p>
</section>
-->
</section>
<section>
<title> &#x201c;移动计算比移动数据更划算&#x201d; </title>
<p>
一个应用请求的计算离它操作的数据越近就越高效在数据达到海量级别的时候更是如此。因为这样就能降低网络阻塞的影响提高系统数据的吞吐量。将计算移动到数据附近比之将数据移动到应用所在显然更好。HDFS为应用提供了将它们自己移动到数据附近的接口。
</p>
</section>
<section>
<title> 异构软硬件平台间的可移植性 </title>
<p>
HDFS在设计的时候就考虑到平台的可移植性。这种特性方便了HDFS作为大规模数据应用平台的推广。
</p>
</section>
</section>
<section>
<title> Namenode 和 Datanode </title>
<!--DCCOMMENT:diff begin-->
<p>
HDFS采用master/slave架构。一个HDFS集群是由一个Namenode和一定数目的Datanodes组成。Namenode是一个中心服务器负责管理文件系统的名字空间(namespace)以及客户端对文件的访问。集群中的Datanode一般是一个节点一个负责管理它所在节点上的存储。HDFS暴露了文件系统的名字空间用户能够以文件的形式在上面存储数据。从内部看一个文件其实被分成一个或多个数据块这些块存储在一组Datanode上。Namenode执行文件系统的名字空间操作比如打开、关闭、重命名文件或目录。它也负责确定数据块到具体Datanode节点的映射。Datanode负责处理文件系统客户端的读写请求。在Namenode的统一调度下进行数据块的创建、删除和复制。
</p>
<!--DCCOMMENT:diff end
note : tag "<em></em>" has been deleted.
<p>
- HDFS has a master/slave architecture. An HDFS cluster consists of a single <em>Namenode</em>, a master server that manages the file system namespace and regulates access to files by clients. In addition, there are a number of <em>Datanodes</em>, usually one per node in the cluster, which manage storage attached to the nodes that they run on. HDFS exposes a file system namespace and allows user data to be stored in files. Internally, a file is split into one or more blocks and these blocks are stored in a set of Datanodes. The Namenode executes file system namespace operations like opening, closing, and renaming files and directories. It also determines the mapping of blocks to Datanodes. The Datanodes are responsible for serving read and write requests from the file system&#x2019;s clients. The Datanodes also perform block creation, deletion, and replication upon instruction from the Namenode.
+ HDFS has a master/slave architecture. An HDFS cluster consists of a single NameNode, a master server that manages the file system namespace and regulates access to files by clients. In addition, there are a number of DataNodes, usually one per node in the cluster, which manage storage attached to the nodes that they run on. HDFS exposes a file system namespace and allows user data to be stored in files. Internally, a file is split into one or more blocks and these blocks are stored in a set of DataNodes. The NameNode executes file system namespace operations like opening, closing, and renaming files and directories. It also determines the mapping of blocks to DataNodes. The DataNodes are responsible for serving read and write requests from the file system&#x2019;s clients. The DataNodes also perform block creation, deletion, and replication upon instruction from the NameNode.
-->
<figure alt="HDFS 架构" src="images/hdfsarchitecture.gif"/>
<p>
Namenode和Datanode被设计成可以在普通的商用机器上运行。这些机器一般运行着GNU/Linux操作系统(<acronym title="操作系统">OS</acronym>)。HDFS采用Java语言开发因此任何支持Java的机器都可以部署Namenode或Datanode。由于采用了可移植性极强的Java语言使得HDFS可以部署到多种类型的机器上。一个典型的部署场景是一台机器上只运行一个Namenode实例而集群中的其它机器分别运行一个Datanode实例。这种架构并不排斥在一台机器上运行多个Datanode只不过这样的情况比较少见。
</p>
<p>
集群中单一Namenode的结构大大简化了系统的架构。Namenode是所有HDFS元数据的仲裁者和管理者这样用户数据永远不会流过Namenode。
</p>
</section>
<section>
<title> 文件系统的名字空间 (namespace) </title>
<p>
HDFS支持传统的层次型文件组织结构。用户或者应用程序可以创建目录然后将文件保存在这些目录里。文件系统名字空间的层次结构和大多数现有的文件系统类似用户可以创建、删除、移动或重命名文件。当前HDFS不支持用户磁盘配额和访问权限控制也不支持硬链接和软链接。但是HDFS架构并不妨碍实现这些特性。
</p>
<p>
Namenode负责维护文件系统的名字空间任何对文件系统名字空间或属性的修改都将被Namenode记录下来。应用程序可以设置HDFS保存的文件的副本数目。文件副本的数目称为文件的副本系数这个信息也是由Namenode保存的。
</p>
</section>
<section>
<title> 数据复制 </title>
<p>
HDFS被设计成能够在一个大集群中跨机器可靠地存储超大文件。它将每个文件存储成一系列的数据块除了最后一个所有的数据块都是同样大小的。为了容错文件的所有数据块都会有副本。每个文件的数据块大小和副本系数都是可配置的。应用程序可以指定某个文件的副本数目。副本系数可以在文件创建的时候指定也可以在之后改变。HDFS中的文件都是一次性写入的并且严格要求在任何时候只能有一个写入者。
</p>
<p>
Namenode全权管理数据块的复制它周期性地从集群中的每个Datanode接收心跳信号和块状态报告(Blockreport)。接收到心跳信号意味着该Datanode节点工作正常。块状态报告包含了一个该Datanode上所有数据块的列表。
</p>
<figure alt="HDFS Datanodes" src="images/hdfsdatanodes.gif"/>
<section>
<title> 副本存放: 最最开始的一步 </title>
<p>
副本的存放是HDFS可靠性和性能的关键。优化的副本存放策略是HDFS区分于其他大部分分布式文件系统的重要特性。这种特性需要做大量的调优并需要经验的积累。HDFS采用一种称为机架感知(rack-aware)的策略来改进数据的可靠性、可用性和网络带宽的利用率。目前实现的副本存放策略只是在这个方向上的第一步。实现这个策略的短期目标是验证它在生产环境下的有效性,观察它的行为,为实现更先进的策略打下测试和研究的基础。
</p>
<p>
大型HDFS实例一般运行在跨越多个机架的计算机组成的集群上不同机架上的两台机器之间的通讯需要经过交换机。在大多数情况下同一个机架内的两台机器间的带宽会比不同机架的两台机器间的带宽大。
</p>
<p>
通过一个<a href="cluster_setup.html#Hadoop的机架感知">机架感知</a>的过程Namenode可以确定每个Datanode所属的机架id。一个简单但没有优化的策略就是将副本存放在不同的机架上。这样可以有效防止当整个机架失效时数据的丢失并且允许读数据的时候充分利用多个机架的带宽。这种策略设置可以将副本均匀分布在集群中有利于当组件失效情况下的负载均衡。但是因为这种策略的一个写操作需要传输数据块到多个机架这增加了写的代价。
</p>
<p>
在大多数情况下,副本系数是3,HDFS的存放策略是将一个副本存放在本地机架的节点上,一个副本放在同一机架的另一个节点上,最后一个副本放在不同机架的节点上。这种策略减少了机架间的数据传输,这就提高了写操作的效率。机架的错误远远比节点的错误少,所以这个策略不会影响到数据的可靠性和可用性。与此同时,因为数据块只放在两个(不是三个)不同的机架上,所以此策略减少了读取数据时需要的网络传输总带宽。在这种策略下,副本并不是均匀分布在不同的机架上。三分之一的副本在一个节点上,三分之二的副本在一个机架上,其他副本均匀分布在剩下的机架中,这一策略在不损害数据可靠性和读取性能的情况下改进了写的性能。
</p>
<p>
当前,这里介绍的默认副本存放策略正在开发的过程中。
</p>
</section>
<section>
<title> 副本选择 </title>
<p>
为了降低整体的带宽消耗和读取延时HDFS会尽量让读取程序读取离它最近的副本。如果在读取程序的同一个机架上有一个副本那么就读取该副本。如果一个HDFS集群跨越多个数据中心那么客户端也将首先读本地数据中心的副本。
</p>
</section>
<section>
<title> 安全模式 </title>
<p>
Namenode启动后会进入一个称为安全模式的特殊状态。处于安全模式的Namenode是不会进行数据块的复制的。Namenode从所有的 Datanode接收心跳信号和块状态报告。块状态报告包括了某个Datanode所有的数据块列表。每个数据块都有一个指定的最小副本数。当Namenode检测确认某个数据块的副本数目达到这个最小值那么该数据块就会被认为是副本安全(safely replicated)的在一定百分比这个参数可配置的数据块被Namenode检测确认是安全之后加上一个额外的30秒等待时间Namenode将退出安全模式状态。接下来它会确定还有哪些数据块的副本没有达到指定数目并将这些数据块复制到其他Datanode上。
</p>
</section>
</section>
<section>
<title> 文件系统元数据的持久化 </title>
<p>
Namenode上保存着HDFS的名字空间。对于任何对文件系统元数据产生修改的操作Namenode都会使用一种称为EditLog的事务日志记录下来。例如在HDFS中创建一个文件Namenode就会在Editlog中插入一条记录来表示同样地修改文件的副本系数也将往Editlog插入一条记录。Namenode在本地操作系统的文件系统中存储这个Editlog。整个文件系统的名字空间包括数据块到文件的映射、文件的属性等都存储在一个称为FsImage的文件中这个文件也是放在Namenode所在的本地文件系统上。
</p>
<p>
Namenode在内存中保存着整个文件系统的名字空间和文件数据块映射(Blockmap)的映像。这个关键的元数据结构设计得很紧凑因而一个有4G内存的Namenode足够支撑大量的文件和目录。当Namenode启动时它从硬盘中读取Editlog和FsImage将所有Editlog中的事务作用在内存中的FsImage上并将这个新版本的FsImage从内存中保存到本地磁盘上然后删除旧的Editlog因为这个旧的Editlog的事务都已经作用在FsImage上了。这个过程称为一个检查点(checkpoint)。在当前实现中检查点只发生在Namenode启动时在不久的将来将实现支持周期性的检查点。
</p>
<p>
Datanode将HDFS数据以文件的形式存储在本地的文件系统中它并不知道有关HDFS文件的信息。它把每个HDFS数据块存储在本地文件系统的一个单独的文件中。Datanode并不在同一个目录创建所有的文件实际上它用试探的方法来确定每个目录的最佳文件数目并且在适当的时候创建子目录。在同一个目录中创建所有的本地文件并不是最优的选择这是因为本地文件系统可能无法高效地在单个目录中支持大量的文件。当一个Datanode启动时它会扫描本地文件系统产生一个这些本地文件对应的所有HDFS数据块的列表然后作为报告发送到Namenode这个报告就是块状态报告。
</p>
</section>
<section>
<title> 通讯协议 </title>
<p>
所有的HDFS通讯协议都是建立在TCP/IP协议之上。客户端通过一个可配置的<acronym title="Transmission Control Protocol">TCP</acronym>端口连接到Namenode通过ClientProtocol协议与Namenode交互。而Datanode使用DatanodeProtocol协议与Namenode交互。一个远程过程调用(<acronym title="Remote Procedure Call">RPC</acronym>)模型被抽象出来封装ClientProtocol和Datanodeprotocol协议。在设计上Namenode不会主动发起RPC而是响应来自客户端或 Datanode 的RPC请求。
</p>
</section>
<section>
<title> 健壮性 </title>
<p>
HDFS的主要目标就是即使在出错的情况下也要保证数据存储的可靠性。常见的三种出错情况是Namenode出错, Datanode出错和网络割裂(network partitions)。
</p>
<section>
<title> 磁盘数据错误,心跳检测和重新复制 </title>
<p>
每个Datanode节点周期性地向Namenode发送心跳信号。网络割裂可能导致一部分Datanode跟Namenode失去联系。Namenode通过心跳信号的缺失来检测这一情况并将这些近期不再发送心跳信号Datanode标记为宕机不会再将新的<acronym title="Input/Output">IO</acronym>请求发给它们。任何存储在宕机Datanode上的数据将不再有效。Datanode的宕机可能会引起一些数据块的副本系数低于指定值Namenode不断地检测这些需要复制的数据块一旦发现就启动复制操作。在下列情况下可能需要重新复制某个Datanode节点失效某个副本遭到损坏Datanode上的硬盘错误或者文件的副本系数增大。
</p>
</section>
<section>
<title> 集群均衡 </title>
<p>
HDFS的架构支持数据均衡策略。如果某个Datanode节点上的空闲空间低于特定的临界点按照均衡策略系统就会自动地将数据从这个Datanode移动到其他空闲的Datanode。当对某个文件的请求突然增加那么也可能启动一个计划创建该文件新的副本并且同时重新平衡集群中的其他数据。这些均衡策略目前还没有实现。
</p>
</section>
<section>
<title> 数据完整性 </title>
<p>
<!-- XXX "checksum checking" sounds funny -->
从某个Datanode获取的数据块有可能是损坏的损坏可能是由Datanode的存储设备错误、网络错误或者软件bug造成的。HDFS客户端软件实现了对HDFS文件内容的校验和(checksum)检查。当客户端创建一个新的HDFS文件会计算这个文件每个数据块的校验和并将校验和作为一个单独的隐藏文件保存在同一个HDFS名字空间下。当客户端获取文件内容后它会检验从Datanode获取的数据跟相应的校验和文件中的校验和是否匹配如果不匹配客户端可以选择从其他Datanode获取该数据块的副本。
</p>
</section>
<section>
<title> 元数据磁盘错误 </title>
<p>
FsImage和Editlog是HDFS的核心数据结构。如果这些文件损坏了整个HDFS实例都将失效。因而Namenode可以配置成支持维护多个FsImage和Editlog的副本。任何对FsImage或者Editlog的修改都将同步到它们的副本上。这种多副本的同步操作可能会降低Namenode每秒处理的名字空间事务数量。然而这个代价是可以接受的因为即使HDFS的应用是数据密集的它们也非元数据密集的。当Namenode重启的时候它会选取最近的完整的FsImage和Editlog来使用。
</p>
<p>
Namenode是HDFS集群中的单点故障(single point of failure)所在。如果Namenode机器故障是需要手工干预的。目前自动重启或在另一台机器上做Namenode故障转移的功能还没实现。
</p>
</section>
<section>
<title> 快照 </title>
<p>
快照支持某一特定时刻的数据的复制备份。利用快照可以让HDFS在数据损坏时恢复到过去一个已知正确的时间点。HDFS目前还不支持快照功能但计划在将来的版本进行支持。
</p>
</section>
</section>
<section>
<!-- XXX Better name -->
<title> 数据组织 </title>
<section>
<title> 数据块 </title>
<p>
HDFS被设计成支持大文件适用HDFS的是那些需要处理大规模的数据集的应用。这些应用都是只写入数据一次但却读取一次或多次并且读取速度应能满足流式读取的需要。HDFS支持文件的“一次写入多次读取”语义。一个典型的数据块大小是64MB。因而HDFS中的文件总是按照64M被切分成不同的块每个块尽可能地存储于不同的Datanode中。
</p>
</section>
<section>
<!-- XXX staging never described / referenced in its section -->
<title> Staging </title>
<p>
客户端创建文件的请求其实并没有立即发送给Namenode事实上在刚开始阶段HDFS客户端会先将文件数据缓存到本地的一个临时文件。应用程序的写操作被透明地重定向到这个临时文件。当这个临时文件累积的数据量超过一个数据块的大小客户端才会联系Namenode。Namenode将文件名插入文件系统的层次结构中并且分配一个数据块给它。然后返回Datanode的标识符和目标数据块给客户端。接着客户端将这块数据从本地临时文件上传到指定的Datanode上。当文件关闭时在临时文件中剩余的没有上传的数据也会传输到指定的Datanode上。然后客户端告诉Namenode文件已经关闭。此时Namenode才将文件创建操作提交到日志里进行存储。如果Namenode在文件关闭前宕机了则该文件将丢失。
</p>
<p>
上述方法是对在HDFS上运行的目标应用进行认真考虑后得到的结果。这些应用需要进行文件的流式写入。如果不采用客户端缓存,由于网络速度和网络堵塞会对吞吐量造成比较大的影响。这种方法并不是没有先例的,早期的文件系统,比如<acronym title="Andrew File System">AFS</acronym>,就用客户端缓存来提高性能。为了达到更高的数据上传效率,已经放松了POSIX标准的要求。
</p>
</section>
<section>
<title> 流水线复制 </title>
<p>
当客户端向HDFS文件写入数据的时候一开始是写到本地临时文件中。假设该文件的副本系数设置为3当本地临时文件累积到一个数据块的大小时客户端会从Namenode获取一个Datanode列表用于存放副本。然后客户端开始向第一个Datanode传输数据第一个Datanode一小部分一小部分(4 KB)地接收数据将每一部分写入本地仓库并同时传输该部分到列表中第二个Datanode节点。第二个Datanode也是这样一小部分一小部分地接收数据写入本地仓库并同时传给第三个Datanode。最后第三个Datanode接收数据并存储在本地。因此Datanode能流水线式地从前一个节点接收数据并在同时转发给下一个节点数据以流水线的方式从前一个Datanode复制到下一个。
</p>
</section>
</section>
<section>
<!-- XXX "Accessibility" sounds funny - "Interfaces" ? -->
<title> 可访问性 </title>
<!-- XXX Make an API section ? (HTTP is "web service" API?) -->
<p>
HDFS给应用提供了多种访问方式。用户可以通过<a href="http://hadoop.apache.org/core/docs/current/api/">Java API</a>接口访问也可以通过C语言的封装API访问还可以通过浏览器的方式访问HDFS中的文件。通过<acronym title="Web-based Distributed Authoring and Versioning">WebDAV</acronym>协议访问的方式正在开发中。
</p>
<section>
<title> DFSShell </title>
<p>
HDFS以文件和目录的形式组织用户数据。它提供了一个命令行的接口(DFSShell)让用户与HDFS中的数据进行交互。命令的语法和用户熟悉的其他shell(例如 bash, csh)工具类似。下面是一些动作/命令的示例:
</p>
<table>
<tr>
<th> 动作 </th><th> 命令 </th>
</tr>
<tr>
<td> 创建一个名为 <code>/foodir</code> 的目录 </td> <td> <code>bin/hadoop dfs -mkdir /foodir</code> </td>
</tr>
<tr>
<td> 删除一个名为 <code>/foodir</code> 的目录 </td> <td> <code>bin/hadoop dfs -rmr /foodir</code> </td>
</tr>
<tr>
<td> 查看名为 <code>/foodir/myfile.txt</code> 的文件内容 </td> <td> <code>bin/hadoop dfs -cat /foodir/myfile.txt</code> </td>
</tr>
</table>
<p>
DFSShell 可以用在那些通过脚本语言和文件系统进行交互的应用程序上。
</p>
</section>
<section>
<title> DFSAdmin </title>
<p>
DFSAdmin 命令用来管理HDFS集群。这些命令只有HDFS的管理员才能使用。下面是一些动作/命令的示例:
</p>
<table>
<tr>
<th> 动作 </th><th> 命令 </th>
</tr>
<tr>
<td> 将集群置于安全模式 </td> <td> <code>bin/hadoop dfsadmin -safemode enter</code> </td>
</tr>
<tr>
<td> 显示Datanode列表 </td> <td> <code>bin/hadoop dfsadmin -report</code> </td>
</tr>
<tr>
<td> 使Datanode节点 <code>datanodename</code>退役</td><td> <code>bin/hadoop dfsadmin -decommission datanodename</code> </td>
</tr>
</table>
</section>
<section>
<title> 浏览器接口 </title>
<p>
一个典型的HDFS安装会在一个可配置的TCP端口开启一个Web服务器用于暴露HDFS的名字空间。用户可以用浏览器来浏览HDFS的名字空间和查看文件的内容。
</p>
</section>
</section>
<section>
<title> 存储空间回收 </title>
<section>
<title> 文件的删除和恢复 </title>
<p>
当用户或应用程序删除某个文件时这个文件并没有立刻从HDFS中删除。实际上HDFS会将这个文件重命名转移到<code>/trash</code>目录。只要文件还在<code>/trash</code>目录中,该文件就可以被迅速地恢复。文件在<code>/trash</code>中保存的时间是可配置的当超过这个时间时Namenode就会将该文件从名字空间中删除。删除文件会使得该文件相关的数据块被释放。注意从用户删除文件到HDFS空闲空间的增加之间会有一定时间的延迟。</p>
<p>
只要被删除的文件还在<code>/trash</code>目录中,用户就可以恢复这个文件。如果用户想恢复被删除的文件,他/她可以浏览<code>/trash</code>目录找回该文件。<code>/trash</code>目录仅仅保存被删除文件的最后副本。<code>/trash</code>目录与其他的目录没有什么区别除了一点在该目录上HDFS会应用一个特殊策略来自动删除文件。目前的默认策略是删除<code>/trash</code>中保留时间超过6小时的文件。将来这个策略可以通过一个被良好定义的接口配置。
</p>
</section>
<section>
<title> 减少副本系数 </title>
<p>
当一个文件的副本系数被减小后Namenode会选择过剩的副本删除。下次心跳检测时会将该信息传递给Datanode。Datanode遂即移除相应的数据块集群中的空闲空间加大。同样在调用<code>setReplication</code> API结束和集群中空闲空间增加间会有一定的延迟。</p>
</section>
</section>
<section>
<title> 参考资料 </title>
<p>
HDFS Java API:
<a href="http://hadoop.apache.org/core/docs/current/api/">
http://hadoop.apache.org/core/docs/current/api/
</a>
</p>
<p>
HDFS 源代码:
<a href= "http://hadoop.apache.org/core/version_control.html">
http://hadoop.apache.org/core/version_control.html
</a>
</p>
</section>
</body>
</document>

View File

@ -0,0 +1,193 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
HDFS权限管理用户指南
</title>
</header>
<body>
<section> <title>概述</title>
<p>
Hadoop分布式文件系统实现了一个和POSIX系统类似的文件和目录的权限模型。每个文件和目录有一个<em>所有者(owner)</em>和一个<em>组(group)</em>。文件或目录对其所有者、同组的其他用户以及所有其他用户分别有着不同的权限。对文件而言,当读取这个文件时需要有<em>r</em>权限,当写入或者追加到文件时需要有<em>w</em>权限。对目录而言,当列出目录内容时需要具有<em>r</em>权限,当新建或删除子文件或子目录时需要有<em>w</em>权限,当访问目录的子节点时需要有<em>x</em>权限。不同于POSIX模型,HDFS权限模型中的文件没有<em>sticky</em>,<em>setuid</em>或<em>setgid</em>位,因为这里没有可执行文件的概念。为了简单起见,这里也没有目录的<em>sticky</em>,<em>setuid</em>或<em>setgid</em>位。总的来说,文件或目录的权限就是它的<em>模式(mode)</em>。HDFS采用了Unix表示和显示模式的习惯,包括使用八进制数来表示权限。当新建一个文件或目录,它的所有者即客户进程的用户,它的所属组是父目录的组(BSD的规定)。
</p>
<p>
每个访问HDFS的用户进程的标识分为两个部分,分别是<em>用户名</em>和<em>组名列表</em>。每次用户进程访问一个文件或目录<code>foo</code>,HDFS都要对其进行权限检查:
</p>
<ul>
<li>
如果用户即<code>foo</code>的所有者,则检查所有者的访问权限;
</li>
<li>
如果<code>foo</code>关联的组在组名列表中出现,则检查组用户的访问权限;
</li>
<li>
否则检查<code>foo</code>其他用户的访问权限。
</li>
</ul>
<p>
如果权限检查失败,则客户的操作会失败。
</p>
</section>
<section><title>用户身份</title>
<p>
在这个版本的Hadoop中客户端用户身份是通过宿主操作系统给出。对类Unix系统来说
</p>
<ul>
<li>
用户名等于<code>`whoami`</code>
</li>
<li>
组列表等于<code>`bash -c groups`</code>
</li>
</ul>
<p>
将来会增加其他的方式来确定用户身份比如Kerberos、LDAP等。期待用上文中提到的第一种方式来防止一个用户假冒另一个用户是不现实的。这种用户身份识别机制结合权限模型允许一个协作团体以一种有组织的形式共享文件系统中的资源。
</p>
<p>
不管怎样用户身份机制对HDFS本身来说只是外部特性。HDFS并不提供创建用户身份、创建组或处理用户凭证等功能。
</p>
</section>
<section> <title>理解系统的实现</title>
<p>
每次文件或目录操作都传递完整的路径名给name node每一个操作都会对此路径做权限检查。客户框架会隐式地将用户身份和与name node的连接关联起来从而减少改变现有客户端API的需求。经常会有这种情况当对一个文件的某一操作成功后之后同样的操作却会失败这是因为文件或路径上的某些目录已经不复存在了。比如客户端首先开始读一个文件它向name node发出一个请求以获取文件第一个数据块的位置。但接下去的获取其他数据块的第二个请求可能会失败。另一方面删除一个文件并不会撤销客户端已经获得的对文件数据块的访问权限。而权限管理能使得客户端对一个文件的访问许可在两次请求之间被收回。重复一下权限的改变并不会撤销当前客户端对文件数据块的访问许可。
</p>
<p>
map-reduce框架通过传递字符串来指派用户身份没有做其他特别的安全方面的考虑。文件或目录的所有者和组属性是以字符串的形式保存而不是像传统的Unix方式转换为用户和组的数字ID。
</p>
<p>
这个发行版本的权限管理特性并不需要改变data node的任何行为。Data node上的数据块上并没有任何<em>Hadoop</em>所有者或权限等关联属性。
</p>
</section>
<section> <title>文件系统API变更</title>
<p>
如果权限检查失败,所有使用一个路径参数的方法都可能抛出<code>AccessControlException</code>异常。
</p>
<p>新增方法:</p>
<ul>
<li>
<code>public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException</code>
</li>
<li>
<code>public boolean mkdirs(Path f, FsPermission permission) throws IOException</code>
</li>
<li>
<code>public void setPermission(Path p, FsPermission permission) throws IOException</code>
</li>
<li>
<code>public void setOwner(Path p, String username, String groupname) throws IOException</code>
</li>
<li>
<code>public FileStatus getFileStatus(Path f) throws IOException</code> 也会返回路径关联的所有者、组和模式属性。
</li>
</ul>
<p>
新建文件或目录的模式受配置参数<code>umask</code>的约束。当使用之前的 <code>create(path, &hellip;)</code> 方法(<em>没有指定</em>权限参数)时,新文件的模式是<code>666&thinsp;&amp;&thinsp;^umask</code>。当使用新的 <code>create(path, </code><em>permission</em><code>, &hellip;)</code> 方法(<em>指定了</em>权限参数<em>P</em>)时,新文件的模式是<code>P&thinsp;&amp;&thinsp;^umask&thinsp;&amp;&thinsp;666</code>。当使用先前的 <code>mkdirs(path)</code> 方法(<em>没有指定</em> 权限参数)新建一个目录时,新目录的模式是<code>777&thinsp;&amp;&thinsp;^umask</code>。当使用新的 <code>mkdirs(path, </code><em>permission</em> <code>)</code> 方法(<em>指定了</em>权限参数<em>P</em>)新建一个目录时,新目录的模式是<code>P&thinsp;&amp;&thinsp;^umask&thinsp;&amp;&thinsp;777</code>
</p>
</section>
<section> <title>Shell命令变更</title>
<p>新增操作:</p>
<dl>
<dt><code>chmod [-R]</code> <em>mode file &hellip;</em></dt>
<dd>
只有文件的所有者或者超级用户才有权限改变文件模式。
</dd>
<dt><code>chgrp [-R]</code> <em>group file &hellip;</em></dt>
<dd>
使用<code>chgrp</code>命令的用户必须属于特定的组且是文件的所有者,或者用户是超级用户。
</dd>
<dt><code>chown [-R]</code> <em>[owner][:[group]] file &hellip;</em></dt>
<dd>
文件的所有者只能被超级用户更改。
</dd>
<dt><code>ls </code> <em>file &hellip;</em></dt><dd></dd>
<dt><code>lsr </code> <em>file &hellip;</em></dt>
<dd>
输出格式做了调整以显示所有者、组和模式。
</dd>
</dl></section>
<section> <title>超级用户</title>
<p>
超级用户即运行name node进程的用户。宽泛的讲,如果你启动了name node,你就是超级用户。超级用户可以干任何事情,因为超级用户能够通过所有的权限检查。没有永久记号保留谁<em>过去</em>是超级用户;当name node开始运行时,进程自动判断谁<em>现在</em>是超级用户。HDFS的超级用户不一定非得是name node主机上的超级用户,也不需要所有的集群的超级用户都是一个。同样的,在个人工作站上运行HDFS的实验者,不需任何配置就已方便的成为了他的部署实例的超级用户。
</p>
<p>
另外,管理员可以用配置参数指定一组特定的用户,如果做了设定,这个组的成员也会是超级用户。
</p>
</section>
<section> <title>Web服务器</title>
<p>
Web服务器的身份是一个可配置参数。Name node并没有<em>真实</em>用户的概念但是Web服务器表现地就像它具有管理员选定的用户的身份用户名和组一样。除非这个选定的身份是超级用户否则会有名字空间中的一部分对Web服务器来说不可见。
</p>
</section>
<section> <title>在线升级</title>
<p>
如果集群在0.15版本的数据集(<code>fsimage</code>)上启动,所有的文件和目录都有所有者<em>O</em>,组<em>G</em>,和模式<em>M</em>,这里 <em>O</em>和<em>G</em> 分别是超级用户的用户标识和组名,<em>M</em>是一个配置参数。</p>
</section>
<section> <title>配置参数</title>
<dl>
<dt><code>dfs.permissions = true </code></dt>
<dd>
如果是 <code>true</code>,则打开前文所述的权限系统。如果是 <code>false</code>,权限<em>检查</em> 就是关闭的,但是其他的行为没有改变。这个配置参数的改变并不改变文件或目录的模式、所有者和组等信息。
<p>
</p>
不管权限模式是开还是关,<code>chmod</code>、<code>chgrp</code>和<code>chown</code> <em>总是</em> 会检查权限。这些命令只有在权限检查背景下才有用,所以不会有兼容性问题。这样,这就能让管理员在打开常规的权限检查之前可以可靠地设置文件的所有者和权限。
</dd>
<dt><code>dfs.web.ugi = webuser,webgroup</code></dt>
<dd>
Web服务器使用的用户名。如果将这个参数设置为超级用户的名称则所有Web客户就可以看到所有的信息。如果将这个参数设置为一个不使用的用户则Web客户就只能访问到“other”权限可访问的资源了。额外的组可以加在后面形成一个用逗号分隔的列表。
</dd>
<dt><code>dfs.permissions.supergroup = supergroup</code></dt>
<dd>
超级用户的组名。
</dd>
<dt><code>dfs.upgrade.permission = 777</code></dt>
<dd>
升级时的初始模式。文件<em>永不会</em>被设置<em>x</em>权限。在配置文件中,可以使用十进制数<em>511<sub>10</sub></em>
</dd>
<dt><code>dfs.umask = 022</code></dt>
<dd>
<code>umask</code>参数在创建文件和目录时使用。在配置文件中,可以使用十进制数<em>18<sub>10</sub></em>
</dd>
</dl>
</section>
</body>
</document>

View File

@ -0,0 +1,72 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
名字空间配额管理指南
</title>
</header>
<body>
<p>
Hadoop分布式文件系统(HDFS)允许管理员为每个目录设置配额。
新建立的目录没有配额。
最大的配额是<code>Long.MAX_VALUE</code>。配额为1可以强制目录保持为空。
</p>
<p>
目录配额是对目录树上该目录下的名字数量做硬性限制。如果创建文件或目录时超过了配额,该操作会失败。重命名不会改变该目录的配额;如果重命名操作会导致违反配额限制,该操作将会失败。如果尝试设置一个配额而现有文件数量已经超出了这个新配额,则设置失败。
</p>
<p>
配额和fsimage保持一致。当启动时如果fsimage违反了某个配额限制也许fsimage被偷偷改变了则启动失败并生成错误报告。设置或删除一个配额会创建相应的日志记录。
</p>
<p>
下面的新命令或新选项是用于支持配额的。
前两个是管理员命令。
</p>
<ul>
<li>
<code>dfsadmin -setquota &lt;N> &lt;directory>...&lt;directory></code>
<br />
把每个目录配额设为<code>N</code>。这个命令会在每个目录上尝试,
如果<code>N</code>不是一个正的长整型数,目录不存在或是文件名,
或者目录超过配额,则会产生错误报告。
</li>
<li>
<code>dfsadmin -clrquota &lt;directory>...&lt;directory></code><br />
为每个目录删除配额。这个命令会在每个目录上尝试,如果目录不存在或者是文件,则会产生错误报告。如果目录原来没有设置配额不会报错。
</li>
<li>
<code>fs -count -q &lt;directory>...&lt;directory></code><br />
使用<code>-q</code>选项,会报告每个目录设置的配额,以及剩余配额。
如果目录没有设置配额,会报告<code>none</code>和<code>inf</code>。
</li>
</ul>
</body>
</document>

View File

@ -0,0 +1,477 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop Shell命令</title>
</header>
<body>
<!--DCCOMMENT:diff begin-->
<section>
<title> FS Shell </title>
<p>
调用文件系统(FS)Shell命令应使用
<code>bin/hadoop fs &lt;args&gt;</code>的形式。
所有的FS shell命令使用URI路径作为参数。URI格式是<em>scheme://authority/path</em>。对HDFS文件系统,scheme是<em>hdfs</em>;对本地文件系统,scheme是<em>file</em>。其中scheme和authority参数都是可选的,如果未加指定,就会使用配置中指定的默认scheme。一个HDFS文件或目录比如<em>/parent/child</em>可以表示成<em>hdfs://namenode:namenodeport/parent/child</em>,或者更简单的<em>/parent/child</em>(假设你配置文件中的默认值是<em>namenode:namenodeport</em>)。大多数FS Shell命令的行为和对应的Unix Shell命令类似,不同之处会在下面介绍各命令使用详情时指出。出错信息会输出到<em>stderr</em>,其他信息输出到<em>stdout</em>。
</p>
<section>
<title> cat </title>
<p>
<code>使用方法hadoop fs -cat URI [URI &#x2026;]</code>
</p>
<!--DCCOMMENT:diff end
@@ -21,17 +21,28 @@
</header>
<body>
<section>
- <title> DFShell </title>
+ <title> FS Shell </title>
<p>
- The HDFS shell is invoked by
- <code>bin/hadoop dfs &lt;args&gt;</code>.
- All the HDFS shell commands take path URIs as arguments. The URI format is <em>scheme://autority/path</em>. For HDFS the scheme is <em>hdfs</em>, and for the local filesystem the scheme is <em>file</em>. The scheme and authority are optional. If not specified, the default scheme specified in the configuration is used. An HDFS file or directory such as <em>/parent/child</em> can be specified as <em>hdfs://namenode:namenodeport/parent/child</em> or simply as <em>/parent/child</em> (given that your configuration is set to point to <em>namenode:namenodeport</em>). Most of the commands in HDFS shell behave like corresponding Unix commands. Differences are described with each of the commands. Error information is sent to <em>stderr</em> and the output is sent to <em>stdout</em>.
+ The FileSystem (FS) shell is invoked by
+ <code>bin/hadoop fs &lt;args&gt;</code>.
+ All the FS shell commands take path URIs as arguments. The URI
+ format is <em>scheme://autority/path</em>. For HDFS the scheme
+ is <em>hdfs</em>, and for the local filesystem the scheme
+ is <em>file</em>. The scheme and authority are optional. If not
+ specified, the default scheme specified in the configuration is
+ used. An HDFS file or directory such as <em>/parent/child</em>
+ can be specified as <em>hdfs://namenodehost/parent/child</em> or
+ simply as <em>/parent/child</em> (given that your configuration
+ is set to point to <em>hdfs://namenodehost</em>). Most of the
+ commands in FS shell behave like corresponding Unix
+ commands. Differences are described with each of the
+ commands. Error information is sent to <em>stderr</em> and the
+ output is sent to <em>stdout</em>.
</p>
- </section>
<section>
<title> cat </title>
<p>
- <code>Usage: hadoop dfs -cat URI [URI &#x2026;]</code>
+ <code>Usage: hadoop fs -cat URI [URI &#x2026;]</code>
</p>
<p>
-->
<p>
将路径指定文件的内容输出到<em>stdout</em>
</p>
<!--DCCOMMENT:diff begin-->
<p>示例:</p>
<ul>
<li>
<code> hadoop fs -cat hdfs://host1:port1/file1 hdfs://host2:port2/file2
</code>
</li>
<li>
<code>hadoop fs -cat file:///file3 /user/hadoop/file4 </code>
</li>
</ul>
<p>返回值:<br/>
<!--DCCOMMENT:diff end
note:"hadoop dfs" has been replaced by "hadoop fs" in this doc.
@@ -39,11 +50,11 @@
<p>Example:</p>
<ul>
<li>
- <code> hadoop dfs -cat hdfs://host1:port1/file1 hdfs://host2:port2/file2
+ <code> hadoop fs -cat hdfs://nn1.example.com/file1 hdfs://nn2.example.com/file2
</code>
</li>
<li>
- <code>hadoop dfs -cat file:///file3 /user/hadoop/file4 </code>
+ <code>hadoop fs -cat file:///file3 /user/hadoop/file4 </code>
</li>
</ul>
<p>Exit Code:<br/>
-->
<code> 成功返回0失败返回-1。</code></p>
</section>
<section>
<title> chgrp </title>
<p>
<code>使用方法hadoop fs -chgrp [-R] GROUP URI [URI &#x2026;]</code>
<!-- Change group association of files. With <code>-R</code>, make the change recursively through the directory structure. The user must be the owner of files, or else a super-user. Additional information is in the <a href="hdfs_permissions_guide.html">Permissions User Guide</a>.
-->
</p>
<p>
改变文件所属的组。使用<code>-R</code>将使改变在目录结构下递归进行。命令的使用者必须是文件的所有者或者超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>
</p>
</section>
<section>
<title> chmod </title>
<p>
<code>使用方法hadoop fs -chmod [-R] &lt;MODE[,MODE]... | OCTALMODE&gt; URI [URI &#x2026;]</code>
</p>
<p>
改变文件的权限。使用<code>-R</code>将使改变在目录结构下递归进行。命令的使用者必须是文件的所有者或者超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>
</p>
</section>
<section>
<title> chown </title>
<p>
<code>使用方法hadoop fs -chown [-R] [OWNER][:[GROUP]] URI [URI &#x2026;]</code>
</p>
<p>
改变文件的拥有者。使用<code>-R</code>将使改变在目录结构下递归进行。命令的使用者必须是超级用户。更多的信息请参见<a href="hdfs_permissions_guide.html">HDFS权限用户指南</a>
</p>
</section>
<section>
<title>copyFromLocal</title>
<p>
<code>使用方法hadoop fs -copyFromLocal &lt;localsrc&gt; URI</code>
</p>
<p>除了限定源路径是一个本地文件外,和<a href="#putlink"><strong>put</strong></a>命令相似。</p>
</section>
<section>
<title> copyToLocal</title>
<p>
<code>使用方法hadoop fs -copyToLocal [-ignorecrc] [-crc] URI &lt;localdst&gt;</code>
</p>
<p>除了限定目标路径是一个本地文件外,和<a href="#getlink"><strong>get</strong></a>命令类似。</p>
</section>
<section>
<title> cp </title>
<p>
<code>使用方法hadoop fs -cp URI [URI &#x2026;] &lt;dest&gt;</code>
</p>
<p>
将文件从源路径复制到目标路径。这个命令允许有多个源路径,此时目标路径必须是一个目录。
<br/>
示例:</p>
<ul>
<li>
<code> hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2</code>
</li>
<li>
<code> hadoop fs -cp /user/hadoop/file1 /user/hadoop/file2 /user/hadoop/dir </code>
</li>
</ul>
<p>返回值:</p>
<p>
<code> 成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title>du</title>
<p>
<code>使用方法hadoop fs -du URI [URI &#x2026;]</code>
</p>
<p>
显示目录中所有文件的大小,或者当只指定一个文件时,显示此文件的大小。<br/>
示例:<br/><code>hadoop fs -du /user/hadoop/dir1 /user/hadoop/file1 hdfs://host:port/user/hadoop/dir1</code><br/>
返回值:<br/><code> 成功返回0失败返回-1。</code><br/></p>
</section>
<section>
<title> dus </title>
<p>
<code>使用方法hadoop fs -dus &lt;args&gt;</code>
</p>
<p>
显示文件的大小。
</p>
</section>
<section>
<title> expunge </title>
<p>
<code>使用方法hadoop fs -expunge</code>
</p>
<p>清空回收站。请参考<a href="hdfs_design.html">HDFS设计</a>文档以获取更多关于回收站特性的信息。
</p>
</section>
<section>
<title id="getlink"> get </title>
<p>
<code>使用方法hadoop fs -get [-ignorecrc] [-crc] &lt;src&gt; &lt;localdst&gt;</code>
<br/>
</p>
<p>
复制文件到本地文件系统。可用<code>-ignorecrc</code>选项复制CRC校验失败的文件。使用<code>-crc</code>选项复制文件以及CRC信息。
</p>
<p>示例:</p>
<ul>
<li>
<code> hadoop fs -get /user/hadoop/file localfile </code>
</li>
<li>
<code> hadoop fs -get hdfs://host:port/user/hadoop/file localfile</code>
</li>
</ul>
<p>返回值:</p>
<p>
<code> 成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title> getmerge </title>
<p>
<code>使用方法hadoop fs -getmerge &lt;src&gt; &lt;localdst&gt; [addnl]</code>
</p>
<p>
接受一个源目录和一个目标文件作为输入,并且将源目录中所有的文件连接成本地目标文件。<code>addnl</code>是可选的,用于指定在每个文件结尾添加一个换行符。
</p>
</section>
<section>
<title> ls </title>
<p>
<code>使用方法hadoop fs -ls &lt;args&gt;</code>
</p>
<p>如果是文件,则按照如下格式返回文件信息:<br/><code>文件名 &lt;副本数&gt; 文件大小 修改日期 修改时间 权限 用户ID 组ID</code><br/>
如果是目录则返回它直接子文件的一个列表就像在Unix中一样。目录返回列表的信息如下<br/><code>目录名 &lt;dir&gt; 修改日期 修改时间 权限 用户ID 组ID</code><br/>
示例:<br/><code>hadoop fs -ls /user/hadoop/file1 /user/hadoop/file2 hdfs://host:port/user/hadoop/dir1 /nonexistentfile</code><br/>
返回值:<br/><code> 成功返回0失败返回-1。</code><br/></p>
</section>
<section>
<title>lsr</title>
<p><code>使用方法hadoop fs -lsr &lt;args&gt;</code><br/>
<code>ls</code>命令的递归版本。类似于Unix中的<code>ls -R</code>
</p>
</section>
<section>
<title> mkdir </title>
<p>
<code>使用方法hadoop fs -mkdir &lt;paths&gt;</code>
<br/>
</p>
<p>接受路径指定的uri作为参数创建这些目录。其行为类似于Unix的mkdir -p它会创建路径中的各级父目录。</p>
<p>示例:</p>
<ul>
<li>
<code>hadoop fs -mkdir /user/hadoop/dir1 /user/hadoop/dir2 </code>
</li>
<li>
<code>hadoop fs -mkdir hdfs://host1:port1/user/hadoop/dir hdfs://host2:port2/user/hadoop/dir
</code>
</li>
</ul>
<p>返回值:</p>
<p>
<code>成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title> moveFromLocal </title>
<p>
<code>使用方法hadoop fs -moveFromLocal &lt;src&gt; &lt;dst&gt;</code>
</p>
<p>输出一个“not implemented”信息。
</p>
</section>
<section>
<title> mv </title>
<p>
<code>使用方法hadoop fs -mv URI [URI &#x2026;] &lt;dest&gt;</code>
</p>
<p>
将文件从源路径移动到目标路径。这个命令允许有多个源路径,此时目标路径必须是一个目录。不允许在不同的文件系统间移动文件。
<br/>
示例:
</p>
<ul>
<li>
<code> hadoop fs -mv /user/hadoop/file1 /user/hadoop/file2</code>
</li>
<li>
<code> hadoop fs -mv hdfs://host:port/file1 hdfs://host:port/file2 hdfs://host:port/file3 hdfs://host:port/dir1</code>
</li>
</ul>
<p>返回值:</p>
<p>
<code> 成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title id="putlink"> put </title>
<p>
<code>使用方法hadoop fs -put &lt;localsrc&gt; ... &lt;dst&gt;</code>
</p>
<p>从本地文件系统中复制单个或多个源路径到目标文件系统。也支持从标准输入中读取输入写入目标文件系统。<br/>
</p>
<ul>
<li>
<code> hadoop fs -put localfile /user/hadoop/hadoopfile</code>
</li>
<li>
<code> hadoop fs -put localfile1 localfile2 /user/hadoop/hadoopdir</code>
</li>
<li>
<code> hadoop fs -put localfile hdfs://host:port/hadoop/hadoopfile</code>
</li>
<li><code>hadoop fs -put - hdfs://host:port/hadoop/hadoopfile</code><br/>从标准输入中读取输入。</li>
</ul>
<p>返回值:</p>
<p>
<code> 成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title> rm </title>
<p>
<code>使用方法hadoop fs -rm URI [URI &#x2026;] </code>
</p>
<p>
删除指定的文件。只删除非空目录和文件。请参考rmr命令了解递归删除。<br/>
示例:
</p>
<ul>
<li>
<code> hadoop fs -rm hdfs://host:port/file /user/hadoop/emptydir </code>
</li>
</ul>
<p>返回值:</p>
<p>
<code> 成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title> rmr </title>
<p>
<code>使用方法hadoop fs -rmr URI [URI &#x2026;]</code>
</p>
<p>delete的递归版本。<br/>
示例:
</p>
<ul>
<li>
<code> hadoop fs -rmr /user/hadoop/dir </code>
</li>
<li>
<code> hadoop fs -rmr hdfs://host:port/user/hadoop/dir </code>
</li>
</ul>
<p>返回值:</p>
<p>
<code> 成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title> setrep </title>
<p>
<code>使用方法hadoop fs -setrep [-R] [-w] &lt;rep&gt; &lt;path&gt;</code>
</p>
<p>
改变一个文件的副本系数。-R选项用于递归改变目录下所有文件的副本系数。
</p>
<p>示例:</p>
<ul>
<li>
<code> hadoop fs -setrep -w 3 -R /user/hadoop/dir1 </code>
</li>
</ul>
<p>返回值:</p>
<p>
<code>成功返回0失败返回-1。</code>
</p>
</section>
<section>
<title> stat </title>
<p>
<code>使用方法hadoop fs -stat URI [URI &#x2026;]</code>
</p>
<p>
返回指定路径的统计信息。
</p>
<p>示例:</p>
<ul>
<li>
<code> hadoop fs -stat path </code>
</li>
</ul>
<p>返回值:<br/>
<code> 成功返回0失败返回-1。</code></p>
</section>
<section>
<title> tail </title>
<p>
<code>使用方法hadoop fs -tail [-f] URI </code>
</p>
<p>
将文件尾部1K字节的内容输出到stdout。支持-f选项行为和Unix中一致。
</p>
<p>示例:</p>
<ul>
<li>
<code> hadoop fs -tail pathname </code>
</li>
</ul>
<p>返回值:<br/>
<code> 成功返回0失败返回-1。</code></p>
</section>
<section>
<title> test </title>
<p>
<code>使用方法hadoop fs -test -[ezd] URI</code>
</p>
<p>
选项:<br/>
-e 检查文件是否存在。如果存在则返回0。<br/>
-z 检查文件是否是0字节。如果是则返回0。 <br/>
-d 如果路径是个目录则返回1否则返回0。<br/></p>
<p>示例:</p>
<ul>
<li>
<code> hadoop fs -test -e filename </code>
</li>
</ul>
</section>
<section>
<title> text </title>
<p>
<code>使用方法hadoop fs -text &lt;src&gt;</code>
<br/>
</p>
<p>
将源文件输出为文本格式。允许的格式是zip和TextRecordInputStream。
</p>
</section>
<section>
<title> touchz </title>
<p>
<code>使用方法hadoop fs -touchz URI [URI &#x2026;]</code>
<br/>
</p>
<p>
创建一个0字节的空文件。
</p>
<p>示例:</p>
<ul>
<li>
<code> hadoop fs -touchz pathname </code>
</li>
</ul>
<p>返回值:<br/>
<code> 成功返回0失败返回-1。</code></p>
<!--DCCOMMENT:diff begin-->
</section>
</section>
</body>
</document>
<!--DCCOMMENT:diff end
<p>Exit Code:<br/>
<code> Returns 0 on success and -1 on error.</code></p>
</section>
+ </section>
</body>
</document>
-->

View File

@ -0,0 +1,513 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<!--DCCOMMENT:diff begin-->
<header>
<title>
Hadoop分布式文件系统使用指南
</title>
</header>
<body>
<section> <title>目的</title>
<p>
本文档的目标是为Hadoop分布式文件系统HDFS的用户提供一个学习的起点这里的HDFS既可以作为<a href="http://hadoop.apache.org/">Hadoop</a>集群的一部分也可以作为一个独立的分布式文件系统。虽然HDFS在很多环境下被设计成是可正确工作的但是了解HDFS的工作原理对在特定集群上改进HDFS的运行性能和错误诊断都有极大的帮助。
</p>
</section>
<!--DCCOMMENT:diff end
@@ -23,18 +23,18 @@
<header>
<title>
- Hadoop DFS User Guide
+ HDFS User Guide
</title>
</header>
<body>
<section> <title>Purpose</title>
<p>
- This document aims to be the starting point for users working with
+ This document is a starting point for users working with
Hadoop Distributed File System (HDFS) either as a part of a
<a href="http://hadoop.apache.org/">Hadoop</a>
cluster or as a stand-alone general purpose distributed file system.
- While HDFS is designed to "just-work" in many environments, a working
+ While HDFS is designed to "just work" in many environments, a working
knowledge of HDFS helps greatly with configuration improvements and
diagnostics on a specific cluster.
</p>
-->
<!--DCCOMMENT:begin-->
<section> <title> 概述 </title>
<p>
HDFS是Hadoop应用用到的一个最主要的分布式存储系统。一个HDFS集群主要由一个NameNode和很多个Datanode组成Namenode管理文件系统的元数据而Datanode存储了实际的数据。HDFS的体系结构在<a href="hdfs_design.html">这里</a>有详细的描述。本文档主要关注用户以及管理员怎样和HDFS进行交互。<a href="hdfs_design.html">HDFS架构设计</a>中的<a href="images/hdfsarchitecture.gif">图解</a>描述了Namenode、Datanode和客户端之间的基本的交互操作。基本上客户端联系Namenode以获取文件的元数据或修饰属性而真正的文件I/O操作是直接和Datanode进行交互的。
</p>
<p>
下面列出了一些多数用户都比较感兴趣的重要特性。
</p>
<ul>
<li>
<!--DCCOMMENT:end
note:all tag "<em>" has been deleted in this doc.
@@ -43,21 +43,20 @@
<section> <title> Overview </title>
<p>
HDFS is the primary distributed storage used by Hadoop applications. A
- HDFS cluster primarily consists of a <em>NameNode</em> that manages the
- filesystem metadata and Datanodes that store the actual data. The
+ HDFS cluster primarily consists of a NameNode that manages the
+ file system metadata and DataNodes that store the actual data. The
architecture of HDFS is described in detail
<a href="hdfs_design.html">here</a>. This user guide primarily deals with
interaction of users and administrators with HDFS clusters.
The <a href="images/hdfsarchitecture.gif">diagram</a> from
<a href="hdfs_design.html">HDFS architecture</a> depicts
- basic interactions among Namenode, Datanodes, and the clients. Eseentially,
- clients contact Namenode for file metadata or file modifications and perform
- actual file I/O directly with the datanodes.
+ basic interactions among NameNode, the DataNodes, and the clients.
+ Clients contact NameNode for file metadata or file modifications and perform
+ actual file I/O directly with the DataNodes.
</p>
<p>
The following are some of the salient features that could be of
- interest to many users. The terms in <em>italics</em>
- are described in later sections.
+ interest to many users.
</p>
<ul>
<li>
-->
Hadoop包括HDFS非常适合在商用硬件commodity hardware上做分布式存储和计算因为它不仅具有容错性和可扩展性而且非常易于扩展。<a href="mapred_tutorial.html">Map-Reduce</a>框架以其在大型分布式系统应用上的简单性和可用性而著称这个框架已经被集成进Hadoop中。
</li>
<li>
HDFS的可配置性极高同时它的默认配置能够满足很多的安装环境。多数情况下这些参数只在非常大规模的集群环境下才需要调整。
</li>
<!--DCCOMMENT:diff begin-->
<li>
用Java语言开发支持所有的主流平台。
</li>
<li>
支持类Shell命令可直接和HDFS进行交互。
</li>
<li>
NameNode和DataNode有内置的Web服务器方便用户检查集群的当前状态。
</li>
<!--DCCOMMENT:diff end
@@ -74,13 +73,13 @@
needs to be tuned only for very large clusters.
</li>
<li>
- It is written in Java and is supported on all major platforms.
+ Hadoop is written in Java and is supported on all major platforms.
</li>
<li>
- Supports <em>shell like commands</em> to interact with HDFS directly.
+ Hadoop supports shell-like commands to interact with HDFS directly.
</li>
<li>
- Namenode and Datanodes have built in web servers that makes it
+ The NameNode and Datanodes have built in web servers that makes it
easy to check current status of the cluster.
</li>
<li>
-->
<li>
新特性和改进会定期加入HDFS的实现中。下面列出的是HDFS中常用特性的一部分
<ul>
<li>
文件权限和授权。
</li>
<li>
机架感知Rack awareness在调度任务和分配存储空间时考虑节点的物理位置。
</li>
<li>
安全模式:一种维护需要的管理模式。
</li>
<li>
fsck一个诊断文件系统健康状况的工具能够发现丢失的文件或数据块。
</li>
<li>
Rebalancer当datanode之间数据不均衡时平衡集群上的数据负载。
</li>
<li>
升级和回滚在软件更新后有异常发生的情形下能够回滚到HDFS升级之前的状态。
</li>
<li>
Secondary Namenode对文件系统名字空间执行周期性的检查点将Namenode上HDFS改动日志文件的大小控制在某个特定的限度下。
</li>
</ul>
</li>
</ul>
</section> <section> <title> 先决条件 </title>
<p>
下面的文档描述了如何安装和搭建Hadoop集群
</p>
<ul>
<li>
<a href="quickstart.html">Hadoop快速入门</a>
针对初次使用者。
</li>
<li>
<a href="cluster_setup.html">Hadoop集群搭建</a>
针对大规模分布式集群的搭建。
</li>
</ul>
<p>
文档余下部分假设用户已经安装并运行了至少包含一个Datanode节点的HDFS。就本文目的来说Namenode和Datanode可以运行在同一个物理主机上。
</p>
</section> <section> <title> Web接口 </title>
<!--DCCOMMENT:diff begin-->
<p>
NameNode和DataNode各自启动了一个内置的Web服务器显示了集群当前的基本状态和信息。在默认配置下NameNode的首页地址是<code>http://namenode-name:50070/</code>。这个页面列出了集群里的所有DataNode和集群的基本状态。这个Web接口也可以用来浏览整个文件系统使用NameNode首页上的"Browse the file system"链接)。
</p>
<!--DCCOMMENT:diff end
</section> <section> <title> Web Interface </title>
<p>
- Namenode and Datanode each run an internal web server in order to
+ NameNode and DataNode each run an internal web server in order to
display basic information about the current status of the cluster.
- With the default configuration, namenode front page is at
- <code>http://namenode:50070/</code> .
- It lists the datanodes in the cluster and basic stats of the
+ With the default configuration, the NameNode front page is at
+ <code>http://namenode-name:50070/</code>.
+ It lists the DataNodes in the cluster and basic statistics of the
cluster. The web interface can also be used to browse the file
- system (using "Browse the file system" link on the Namenode front
+ system (using "Browse the file system" link on the NameNode front
page).
</p>
-->
<!--DCCOMMENT:diff begin-->
</section> <section> <title>Shell命令</title>
<p>Hadoop包括一系列的类shell的命令可直接和HDFS以及其他Hadoop支持的文件系统进行交互。<code>bin/hadoop fs -help</code> 命令列出所有Hadoop Shell支持的命令。而 <code>bin/hadoop fs -help command-name</code> 命令能显示关于某个命令的详细信息。这些命令支持大多数普通文件系统的操作比如复制文件、改变文件权限等。它还支持一些HDFS特有的操作比如改变文件副本数目。
</p>
<!--DCCOMMENT:diff end
</section> <section> <title>Shell Commands</title>
<p>
- Hadoop includes various "shell-like" commands that directly
+ Hadoop includes various shell-like commands that directly
interact with HDFS and other file systems that Hadoop supports.
The command
<code>bin/hadoop fs -help</code>
lists the commands supported by Hadoop
- shell. Further,
- <code>bin/hadoop fs -help command</code>
- displays more detailed help on a command. The commands support
- most of the normal filesystem operations like copying files,
+ shell. Furthermore, the command
+ <code>bin/hadoop fs -help command-name</code>
+ displays more detailed help for a command. These commands support
+ most of the normal files ystem operations like copying files,
changing file permissions, etc. It also supports a few HDFS
specific operations like changing replication of files.
</p>
-->
<section> <title> DFSAdmin命令 </title>
<p>
<code>'bin/hadoop dfsadmin'</code> 命令支持一些和HDFS管理相关的操作。<code>bin/hadoop dfsadmin -help</code> 命令能列出所有当前支持的命令。比如:
</p>
<ul>
<li>
<!--DCCOMMENT:diff begin-->
<code>-report</code>报告HDFS的基本统计信息。有些信息也可以在NameNode Web服务首页看到。
<!--DCCOMMENT:diff end
note: "Namenode" is replaced by "NameNode" in this doc
<li>
<code>-report</code>
- : reports basic stats of HDFS. Some of this information is
- also available on the Namenode front page.
+ : reports basic statistics of HDFS. Some of this information is
+ also available on the NameNode front page.
</li>
-->
</li>
<li>
<code>-safemode</code>虽然通常并不需要但是管理员的确可以手动让NameNode进入或离开安全模式。
</li>
<li>
<code>-finalizeUpgrade</code>:删除上一次升级时制作的集群备份。
</li>
</ul>
</section>
</section> <section> <title> Secondary NameNode </title>
<p>NameNode将对文件系统的改动追加保存到本地文件系统上的一个日志文件<code>edits</code>。当一个NameNode启动时,它首先从一个映像文件<code>fsimage</code>中读取HDFS的状态,接着应用日志文件中的edits操作。然后它将新的HDFS状态写入<code>fsimage</code>,并使用一个空的edits文件开始正常操作。因为NameNode只有在启动阶段才合并<code>fsimage</code>和<code>edits</code>,所以久而久之日志文件可能会变得非常庞大,特别是对大型的集群。日志文件太大的另一个副作用是下一次NameNode启动会花很长时间。
</p>
<p>
Secondary NameNode定期合并fsimage和edits日志,将edits日志文件大小控制在一个限度下。因为内存需求和NameNode在一个数量级上,所以通常secondary NameNode和NameNode运行在不同的机器上。Secondary NameNode通过<code>bin/start-dfs.sh</code>在<code>conf/masters</code>中指定的节点上启动。
</p>
<!--DCCOMMENT:diff begin-->
<p>
Secondary NameNode的检查点进程启动是由两个配置参数控制的
</p>
<ul>
<li>
<code>fs.checkpoint.period</code>,指定连续两次检查点的最大时间间隔,
默认值是1小时。
</li>
<li>
<code>fs.checkpoint.size</code>定义了edits日志文件的最大值一旦超过这个值会导致强制执行检查点即使没到检查点的最大时间间隔。默认值是64MB。
</li>
</ul>
<p>
Secondary NameNode保存最新检查点的目录与NameNode的目录结构相同。
所以NameNode可以在需要的时候读取Secondary NameNode上的检查点镜像。
</p>
<p>
如果NameNode上除了最新的检查点以外所有的其他的历史镜像和edits文件都丢失了
NameNode可以引入这个最新的检查点。以下操作可以实现这个功能
</p>
<ul>
<li>
在配置参数<code>dfs.name.dir</code>指定的位置建立一个空文件夹;
</li>
<li>
把检查点目录的位置赋值给配置参数<code>fs.checkpoint.dir</code>
</li>
<li>
启动NameNode并加上<code>-importCheckpoint</code>
</li>
</ul>
<p>
NameNode会从<code>fs.checkpoint.dir</code>目录读取检查点,
并把它保存在<code>dfs.name.dir</code>目录下。
如果<code>dfs.name.dir</code>目录下有合法的镜像文件NameNode会启动失败。
NameNode会检查<code>fs.checkpoint.dir</code>目录下镜像文件的一致性,但是不会去改动它。
</p>
<p>
命令的使用方法请参考<a href="commands_manual.html#secondarynamenode"><code>secondarynamenode</code> 命令</a>.
</p>
<!--DCCOMMENT:diff end
+ <p>
+ The start of the checkpoint process on the secondary NameNode is
+ controlled by two configuration parameters.
+ </p>
+ <ul>
+ <li>
+ <code>fs.checkpoint.period</code>, set to 1 hour by default, specifies
+ the maximum delay between two consecutive checkpoints, and
+ </li>
+ <li>
+ <code>fs.checkpoint.size</code>, set to 64MB by default, defines the
+ size of the edits log file that forces an urgent checkpoint even if
+ the maximum checkpoint delay is not reached.
+ </li>
+ </ul>
+ <p>
+ The secondary NameNode stores the latest checkpoint in a
+ directory which is structured the same way as the primary NameNode's
+ directory. So that the check pointed image is always ready to be
+ read by the primary NameNode if necessary.
+ </p>
+ <p>
+ The latest checkpoint can be imported to the primary NameNode if
+ all other copies of the image and the edits files are lost.
+ In order to do that one should:
+ </p>
+ <ul>
+ <li>
+ Create an empty directory specified in the
+ <code>dfs.name.dir</code> configuration variable;
+ </li>
+ <li>
+ Specify the location of the checkpoint directory in the
+ configuration variable <code>fs.checkpoint.dir</code>;
+ </li>
+ <li>
+ and start the NameNode with <code>-importCheckpoint</code> option.
+ </li>
+ </ul>
+ <p>
+ The NameNode will upload the checkpoint from the
+ <code>fs.checkpoint.dir</code> directory and then save it to the NameNode
+ directory(s) set in <code>dfs.name.dir</code>.
+ The NameNode will fail if a legal image is contained in
+ <code>dfs.name.dir</code>.
+ The NameNode verifies that the image in <code>fs.checkpoint.dir</code> is
+ consistent, but does not modify it in any way.
+ </p>
+ <p>
+ For command usage, see <a href="commands_manual.html#secondarynamenode"><code>secondarynamenode</code> command</a>.
+ </p>
</section> <section> <title> Rebalancer </title>
-->
</section> <section> <title> Rebalancer </title>
<!--DCCOMMENT:diff begin-->
<p>
HDFS的数据也许并不是非常均匀的分布在各个DataNode中。一个常见的原因是在现有的集群上经常会增添新的DataNode节点。当新增一个数据块一个文件的数据被保存在一系列的块中NameNode在选择DataNode接收这个数据块之前会考虑到很多因素。其中的一些考虑的是
</p>
<!--DCCOMMENT:diff end
note : "datanode" is replaced by "DataNode" in this doc.
HDFS data might not always be be placed uniformly across the
- datanode. One common reason is addition of new datanodes to an
- existing cluster. While placing new <em>blocks</em> (data for a file is
- stored as a series of blocks), Namenode considers various
- parameters before choosing the datanodes to receive these blocks.
- Some of the considerations are :
+ DataNode. One common reason is addition of new DataNodes to an
+ existing cluster. While placing new blocks (data for a file is
+ stored as a series of blocks), NameNode considers various
+ parameters before choosing the DataNodes to receive these blocks.
+ Some of the considerations are:
</p>
-->
<ul>
<li>
将数据块的一个副本放在正在写这个数据块的节点上。
</li>
<li>
尽量将数据块的不同副本分布在不同的机架上,这样集群可在完全失去某一机架的情况下还能存活。
</li>
<li>
一个副本通常被放置在和写文件的节点同一机架的某个节点上这样可以减少跨越机架的网络I/O。
</li>
<li>
尽量均匀地将HDFS数据分布在集群的DataNode中。
</li>
</ul>
<p>
由于上述多种考虑需要取舍数据可能并不会均匀分布在DataNode中。HDFS为管理员提供了一个工具用于分析数据块分布和重新平衡DataNode上的数据分布。<a href="http://issues.apache.org/jira/browse/HADOOP-1652">HADOOP-1652</a>的附件中的一个<a href="http://issues.apache.org/jira/secure/attachment/12368261/RebalanceDesign6.pdf">PDF</a>是一个简要的rebalancer管理员指南。
</p>
<!--DCCOMMENT:diff begin-->
<p>
使用方法请参考<a href="commands_manual.html#balancer">balancer 命令</a>.
</p>
<!--DCCOMMENT:diff end
<a href="http://issues.apache.org/jira/browse/HADOOP-1652">HADOOP-1652</a>.
</p>
+ <p>
+ For command usage, see <a href="commands_manual.html#balancer">balancer command</a>.
+ </p>
</section> <section> <title> Rack Awareness </title>
-->
</section> <section> <title> 机架感知Rack awareness </title>
<p>
通常大型Hadoop集群是以机架的形式来组织的同一个机架上不同节点间的网络状况比不同机架之间的更为理想。另外NameNode设法将数据块副本保存在不同的机架上以提高容错性。Hadoop允许集群的管理员通过配置<code>dfs.network.script</code>参数来确定节点所处的机架。当这个脚本配置完毕每个节点都会运行这个脚本来获取它的机架ID。默认的安装假定所有的节点属于同一个机架。这个特性及其配置参数在<a href="http://issues.apache.org/jira/browse/HADOOP-692">HADOOP-692</a>所附的<a href="http://issues.apache.org/jira/secure/attachment/12345251/Rack_aware_HDFS_proposal.pdf">PDF</a>上有更详细的描述。
</p>
</section> <section> <title> 安全模式 </title>
<p>
NameNode启动时会从fsimage和edits日志文件中装载文件系统的状态信息接着它等待各个DataNode向它报告它们各自的数据块状态这样NameNode就不会过早地开始复制数据块即使在副本充足的情况下。这个阶段NameNode处于安全模式下。NameNode的安全模式本质上是HDFS集群的一种只读模式此时集群不允许任何对文件系统或者数据块修改的操作。通常NameNode会在开始阶段自动地退出安全模式。如果需要你也可以通过<code>'bin/hadoop dfsadmin -safemode'</code>命令显式地将HDFS置于安全模式。NameNode首页会显示当前是否处于安全模式。关于安全模式的更多介绍和配置信息请参考JavaDoc<a href="http://hadoop.apache.org/core/docs/current/api/org/apache/hadoop/dfs/NameNode.html#setSafeMode(org.apache.hadoop.dfs.FSConstants.SafeModeAction)"><code>setSafeMode()</code></a>
</p>
</section> <section> <title> fsck </title>
<p>
HDFS支持<code>fsck</code>命令来检查系统中的各种不一致状况。这个命令被设计来报告各种文件存在的问题比如文件缺少数据块或者副本数目不够。不同于在本地文件系统上传统的fsck工具这个命令并不会修正它检测到的错误。一般来说NameNode会自动修正大多数可恢复的错误。HDFS的fsck不是一个Hadoop shell命令。它通过'<code>bin/hadoop fsck</code>'执行。
<!--DCCOMMENT:diff begin-->
命令的使用方法请参考<a href="commands_manual.html#fsck"><code>fsck</code>命令</a>
<code>fsck</code>可用来检查整个文件系统,也可以只检查部分文件。
<!--DCCOMMENT:diff end
Hadoop shell command. It can be run as '<code>bin/hadoop fsck</code>'.
- Fsck can be run on the whole filesystem or on a subset of files.
+ For command usage, see <a href="commands_manual.html#fsck"><code>fsck</code> command</a>.
+ <code>fsck</code> can be run on the whole file system or on a subset of files.
</p>
-->
</p>
</section> <section> <title> 升级和回滚 </title>
<p>当在一个已有集群上升级Hadoop时像其他的软件升级一样可能会有新的bug或一些会影响到现有应用的非兼容性变更出现。在任何有实际意义的HDFS系统上丢失数据是不被允许的更不用说重新搭建启动HDFS了。HDFS允许管理员退回到之前的Hadoop版本并将集群的状态回滚到升级之前。更多关于HDFS升级的细节在<a href="http://wiki.apache.org/hadoop/Hadoop%20Upgrade">升级wiki</a>上可以找到。HDFS在一个时间可以有一个这样的备份。在升级之前管理员需要用<code>bin/hadoop dfsadmin -finalizeUpgrade</code>(升级终结操作)命令删除存在的备份文件。下面简单介绍一下一般的升级过程:
</p>
<ul>
<li>升级 Hadoop 软件之前,请检查是否已经存在一个备份,如果存在,可执行升级终结操作删除这个备份。通过<code>dfsadmin -upgradeProgress status</code>命令能够知道是否需要对一个集群执行升级终结操作。</li>
<li>停止集群并部署新版本的Hadoop。</li>
<li>使用<code>-upgrade</code>选项运行新的版本(<code>bin/start-dfs.sh -upgrade</code>)。
</li>
<li>在大多数情况下集群都能够正常运行。一旦我们认为新的HDFS运行正常也许经过几天的操作之后就可以对之执行升级终结操作。注意在对一个集群执行升级终结操作之前删除那些升级前就已经存在的文件并不会真正地释放DataNodes上的磁盘空间。</li>
<li>如果需要退回到老版本,
<ul>
<li>停止集群并且部署老版本的Hadoop。</li>
<li>用回滚选项启动集群(<code>bin/start-dfs.sh -rollback</code>)。</li>
</ul>
</li>
</ul>
</section> <section> <title> 文件权限和安全性 </title>
<p>
这里的文件权限和其他常见平台如Linux的文件权限类似。目前安全性仅限于简单的文件权限。启动NameNode的用户被视为HDFS的超级用户。HDFS以后的版本将会支持网络验证协议比如Kerberos来对用户身份进行验证和对数据进行加密传输。具体的细节请参考<a href="hdfs_permissions_guide.html">权限使用管理指南</a>
</p>
</section> <section> <title> 可扩展性 </title>
<p>
现在Hadoop已经运行在上千个节点的集群上。<a href="http://wiki.apache.org/hadoop/PoweredBy">Powered By Hadoop</a>页面列出了一些已将Hadoop部署在他们的大型集群上的组织。HDFS集群只有一个NameNode节点。目前NameNode上可用内存大小是一个主要的扩展限制。在超大型的集群中增大HDFS存储文件的平均大小能够增大集群的规模而不需要增加NameNode的内存。默认配置也许并不适合超大规模的集群。<a href="http://wiki.apache.org/hadoop/FAQ">Hadoop FAQ</a>页面列举了针对大型Hadoop集群的配置改进。</p>
</section> <section> <title> 相关文档 </title>
<p>
这个用户手册给用户提供了一个学习和使用HDFS文件系统的起点。本文档会不断地进行改进,同时用户也可以参考更多的Hadoop和HDFS文档。下面的列表是用户继续学习的起点:
</p>
<ul>
<li>
<a href="http://hadoop.apache.org/">Hadoop官方主页</a>所有Hadoop相关的起始页。
</li>
<li>
<a href="http://wiki.apache.org/hadoop/FrontPage">Hadoop Wiki</a>Hadoop Wiki文档首页。这个指南是Hadoop代码树中的一部分与此不同Hadoop Wiki是由Hadoop社区定期编辑的。
</li>
<li>Hadoop Wiki上的<a href="http://wiki.apache.org/hadoop/FAQ">FAQ</a>
</li>
<li>Hadoop <a href="http://hadoop.apache.org/core/docs/current/api/">JavaDoc API</a></li>
<li>Hadoop用户邮件列表<a href="mailto:core-user@hadoop.apache.org">core-user[at]hadoop.apache.org</a></li>
<li>查看<code>conf/hadoop-default.xml</code>文件。这里包括了大多数配置参数的简要描述。</li>
<li>
<a href="commands_manual.html">命令手册</a>:命令使用说明。
</li>
<!--DCCOMMENT:diff begin-->
<!--DCCOMMENT:diff end
@@ -411,6 +469,10 @@
It includes brief
description of most of the configuration variables available.
</li>
+ <li>
+ <a href="commands_manual.html">Commands Manual</a>
+ : commands usage.
+ </li>
</ul>
</section>
-->
</ul>
</section>
</body>
</document>

View File

@ -0,0 +1,46 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
Hadoop On Demand
</title>
</header>
<body>
<section>
<title>简介</title>
<p>Hadoop On Demand(HOD)是一个能在大型物理集群上供应虚拟hadoop集群的系统。它使用Torque资源管理器分配节点。它可以在分配的节点上启动Hadoop Map/Reduce和HDFS的守护进程。它会自动为Hadoop守护进程和客户端生成合适的配置文件hadoop-site.xml。HOD还可以将Hadoop分发到它分配出来的虚拟Hadoop集群的节点上。简而言之HOD使管理员和用户轻松地快速搭建和使用hadoop。它也是Hadoop开发人员和测试人员非常有用的一个工具他们可以使用HOD共享一个物理集群来测试各自的Hadoop版本。</p>
</section>
<section>
<title>文档</title>
<p>读一遍下面的文档你会在使用HOD方面了解更多</p>
<ul>
<li><a href="hod_admin_guide.html">HOD管理指南</a> : 此指南概述了HOD的体系结构Torque资源管理器及其他各种支持工具也会告诉你如何安装配置和运行HOD。</li>
<li><a href="hod_config_guide.html">HOD配置指南</a> : 此指南讨论HOD的配置段会告诉你如何使用那些最重要和最常用的配置项。</li>
<li><a href="hod_user_guide.html">HOD用户指南</a> : 此指南会告诉你如何开始使用HOD它的各种功能特性命令行选项也会给你一些故障解决方面的详细帮助。</li>
</ul>
</section>
</body>
</document>

View File

@ -0,0 +1,237 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
Hadoop On Demand
</title>
</header>
<body>
<section>
<title>概述</title>
<p>Hadoop On Demand (HOD)是一个能在一个共享集群上供应和管理相互独立的Hadoop Map/Reduce和Hadoop分布式文件系统HDFS实例的系统。它能让管理员和用户轻松地快速搭建和使用hadoop。HOD对Hadoop的开发人员和测试人员也非常有用他们可以通过HOD共享一个物理集群来测试各自不同的Hadoop版本。</p>
<p>HOD依赖资源管理器(RM)来分配节点这些节点被用来在之上运行hadoop实例。目前HOD采用的是<a href="ext:hod/torque">Torque资源管理器</a>
</p>
<p>
基本的HOD系统架构包含下列组件</p>
<ul>
<li>一个资源管理器(可能同时附带一个调度程序)</li>
<li>各种HOD的组件 </li>
<li>Hadoop Map/Reduce和HDFS守护进程</li>
</ul>
<p>
通过与以上组件交互HOD在给定的集群上供应和维护Hadoop Map/Reduce实例或者HDFS实例。集群中的节点可看作由两组节点构成</p>
<ul>
<li>提交节点(Submit nodes)用户通过HOD客户端在这些节点上申请集群之后通过Hadoop客户端提交Hadoop作业。</li>
<li>计算节点(Compute nodes)利用资源管理器HOD组件在这些节点上运行以供应Hadoop守护进程。之后Hadoop作业在这些节点上运行。</li>
</ul>
<p>
下面是对申请集群及在之上运行作业所需操作步骤的简要描述。
</p>
<ul>
<li>用户在提交节点上用HOD客户端分配所需数目节点的集群在上面供应Hadoop。</li>
<li>HOD客户端利用资源管理器接口在Torque中是qsub提交一个被称为RingMaster的HOD进程作为一个资源管理器作业申请理想数目的节点。这个作业被提交到资源管理器的中央服务器上在Torque中叫pbs_server</li>
<li>在计算节点上,资源管理器的从(slave)守护程序(Torque中的pbs_moms)接受并处理中央服务器(Torque中的pbs_server)分配的作业。RingMaster进程在其中一个计算节点Torque中的mother superior上开始运行。</li>
<li>之后Ringmaster通过资源管理器的另外一个接口(在Torque中是pbsdsh)在所有分配到的计算节点上运行第二个HOD组件HodRing即分布式任务。</li>
<li>HodRing初始化之后会与RingMaster通信获取Hadoop指令并遵照执行。一旦Hadoop的命令开始启动它们会向RingMaster登记提供关于守护进程的信息。</li>
<li>Hadoop实例所需的配置文件全部由HOD自己生成有一些来自于用户在配置文件设置的选项。</li>
<li>HOD客户端保持和RingMaster的通信找出JobTracker和HDFS守护进程的位置所在。</li>
</ul>
<p>之后的文档会讲述如何在一个物理集群的节点上安装HOD。</p>
</section>
<section>
<title>先决条件</title>
<p>要使用HOD你的系统应包含下列的硬件和软件</p>
<p>操作系统: HOD目前在RHEL4上测试通过。<br/>
节点HOD至少需要3个由资源管理器配置的节点。<br/></p>
<p>软件</p>
<p>在使用HOD之前以下组件必须被安装到所有节点上</p>
<ul>
<li>Torque资源管理器</li>
<li><a href="ext:hod/python">Python</a>HOD要求Python 2.5.1</li>
</ul>
<p>下列组件是可选的你可以安装以获取HOD更好的功能</p>
<ul>
<li><a href="ext:hod/twisted-python">Twisted Python</a>这个可以用来提升HOD的可扩展性。如果检测到这个模块已安装HOD就用它否则就使用默认的模块。</li>
<li><a href="ext:site">Hadoop</a>HOD能自动将Hadoop分发到集群的所有节点上。不过如果Hadoop在所有节点上已经可用HOD也可以使用已经安装好的Hadoop。HOD目前支持Hadoop 0.15和其后续版本。</li>
</ul>
<p>注释: HOD的配置需要以上这些组件的安装位置在集群所有节点上保持一致。如果在提交节点上的安装位置也相同配置起来会更简单。</p>
</section>
<section>
<title>资源管理器</title>
<p>目前HOD使用Torque资源管理器来分配节点和提交作业。Torque是一个开源的资源管理器来自于<a href="ext:hod/cluster-resources">Cluster Resources</a>是一个社区基于PBS项目努力的结晶。它提供对批处理作业和分散的计算节点(Compute nodes)的控制。你可以自由地从<a href="ext:hod/torque-download">此处</a>下载Torque。</p>
<p>所有torque相关的文档可以在<a href="ext:hod/torque-docs">这儿</a>的TORQUE Resource Manager一节找到。在<a href="ext:hod/torque-wiki">这里</a>可以看到wiki文档。如果想订阅TORQUE的邮件列表或查看问题存档访问<a href="ext:hod/torque-mailing-list">这里</a></p>
<p>使用带Torque的HOD</p>
<ul>
<li>安装Torque组件在一个节点上(head node)安装pbs_server所有计算节点上安装pbs_mom所有计算节点和提交节点上安装PBS客户端。至少做最基本的配置使Torque系统跑起来也就是使pbs_server能知道该和哪些机器通话。查看<a href="ext:hod/torque-basic-config">这里</a>可以了解基本配置。要了解高级配置,请查看<a href="ext:hod/torque-advanced-config">这里</a></li>
<li>在pbs_server上创建一个作业提交队列。队列的名字和HOD的配置参数resource-manager.queue相同。Hod客户端利用此队列提交RingMaster进程作为Torque作业。</li>
<li>在集群的所有节点上指定一个cluster name作为property。这可以用qmgr命令做到。比如<code>qmgr -c "set node node properties=cluster-name"</code>。集群名字和HOD的配置参数hod.cluster是相同的。</li>
<li>确保作业可以提交到节点上去。这可以通过使用qsub命令做到。比如<code>echo "sleep 30" | qsub -l nodes=3</code></li>
</ul>
</section>
<section>
<title>安装HOD</title>
<p>现在资源管理器已经安装好了我们接着下载并安装HOD。</p>
<ul>
<li>如果你想从Hadoop tar包中获取HOD它在'contrib'下的'hod'的根目录下。</li>
<li>如果你从源码编译，可以在Hadoop根目录下运行ant tar，生成Hadoop tar包。然后从该tar包中获取HOD，参照上面。</li>
<li>把这个目录下的所有文件分发到集群的所有节点上。注意文件拷贝的位置应在所有节点上保持一致。</li>
<li>注意编译hadoop时会创建HOD同时会正确地设置所有HOD必须的脚本文件的权限。</li>
</ul>
</section>
<section>
<title>配置HOD</title>
<p>安装HOD后你就可以配置它。为了运行HOD需要做的最小配置会在下面讲述更多高级的配置会在HOD配置指南里面讲解。</p>
<section>
<title>最小配置</title>
<p>为运行HOD以下的最小配置是必须要做的</p>
<ul>
<li>在你想要运行hod的节点上编辑&lt;install dir&gt;/conf目录下的hodrc文件。这个文件包含了运行hod所必需的最少量的设置。</li>
<li>
<p>为这个配置文件中的定义的变量指定适合你环境的值。注意,有些变量在文件中出现了不止一次。</p>
<ul>
<li>${JAVA_HOME}Hadoop的Java的安装位置。Hadoop支持Sun JDK 1.5.x及以上版本。</li>
<li>${CLUSTER_NAME}:集群名称,由'node property'指定,在资源管理器配置中曾提到过。</li>
<li>${HADOOP_HOME}Hadoop在计算节点和提交节点上的安装位置。</li>
<li>${RM_QUEUE}:在资源管理器配置中设置的作业提交队列。</li>
<li>${RM_HOME}:资源管理器在计算节点和提交节点的安装位置。</li>
</ul>
</li>
<li>
<p>以下环境变量可能需要设置取决于你的系统环境。在你运行HOD客户端的地方这些变量必须被定义也必须在HOD配置文件中通过设定resource_manager.env-vars的值指定。多个变量可指定为用逗号分隔的key=value对组成的列表。</p>
<ul>
<li>HOD_PYTHON_HOME如果python安装在计算节点或提交节点的非默认位置那么这个值必须设定为python的可执行文件的实际位置。</li>
</ul>
</li>
</ul>
</section>
<section>
<title>高级配置</title>
<p>你可以检查和修改其它配置选项来满足你的特定需要。关于HOD配置的更多信息请参考<a href="hod_config_guide.html">配置指南</a></p>
</section>
</section>
<section>
<title>运行HOD</title>
<p>当HOD配置好后你就可以运行它了。更多信息请参考<a href="hod_user_guide.html">HOD用户指南</a></p>
</section>
<section>
<title>支持工具和实用程序</title>
<p>此节描述一些可用于管理HOD部署的支持工具和应用程序。</p>
<section>
<title>logcondense.py - 管理日志文件</title>
<p><a href="hod_user_guide.html#收集和查看Hadoop日志">HOD用户指南</a>有提到HOD可配置成将Hadoop日志上传到一个配置好的静态HDFS上。随着时间增加日志数量会不断增长。logcondense.py可以帮助管理员清理上传到HDFS的日志文件。</p>
<section>
<title>运行logcondense.py</title>
<p>logcondense.py在hod_install_location/support文件夹下。你可以使用python去运行它比如<em>python logcondense.py</em>,或者授以执行权限,直接运行<em>logcondense.py</em>。如果启用了权限logcondense.py需要被有足够权限能删除HDFS上上传目录下日志文件的用户运行。比如<a href="hod_config_guide.html#3.7+hodring的配置项">配置指南</a>中提及过用户可以配置将日志放在HDFS上的其主目录下。在这种情况下你需要具有超级用户权限才能运行logcondense.py删除所有用户主目录下的日志文件。</p>
</section>
<section>
<title>logcondense.py的命令行选项</title>
<p>logcondense.py支持以下命令行选项</p>
<table>
<tr>
<td>短选项</td>
<td>长选项</td>
<td>含义</td>
<td>例子</td>
</tr>
<tr>
<td>-p</td>
<td>--package</td>
<td>hadoop脚本的全路径。Hadoop的版本必须和运行HDFS的版本一致。</td>
<td>/usr/bin/hadoop</td>
</tr>
<tr>
<td>-d</td>
<td>--days</td>
<td>删除超过指定天数的日志文件</td>
<td>7</td>
</tr>
<tr>
<td>-c</td>
<td>--config</td>
<td>Hadoop配置目录的路径hadoop-site.xml存在于此目录中。hadoop-site.xml中须指明待删除日志存放的HDFS的NameNode。</td>
<td>/home/foo/hadoop/conf</td>
</tr>
<tr>
<td>-l</td>
<td>--logs</td>
<td>一个HDFS路径须和log-destination-uri指定的是同一个HDFS路径不带hdfs:// URI串这点在<a href="hod_config_guide.html#3.7+hodring的配置项">配置指南</a>中提到过。</td>
<td>/user</td>
</tr>
<tr>
<td>-n</td>
<td>--dynamicdfs</td>
<td>如果为truelogcondense.py除要删除Map/Reduce日志之外还需删除HDFS日志。否则它只删除Map/Reduce日志这也是不指定这个选项时的默认行为。这个选项对下面的情况非常有用一个动态的HDFS由HOD供应一个静态的HDFS用来收集日志文件 - 也许这是测试集群中一个非常普遍的使用场景。</td>
<td>false</td>
</tr>
</table>
<p>比如假如要删除所有7天之前的日志文件hadoop-site.xml存放在~/hadoop-conf下hadoop安装于~/hadoop-0.17.0,你可以这样:</p>
<p><em>python logcondense.py -p ~/hadoop-0.17.0/bin/hadoop -d 7 -c ~/hadoop-conf -l /user</em></p>
</section>
</section>
<section>
<title>checklimits.sh - 监视资源限制</title>
<p>checklimits.sh是一个针对Torque/Maui环境的HOD工具<a href="ext:hod/maui">Maui集群调度器</a> 是一个用于集群和超级计算机的开源作业调度器来自clusterresources。当新提交的作业违反或超过用户在Maui调度器里设置的限制时checklimits.sh脚本更新torque的comment字段。它使用qstat在torque的job-list中做一次遍历确定作业是在队列中还是已完成运行Maui工具checkjob检查每一个作业是否违反用户限制设定之后运行torque的qalter工具更新作业的'comment'的属性。当前它把那些违反限制的作业的comment的值更新为<em>User-limits exceeded. Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)</em>。之后HOD根据这个注释内容做出相应处理。
</p>
<section>
<title>运行checklimits.sh</title>
<p>checklimits.sh可以在hod_install_location/support目录下找到。在具有执行权限后这个shell脚本可以直接通过<em>sh checklimits.sh </em>或者<em>./checklimits.sh</em>运行。这个工具运行的机器上应有Torque和Maui的二进制运行文件并且这些文件要在这个shell脚本进程的路径中。为了更新不同用户作业的comment值这个工具必须以torque的管理员权限运行。这个工具必须按照一定时间间隔重复运行来保证更新job的约束条件比如可以通过cron。请注意这个脚本中用到的资源管理器和调度器命令运行代价可能会比较大所以最好不要在没有sleeping的紧凑循环中运行。
</p>
</section>
</section>
<section>
<title>verify-account - 用于核实用户提交作业所使用的帐号的脚本</title>
<p>生产系统一般使用帐号系统来对使用共享资源的用户收费。HOD支持一个叫<em>resource_manager.pbs-account</em>的参数,用户可以通过这个参数来指定提交作业时使用的帐号。核实这个帐户在帐号管理系统中的有效性是有必要的。脚本<em>hod-install-dir/bin/verify-account</em>提供了一种机制让用户插入自定义脚本来实现这个核实过程。</p>
<section>
<title>在HOD中集成verify-account</title>
<p>在分配集群之前HOD运行<em>verify-account</em>脚本,将<em>resource_manager.pbs-account</em>的值作为参数传递给用户自定义脚本来完成用户的确认。系统还可以通过这种方式来取代它本身的帐号系统。若该用户脚本中的返回值非0就会导致HOD分配集群失败。并且在发生错误时HOD还会将脚本中产生的错误信息打印出来。通过这种方式任何描述性的错误信息都可以从用户脚本中返回给用户。
</p>
<p>在HOD中自带的默认脚本是不做任何的用户核实并返回0。</p>
<p>如果HOD没有找到上面提到的verify-account脚本HOD就会认为该用户核实的功能被关闭然后继续自己以后的分配工作。</p>
</section>
</section>
</section>
</body>
</document>

View File

@ -0,0 +1,158 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
Hadoop On Demand配置指南
</title>
</header>
<body>
<section>
<title>1. 简介</title>
<p>
这个文档讲述了一些最重要和常用的Hadoop On DemandHOD的配置项。
这些配置项可通过两种方式指定INI风格的配置文件通过--section.option[=value]格式指定的HOD shell的命令行选项。如果两个地方都指定了同一个选项命令行中的值覆盖配置文件中的值。
</p>
<p>
你可以通过以下命令获得所有配置项的简要描述:
</p>
<table><tr><td><code>$ hod --verbose-help</code></td></tr></table>
</section>
<section>
<title>2. 段</title>
<p>HOD配置文件分成以下几个配置段</p>
<ul>
<li> hod HOD客户端的配置项</li>
<li> resource_manager 指定要使用的资源管理器的配置项,以及使用该资源管理器时需要的一些其他参数。</li>
<li> ringmaster RingMaster进程的配置项</li>
<li> hodring HodRing进程的配置项</li>
<li> gridservice-mapred Map/Reduce守护进程的配置项</li>
<li> gridservice-hdfs HDFS守护进程的配置项</li>
</ul>
</section>
<section>
<title>3. HOD配置项</title>
<p>
接下来的一节会先描述大部分HOD配置段中通用的一些配置项再描述各配置段特有的配置项。
</p>
<section>
<title>3.1 一般的配置项</title>
<p>某些配置项会在HOD配置中的多个段定义。在一个段中定义的配置项会被该段所适用的所有进程使用。这些配置项意义相同但在不同的段中可以有不同的取值。</p>
<ul>
<li>temp-dir: HOD进程使用的临时目录。请确保运行hod的用户有权限在这个指定的目录下创建子目录。如果想在每次分配的时候都使用不同的临时目录可以使用环境变量资源管理器会让这些环境变量对HOD进程可用。例如在Torque设置的时候使--ringmaster.temp-dir=/tmp/hod-temp-dir.$PBS_JOBID会让ringmaster在每一次申请时使用不同的临时目录Torque会在ringmaster启动前展开这个环境变量。
</li>
<li>debug数值类型取值范围是1-4。4会产生最多的log信息。</li>
<li>log-dir日志文件的存放目录。缺省值是&lt;install-location&gt;/logs/。temp-dir变量的限制和注意事项在这里同样适用。
</li>
<li>xrs-port-range端口范围会在这之中挑选一个可用端口用于运行XML-RPC服务。</li>
<li>http-port-range端口范围会在这之中挑选一个可用端口用于运行HTTP服务。</li>
<li>java-home给Hadoop使用的Java的位置。</li>
<li>syslog-addresssyslog守护进程要绑定的地址。格式为host:port。如果配置了这个选项HOD日志信息会被记录到这个位置的syslog。</li>
</ul>
</section>
<section>
<title>3.2 hod的配置项</title>
<ul>
<li>cluster集群的描述性名称。对于Torque这个值被指定为集群中所有节点的'Node property'。HOD使用这个值计算可用节点的个数。</li>
<li>client-params逗号分割的hadoop配置参数列表其中的每一项都是一个key-value对。在提交节点上会据此产生一个hadoop-site.xml用于运行Map/Reduce作业。</li>
<li>job-feasibility-attr: 正则表达式,用于指定是否和如何检查作业的可行性 资源管理器限制或调度限制。目前是通过torque作业的'comment'属性实现的缺省情况下没有开启这个功能。设置了这个配置项后HOD会使用它来确定哪些种类的限制是启用的以及请求超出限制或者累积超出限制时是回收机群还是留在排队状态。torque comment属性可以被某个外部机制周期性地更新。比如comment属性被hod/support目录下的<a href="hod_admin_guide.html#checklimits.sh+-+资源限制监视器">checklimits.sh</a>更新这样设置job-feasibility-attr的值等于TORQUE_USER_LIMITS_COMMENT_FIELD, "User-limits exceeded. Requested:([0-9]*) Used:([0-9]*) MaxLimit:([0-9]*)"会使HOD产生相应的行为。</li>
</ul>
</section>
<section>
<title>3.3 resource_manager的配置项</title>
<ul>
<li>queue资源管理器中配置的队列名作业会被提交到这里。</li>
<li>batch-home：一个安装目录，其下的'bin'中有资源管理器的可执行文件。</li>
<li>env-vars逗号分隔的key-value对的列表形式是key=value它会被传递给运行在计算节点的作业。例如如果python没有安装在常规位置用户可以通过设置环境变量'HOD_PYTHON_HOME'指定python可执行文件的路径。之后在计算节点运行的HOD的进程就可以使用这个变量了。</li>
</ul>
</section>
<section>
<title>3.4 ringmaster的配置项</title>
<ul>
<li>work-dirs这是一个由逗号分隔的路径列表这些路径将作为HOD产生和传递给Hadoop用于存放DFS和Map/Reduce数据的目录的根目录。例如这是DFS数据块存放的路径。一般情况下有多少块磁盘就指定多少路径以确保所有的磁盘都被利用到。temp-dir变量的限制和注意事项在这儿同样适用。</li>
<li>max-master-failures：hadoop主守护进程启动前可以失败的次数超出这个次数后HOD会让这次集群分配失败。在HOD集群中有时候由于某些问题比如机器没安装java没有安装Hadoop或者Hadoop版本错误等会存在一个或几个“坏”节点。当这个配置项被设为正整数时只有当hadoop master（JobTracker或者NameNode）在上述的坏节点上由于上面提到的种种原因启动失败的次数超过设定的值时RingMaster才会把错误返回给客户端。如果尝试启动的次数没有超过设定值当下一个HodRing请求运行一个命令时同一个hadoop master会指定给这个HodRing。这样即使集群中存在一些坏的节点HOD也会尽全力使这次分配成功。
</li>
</ul>
</section>
<section>
<title>3.5 gridservice-hdfs的配置项</title>
<ul>
<li>external如果被置为falseHOD必须在通过allocate命令分配的节点上自己创建HDFS集群。注意在这种情况下如果集群被回收HDFS集群会停止所有数据会丢失。如果被置为true它会尝试链接外部的已配置的HDFS系统。通常因为在作业运行之前作业的输入需要被放置在HDFS上并且作业的输出需要持久保留在生产环境中一个内部的HDFS集群意义不大。</li>
<li>host外部配置好的NameNode的主机名。</li>
<li>fs_portNameNode RPC服务绑定的端口。</li>
<li>info_portNameNode web UI服务绑定的端口。</li>
<li>pkgs安装目录其下有bin/hadoop可执行文件。可用来使用集群上预先安装的Hadoop版本。</li>
<li>server-params一个逗号分割的hadoop配置参数列表每一项为key-value对形式。这些将用于产生被NameNode和DataNode使用到的hadoop-site.xml文件。</li>
<li>final-server-params除会被标记为final外和上面相同。</li>
</ul>
</section>
<section>
<title>3.6 gridservice-mapred的配置项</title>
<ul>
<li>external如果被置为falseHOD必须在通过allocate命令分配的节点上自己创建Map/Reduce集群。如果被置为true它会尝试链接外部的已配置的Map/Reduce系统。</li>
<li>host外部配置好的JobTracker的主机名。</li>
<li>tracker_portJobTracker RPC服务绑定的端口。</li>
<li>info_portJobTracker web UI服务绑定的端口。</li>
<li>pkgs安装目录其下有bin/hadoop可执行文件。</li>
<li>server-params一个逗号分割的hadoop配置参数列表每一项为key-value对形式。这些将用于产生被JobTracker和TaskTracker使用到的hadoop-site.xml文件。</li>
<li>final-server-params除会被标记为final外和上面相同。</li>
</ul>
</section>
<section>
<title>3.7 hodring的配置项</title>
<ul>
<li>mapred-system-dir-rootDFS上的目录HOD会在这个目录下创建子目录并把全路径作为参数'mapred.system.dir'的值传递给Hadoop守护进程。全路径的格式为value-of-this-option/userid/mapredsystem/cluster-id。注意如果HDFS启用了权限这里指定的路径下应允许所有用户创建子目录。设置此配置项的值为/user会使HOD使用用户的home目录来产生mapred.system.dir的值。</li>
<li>log-destination-uri一个URL能反映一个外部的静态的DFS或者集群节点上的本地文件系统上的路径。当集群被回收时HOD会把Hadoop日志上传到这个路径。要指定DFS路径使用'hdfs://path'格式。要指定一个集群节点上的本地文件系统路径,使用'file://path'格式。当HOD回收集群时作为HOD的清除过程的一部分hadoop日志会被删除。要做到持久存储这些日志你可以使用这个配置项。路径的格式会是values-of-this-option/userid/hod-logs/cluster-id。注意应该保证所有的用户能在这里指定的目录下创建子目录。把这个值设为hdfs://user会使这些日志被转移到用户在DFS上的home目录下。</li>
<li>pkgs安装目录其下有bin/hadoop可执行文件。如果给log-destination-uri指定了一个HDFS URLHOD上传日志时会用到这个配置项。注意当用户使用了和外部静态HDFS不同版本的tarball时这个配置项会派上用场。</li>
</ul>
</section>
</section>
</body>
</document>

View File

@ -0,0 +1,559 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>
Hadoop On Demand用户指南
</title>
</header>
<body>
<section>
<title>简介</title><anchor id="Introduction"></anchor>
<p>Hadoop On Demand (HOD)是一个能在大规模物理集群上供应虚拟Hadoop集群的系统。它使用Torque资源管理器进行节点分配。在所分配的节点上它能启动Hadoop Map/Reduce以及HDFS守护进程。它能自动为Hadoop守护进程及客户端生成合适的配置文件(Hadoop-site.xml)。HOD还能够将Hadoop分发到它分配的虚拟集群节点上。总之HOD方便管理者和用户快速安装与使用Hadoop。它也是需要在同一物理集群上测试各自版本的Hadoop开发者和测试者的实用工具。</p>
<p>HOD支持Hadoop 0.15及其后续版本。</p>
<p>后面的文档包括一个快速入门指南能让你快速上手HOD一个所有HOD特性的详细手册命令行选项一些已知问题和故障排除的信息。</p>
</section>
<section>
<title>HOD使用入门</title><anchor id="Getting_Started_Using_HOD_0_4"></anchor>
<p>在这部分我们将会逐步骤地介绍使用HOD涉及到的最基本的操作。在开始遵循这些步骤之前我们假定HOD及其依赖的软硬件均已被正确安装和配置。这步通常由集群的系统管理员负责。</p>
<p>HOD的用户界面是一个命令行工具叫做<code>hod</code>。它被一个通常由系统管理员为用户设置好的配置文件所驱动。用户在使用<code>hod</code>的时候可以覆盖这个配置,文档的后面会有介绍。使用<code>hod</code>时有如下两种方式可以指定配置文件:</p>
<ul>
<li>在命令行中指定,使用 -c 选项。例如<code>hod &lt;operation&gt; &lt;required-args&gt; -c path-to-the-configuration-file [other-options]</code></li>
<li>在运行<code>hod</code>的地方设置环境变量<em>HOD_CONF_DIR</em>。这个变量应指向一个本地目录,其中有名为<em>hodrc</em>的文件。这与Hadoop中的<em>HADOOP_CONF_DIR</em>与<em>hadoop-site.xml</em>文件是类似的。如果命令行中未指定配置文件,<code>hod</code>会查找<em>HOD_CONF_DIR</em>环境变量指定目录下的<em>hodrc</em>文件。</li>
</ul>
<p>下面的例子中,我们将不会明确指出这个配置选项,假定其已正确指定。</p>
<section><title>一个典型HOD会话</title><anchor id="HOD_Session"></anchor>
<p>一个典型HOD会话至少包括三个步骤：分配，执行Hadoop作业，回收。为此，执行如下步骤。</p>
<p><strong>创建一个集群目录</strong></p><anchor id="Create_a_Cluster_Directory"></anchor>
<p><em>集群目录</em>是本地文件系统上的一个目录,<code>hod</code>会为它分配的集群产生对应的Hadoop配置<em>hadoop-site.xml</em>,放在这个目录下。这个目录可以按下文所述方式传递给<code>hod</code>操作。如果这个目录不存在HOD会自动创建这个目录。一旦分配好了集群用户可通过Hadoop --config选项指定集群目录在之上运行Hadoop作业。</p>
<p><strong><em>allocate</em>操作</strong></p><anchor id="Operation_allocate"></anchor>
<p><em>allocate</em>操作用来分配一组节点并在之上安装和提供Hadoop。它的语法如下。注意它要求指定参数集群目录-d, --hod.clusterdir和节点个数-n, --hod.nodecount</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes [OPTIONS]</code></td>
</tr>
</table>
<p>如果命令成功执行,<code>cluster_dir/hadoop-site.xml</code>会被生成文件中包含了分配出的集群的信息。它也会打印出关于Hadoop的web UI的信息。</p>
<p>试运行这个命令会产生如下输出。注意在这个例子中集群目录是<code>~/hod-clusters/test</code>我们要分配5个节点</p>
<table>
<tr>
<td><code>$ hod allocate -d ~/hod-clusters/test -n 5</code><br/>
<code>INFO - HDFS UI on http://foo1.bar.com:53422</code><br/>
<code>INFO - Mapred UI on http://foo2.bar.com:55380</code><br/></td>
</tr>
</table>
<p><strong>在分配的集群上执行Hadoop作业</strong></p><anchor id="Running_Hadoop_jobs_using_the_al"></anchor>
<p>现在可以用一般的方式在分配的集群上执行Hadoop作业了。这是假定像<em>JAVA_HOME</em>指向Hadoop安装的路径已被正确地设置了</p>
<table>
<tr>
<td><code>$ hadoop --config cluster_dir hadoop_command hadoop_command_args</code></td>
</tr>
</table>
<p>或者</p>
<table>
<tr>
<td><code>$ export HADOOP_CONF_DIR=cluster_dir</code> <br />
<code>$ hadoop hadoop_command hadoop_command_args</code></td>
</tr>
</table>
<p>继续我们的例子下面的命令会在分配的集群上运行wordcount的例子</p>
<table><tr><td><code>$ hadoop --config ~/hod-clusters/test jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code></td></tr></table>
<p>或者</p>
<table><tr>
<td><code>$ export HADOOP_CONF_DIR=~/hod-clusters/test</code><br />
<code>$ hadoop jar /path/to/hadoop/hadoop-examples.jar wordcount /path/to/input /path/to/output</code></td>
</tr>
</table>
<p><strong> <em>deallocate</em>操作</strong></p><anchor id="Operation_deallocate"></anchor>
<p><em>deallocate</em>操作用来回收分配到的集群。当完成集群使用之后,必须执行回收操作使这些节点可以为其他用户所用。<em>deallocate</em>操作的语法如下。注意它需要集群目录(-d, --hod.clusterdir作为参数</p>
<table>
<tr>
<td><code>$ hod deallocate -d cluster_dir</code></td>
</tr>
</table>
<p>继续我们的例子,如下命令会回收集群:</p>
<table><tr><td><code>$ hod deallocate -d ~/hod-clusters/test</code></td></tr></table>
<p>如你所见HOD允许用户分配一个集群随意的使用它来运行Hadoop作业。例如通过从多个shell中启动使用同一个配置的hadoop用户可以做到在同一个集群上并发运行多个作业。</p>
</section>
<section><title>使用HOD运行Hadoop脚本</title><anchor id="HOD_Script_Mode"></anchor>
<p>HOD的<em>script操作</em>能将集群的分配使用和回收组织在一起。这对那些想运行Hadoop作业脚本期望HOD能在脚本结束后自动完成清理操作的用户特别管用。用<code>hod</code>执行Hadoop脚本需要这么做</p>
<p><strong>创建脚本文件</strong></p><anchor id="Create_a_script_file"></anchor>
<p>这是一个普通的shell脚本通常里面会包含hadoop命令</p>
<table><tr><td><code>$ hadoop jar jar_file options</code></td>
</tr></table>
<p>当然用户可以向脚本中添加任何有效的命令。HOD会在执行这个脚本时自动地设置<em>HADOOP_CONF_DIR</em>指向分配的集群。用户不必对此担心。不过,像分配操作时一样,用户需要指定一个集群目录。</p>
<p><strong>运行脚本</strong></p><anchor id="Running_the_script"></anchor>
<p><em>脚本操作</em>的语法如下。注意它需要集群目录(-d, --hod.clusterdir节点个数-n, --hod.nodecount以及脚本文件-s, --hod.script作为参数</p>
<table>
<tr>
<td><code>$ hod script -d cluster_directory -n number_of_nodes -s script_file</code></td>
</tr>
</table>
<p>注意一旦脚本执行完毕HOD就会回收集群这意味着脚本必须要做到等hadoop作业完成后脚本才结束。用户写脚本时必须注意这点。</p>
</section>
</section>
<section>
<title>HOD的功能</title><anchor id="HOD_0_4_Features"></anchor>
<section><title>供应与管理Hadoop集群</title><anchor id="Provisioning_and_Managing_Hadoop"></anchor>
<p>HOD主要功能是供应Hadoop的Map/Reduce和HDFS集群。这些在入门一节已经做过描述。 此外要是还有节点可用并且组织上也批准一个用户可以在同一时间内使用HOD分配多个Map/Reduce集群。对于分配到的不同集群用户需要为上面提到的<code>cluster_dir</code>参数指定不同的路径。HOD提供<em>list</em>和<em>info</em>操作可以管理多个集群。</p>
<p><strong><em>list</em>操作</strong></p><anchor id="Operation_list"></anchor>
<p>list操作能列举到目前为止用户所创建的所有集群。存放hadoop-site.xml的集群目录与JobTracker和/或HDFS的连接及状态也会被显示出来。list操作的使用语法如下</p>
<table>
<tr>
<td><code>$ hod list</code></td>
</tr>
</table>
<p><strong><em>info</em>操作</strong></p><anchor id="Operation_info"></anchor>
<p>info操作会显示指定集群相关的信息。这些信息包括Torque作业idHOD Ringmaster进程Hadoop的JobTracker和NameNode守护进程等重要守护进程的位置。info操作的语法如下。注意它需要集群目录-d, --hod.clusterdir作为参数</p>
<table>
<tr>
<td><code>$ hod info -d cluster_dir</code></td>
</tr>
</table>
<p><code>cluster_dir</code>应为前面<em>allocate</em>操作中指定的有效集群目录。</p>
</section>
<section><title>使用tarball分发Hadoop</title><anchor id="Using_a_tarball_to_distribute_Ha"></anchor>
<p>供应Hadoop时HOD可以使用集群节点上已经安装好的Hadoop也可以将hadoop的tarball作为供应操作的一部分在节点上进行分发和安装。如果使用tarball选项就不必非得使用预装的Hadoop了也不要求集群节点上必须有一个预装的版本。这对开发QE环境下在一个共享集群上测试不同版本hadoop的开发者尤其有用。</p>
<p>要使用预装的Hadoop你必须在hodrc中的<code>gridservice-hdfs</code>部分和<code>gridservice-mapred</code>部分指定<code>pkgs</code>选项。它必须指向集群中所有节点上Hadoop的安装路径。</p>
<p>指定Tarball的语法如下</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes -t hadoop_tarball_location</code></td>
</tr>
</table>
<p>例如下面的命令根据tarball<code>~/share/hadoop.tar.gz</code>分配Hadoop</p>
<table><tr><td><code>$ hod allocate -d ~/hadoop-cluster -n 10 -t ~/share/hadoop.tar.gz</code></td></tr></table>
<p>类似地使用hod脚本的语法如下</p>
<table>
<tr>
<td><code>$ hod script -d cluster_directory -s script_file -n number_of_nodes -t hadoop_tarball_location</code></td>
</tr>
</table>
<p>上面语法中指定的hadoop_tarball_location应指向从所有计算节点都可以访问的共享文件系统的路径。当前HOD只支持挂载的NFS。</p>
<p><em>注意:</em></p>
<ul>
<li>为了获得更好分发性能建议Hadoop tarball只包含库与二进制文件不包含源代码或文档。</li>
<li>当你希望在用tarball方式分配的集群上执行作业你必须使用兼容的Hadoop版本提交你的作业。最好的方式是解压使用Tarball中的版本。</li>
<li>你需要确保在tar分发包的conf目录下没有Hadoop配置文件hadoop-env.sh和hadoop-site.xml。如果这些文件存在并包含错误的值集群分配可能会失败。
</li>
</ul>
</section>
<section><title>使用外部HDFS</title><anchor id="Using_an_external_HDFS"></anchor>
<p>在典型的由HOD提供的Hadoop集群中HDFS已经被静态地未使用HOD设置好。这能使数据在HOD提供的集群被回收后还可以持久保存在HDFS中。为使用静态配置的HDFS你的hodrc必须指向一个外部HDFS。具体就是在hodrc的<code>gridservice-hdfs</code>部分将下面选项设置为正确的值:</p>
<table><tr><td>external = true</td></tr><tr><td>host = HDFS NameNode主机名</td></tr><tr><td>fs_port = HDFS NameNode端口</td></tr><tr><td>info_port = HDFS NameNode web UI的端口</td></tr></table>
<p><em>注意:</em>你也可以从命令行开启这个选项。即你这样去使用一个静态HDFS<br />
</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes --gridservice-hdfs.external</code></td>
</tr>
</table>
<p>如果需要，HOD既可以供应HDFS集群，也可以供应Map/Reduce的集群。这需要设置hodrc中的<code>gridservice-hdfs</code>部分的下列选项:</p>
<table><tr><td>external = false</td></tr></table>
</section>
<section><title>配置Hadoop的选项</title><anchor id="Options_for_Configuring_Hadoop"></anchor>
<p>HOD提供一个非常方便的机制能配置它提供的Hadoop守护进程和它在客户端生成的hadoop-site.xml。通过在HOD配置文件中指定配置参数或在分配集群时在命令行指定都可做到这点。</p>
<p><strong>配置Hadoop守护进程</strong></p><anchor id="Configuring_Hadoop_Daemons"></anchor>
<p>要配置Hadoop守护进程你可以这么做</p>
<p>对于Map/Reduce指定<code>gridservice-mapred</code>部分的<code>server-params</code>项的值为一个以逗号分割的key-value对列表。同配置动态HDFS集群一样设置<code>gridservice-hdfs</code>部分的<code>server-params</code>项。如果这些参数应被标记成<em>final</em>,将这些参数包含到相应部分的<code>final-server-params</code>项中。</p>
<p>例如:</p>
<table><tr><td><code>server-params = mapred.reduce.parallel.copies=20,io.sort.factor=100,io.sort.mb=128,io.file.buffer.size=131072</code></td></tr><tr><td><code>final-server-params = mapred.child.java.opts=-Xmx512m,dfs.block.size=134217728,fs.inmemory.size.mb=128</code></td>
</tr></table>
<p>要从命令行指定选项,你可以用如下语法:</p>
<p>配置Map/Reduce守护进程</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes -Mmapred.reduce.parallel.copies=20 -Mio.sort.factor=100</code></td>
</tr>
</table>
<p>在上述例子中,<em>mapred.reduce.parallel.copies</em>参数和<em>io.sort.factor</em>参数将会被添加到<code>server-params</code>中,如果已经在<code>server-params</code>中存在,则它们会被覆盖。要将这些参数指定成<em>final</em>类型,你可以:</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes -Fmapred.reduce.parallel.copies=20 -Fio.sort.factor=100</code></td>
</tr>
</table>
<p>不过应注意final参数无法被命令行改写，只有在未指定的情形才能追加。</p>
<p>配置动态供应的HDFS守护进程的选项与此相似。用-H替换-M，用-S替换-F即可。</p>
<p><strong>配置Hadoop的作业提交客户端程序</strong></p><anchor id="Configuring_Hadoop_Job_Submissio"></anchor>
<p>如上所述当allocate操作成功后<code>cluster_dir/hadoop-site.xml</code>将会生成其中会包含分配的集群的JobTracker和NameNode的信息。这个配置用于向集群提交作业。HOD提供选项可将其它的hadoop配置参数添加到该文件其语法如下</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes -Cmapred.userlog.limit.kb=200 -Cmapred.child.java.opts=-Xmx512m</code></td>
</tr>
</table>
<p>上例中,<em>mapred.userlog.limit.kb</em>和<em>mapred.child.java.opts</em>会被添加到hod产生的hadoop-site.xml中。</p>
</section>
<section><title>查看Hadoop的Web-UI</title><anchor id="Viewing_Hadoop_Web_UIs"></anchor>
<p>HOD的allocate操作会打印出JobTracker和NameNode的Web UI的URL。例如</p>
<table><tr><td><code>$ hod allocate -d ~/hadoop-cluster -n 10 -c ~/hod-conf-dir/hodrc</code><br/>
<code>INFO - HDFS UI on http://host242.foo.com:55391</code><br/>
<code>INFO - Mapred UI on http://host521.foo.com:54874</code>
</td></tr></table>
<p>上面提到的<em>info</em>操作可以给你同样的信息。</p>
</section>
<section><title>收集和查看Hadoop日志</title><anchor id="Collecting_and_Viewing_Hadoop_Lo"></anchor>
<p>要获取在某些分配节点上运行的守护进程的Hadoop日志</p>
<ul>
<li>登录感兴趣的节点。如果你想查看JobTracker或者NameNode的日志<em>list</em>和<em>info</em>操作能告诉你这些进程在哪些节点上运行。</li>
<li>获取感兴趣的守护进程的进程信息(例如,<code>ps ux | grep TaskTracker</code></li>
<li>在这些进程信息中,查找变量<code>-Dhadoop.log.dir</code>的值。通常是hod配置文件里<code>hodring.temp-dir</code>目录的一个子目录 。</li>
<li>切换到<code>hadoop.log.dir</code>目录以查看守护进程日志和用户日志。</li>
</ul>
<p>HOD也提供了一个机制能让你在集群回收后将日志收集存放到文件系统或者一个在外部配置的HDFS中。这样的话在作业完成节点回收后你还可以看这些日志。要做到这点像下面一样为log-destination-uri指定一个URI</p>
<table><tr><td><code>log-destination-uri= hdfs://host123:45678/user/hod/logs</code>或者</td></tr>
<tr><td><code>log-destination-uri= file://path/to/store/log/files</code></td></tr>
</table>
<p>在上面指定的根目录中HOD会创建路径user_name/torque_jobid把作业涉及到的每个节点上的日志文件gzip压缩存放在里面。</p>
<p>注意要在HDFS上存储这些文件你得将<code>hodring.pkgs</code>项配置为和刚才提到的HDFS兼容的版本。否则HOD会尝试使用它供应Hadoop集群时用到的Hadoop版本。</p>
</section>
<section><title>闲置集群的自动回收</title><anchor id="Auto_deallocation_of_Idle_Cluste"></anchor>
<p>HOD会自动回收在一段时间内没有运行Hadoop作业的集群。每次的HOD分配会带有一个监控设施不停地检查Hadoop作业的执行。如果侦测到在一定时间内没Hadoop作业在执行它就回收这个集群释放那些未被有效利用的节点。</p>
<p><em>注意:</em>当集群被回收时,<em>集群目录</em>没有被自动清空。用户须通过一个正式的<em>deallocate</em>操作清理它。</p>
</section>
<section><title>指定额外的作业属性</title><anchor id="Specifying_Additional_Job_Attrib"></anchor>
<p>HOD允许用户为一个Torque作业指定一个时钟时间和一个名称或者标题</p>
<p>时钟时间是对Torque作业有效时间的一个估计。这个时间过期后Torque将自动删除这个作业释放其节点。指定这个时钟时间还能帮助作业调度程序更好的安排作业提高对集群资源的使用率。</p>
<p>指定时钟时间的语法如下:</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes -l time_in_seconds</code></td>
</tr>
</table>
<p>Torque作业的名称或标题能给用户以友好的作业标识。每次展示Torque作业的属性的时候这个字符串就会出现包括<code>qstat</code>命令。</p>
<p>指定名称或标题的语法如下:</p>
<table>
<tr>
<td><code>$ hod allocate -d cluster_dir -n number_of_nodes -N name_of_job</code></td>
</tr>
</table>
<p><em>注意:</em>由于底层Torque资源管理器的限制不以字母开头或者包含空格的名字将导致作业失败。失败信息会表明问题存在于指定的作业名称中。</p>
</section>
<section><title>捕获HOD在Torque中的退出码</title><anchor id="Capturing_HOD_exit_codes_in_Torq"></anchor>
<p>HOD退出码出现在Torque的exit_status字段中。这有助于使用者和系统管理员区分成功的HOD执行和失败的HOD执行。如果分配成功且所有Hadoop作业在所分配的集群上正确的执行退出码为0。如果分配失败或者部分hadoop作业在分配集群上运行失败退出码非0。下表列出了可能出现的退出码。<em>注意只有所使用的Hadoop版本是0.16或以上时Hadoop作业状态才可以被捕获。</em></p>
<table>
<tr>
<td>退出码</td>
<td>含义</td>
</tr>
<tr>
<td> 6 </td>
<td>Ringmaster故障</td>
</tr>
<tr>
<td> 7 </td>
<td> DFS故障</td>
</tr>
<tr>
<td> 8 </td>
<td> Job tracker故障</td>
</tr>
<tr>
<td> 10 </td>
<td> 集群死亡</td>
</tr>
<tr>
<td> 12 </td>
<td> 集群已分配 </td>
</tr>
<tr>
<td> 13 </td>
<td> HDFS死亡</td>
</tr>
<tr>
<td> 14 </td>
<td> Mapred死亡</td>
</tr>
<tr>
<td> 16 </td>
<td>集群中所有的Map/Reduce作业失败。查看hadoop日志了解更多细节。</td>
</tr>
<tr>
<td> 17 </td>
<td>集群中部分的Map/Reduce作业失败。查看hadoop日志了解更多细节。</td>
</tr>
</table>
</section>
<section>
<title>命令行</title><anchor id="Command_Line"></anchor>
<p>HOD命令行的通用的语法如下<br/>
<em>hod &lt;operation&gt; [ARGS] [OPTIONS]<br/></em>
允许的操作有allocate，deallocate，info，list，script以及help。要获取某特定操作的帮助你可以执行<code>hod help &lt;operation&gt;</code>。要查看可能的选项你可以执行<code>hod help options</code></p>
<p><em>allocate</em><br />
<em>用法hod allocate -d cluster_dir -n number_of_nodes [OPTIONS]</em><br />
分配一个指定节点数目的集群把分配信息存放在cluster_dir方便后续<code>hadoop</code>命令使用。注意<code>cluster_dir</code>必须在运行该命令前已经存在。</p>
<p><em>list</em><br/>
<em>用法hod list [OPTIONS]</em><br />
列举出用户分配的所有集群。提供的信息包括集群对应的Torque作业标识存储分配信息的集群目录Map/Reduce守护进程是否存活。</p>
<p><em>info</em><br/>
<em>用法hod info -d cluster_dir [OPTIONS]</em><br />
列举集群分配信息存放于某指定集群目录的集群信息。</p>
<p><em>deallocate</em><br/>
<em>用法hod deallocate -d cluster_dir [OPTIONS]</em><br />
回收集群分配信息存放于某指定集群目录的集群。</p>
<p><em>script</em><br/>
<em>用法hod script -s script_file -d cluster_directory -n number_of_node [OPTIONS]</em><br />
用HOD<em>script</em>操作执行一个hadoop脚本。在给定数目的节点上提供Hadoop在提交的节点执行这个脚本并在脚本执行结束后回收集群。</p>
<p><em>help</em><br/>
<em>用法hod help [operation | 'options']</em><br/>
未指定参数时,<code>hod help</code>给出用法以及基本选项,等同于<code>hod --help</code> 见下文。当指定参数options显示hod的基本选项。当指定operation时它会显示出该特定operation的用法和相应的描述。例如希望了解allocate操作你可以执行<code>hod help allocate</code></p>
<p>除上面的操作外HOD还能接受下列命令行选项。</p>
<p><em>--help</em><br />
打印出用法和基本选项的帮助信息。</p>
<p><em>--verbose-help</em><br />
hodrc文件中所有的配置项均可通过命令行传递使用语法<code>--section_name.option_name[=value]</code>。这种方式下命令行传递的参数会覆盖hodrc中的配置项。verbose-help命令会列出hodrc文件中全部可用项。这也是一个了解配置选项含义的好方法。</p>
<p><a href="#Options_Configuring_HOD">下一部分</a>有多数重要的hod配置项的描述。对于基本选项你可以通过<code>hod help options</code>了解对于所有的hod配置中的可能选项你可以参看<code>hod --verbose-help</code>的输出。了解所有选项的描述,请参看<a href="hod_config_guide.html">配置指南</a></p>
</section>
<section><title> HOD配置选项</title><anchor id="Options_Configuring_HOD"></anchor>
<p> 如上所述，HOD的配置是通过系统管理员设置配置文件完成。这是一个INI风格的配置文件，文件分成多个段，每个段包含一些配置项。这些段分别和HOD的进程client，ringmaster，hodring，mapreduce或hdfs相关。每一个配置项由选项名和值构成。</p>
<p>有两种方式可让用户覆盖默认配置文件里的设定:</p>
<ul>
<li>在每条命令前用户可以向HOD提供自己的配置文件使用<code>-c</code>选项。</li>
<li>用户可以在命令行指定HOD的配置选项覆盖正使用的配置文件中提供的值。</li>
</ul>
<p>这一节介绍一些最常用的配置项。为了指定方便，这些常用选项通常会有一个<em>短</em>选项名。所有其它选项可以用随后介绍的<em>长</em>选项指定。</p>
<p><em>-c config_file</em><br />
提供要使用的配置文件。可与其他任何的HOD选项一起使用。此外可定义<code>HOD_CONF_DIR</code>环境变量为一个包含<code>hodrc</code>文件的目录避免每条HOD命令都要指定配置文件。</p>
<p><em>-d cluster_dir</em><br />
大多数hod操作都要求这个选项。如<a href="#Create_a_Cluster_Directory">此处</a>描述的,<em>集群目录</em>是在本地文件系统上的一个目录,<code>hod</code>将它分配集群的相应Hadoop配置产生在这个目录里<em>hadoop-site.xml</em>。使用-d或者--hod.clusterdir将这个参数传递给<code>hod</code>操作如果目录不存在HOD会自动创建该目录。集群分配好后用户可在这个集群上通过指定hadoop--config为集群目录来执行Hadoop作业。</p>
<p><em>-n number_of_nodes</em><br />
hod allocation操作和script操作要求这个选项。表示要分配的节点数。</p>
<p><em>-s script-file</em><br/>
脚本操作时需要,用于指定要执行的脚本文件。</p>
<p><em>-b 1|2|3|4</em><br />
启用给定的调试级别。能与其他HOD选项一起使用。级别4最为详尽。</p>
<p><em>-t hadoop_tarball</em><br />
从指定tar.gz文件提供Hadoop分发。此选项值只适用于<em>allocate</em>操作。为获得更好的分发性能，强烈推荐在创建Hadoop tarball<em>前</em>删除其中的源代码或文档。</p>
<p><em>-N job-name</em><br />
内部使用的资源管理作业名。比如对于Torque作为资源管理器的情况会被解释成<code>qsub -N</code>选项,使用<code>qstat</code>命令时可以看到这个作业名。</p>
<p><em>-l wall-clock-time</em><br />
用户希望在分配的集群上作业的时间总量。它被传递给HOD底层的资源管理器，用于更有效地调度和利用集群。注意，对于Torque的情形，这个时间到期后，集群会被自动回收。</p>
<p><em>-j java-home</em><br />
JAVA_HOME环境变量里指定的路径。在<em>script</em>操作中使用。HOD将JAVA_HOME环境变量设置为这个值并在此环境下启动用户脚本。</p>
<p><em>-A account-string</em><br />
传递给后台资源管理器的核计信息。</p>
<p><em>-Q queue-name</em><br />
接受作业提交的后台资源管理器中队列的名称。</p>
<p><em>-Mkey1=value1 -Mkey2=value2</em><br/>
为供应的Map/Reduce守护进程JobTracker以及TaskTracker提供配置参数。在集群节点上会根据这些值产生一个hadoop-site.xml。 <br />
<em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用‘\’转义, 且放置在引号中。你也可以使用‘\’来转义‘\’。</p>
<p><em>-Hkey1=value1 -Hkey2=value2</em><br />
为供应的HDFS守护进程NameNode以及DataNode提供配置参数。在集群节点上会根据这些值产生一个hadoop-site.xml。 <br />
<em>注意:</em>值中的下列字符:空格,逗号,等号,分号需要使用‘\’转义, 且放置在引号中。你也可以使用‘\’来转义‘\’。</p>
<p><em>-Ckey1=value1 -Ckey2=value2</em><br />
为提交作业的客户端提供配置参数。在提交节点上会根据这些值产生一个hadoop-site.xml。<br />
<em>注意：</em>值中的下列字符：空格，逗号，等号，分号需要使用‘\’转义，且放置在引号中。你也可以使用‘\’来转义‘\’。 </p>
<p><em>--section-name.option-name=value</em><br />
这是用<em>长</em>格式提供配置选项的方法。比如，你可以指定<em>--hod.script-wait-time=20</em>。</p>
</section>
</section>
<section>
<title>故障排除</title><anchor id="Troubleshooting"></anchor>
<p>下节列出了一些用户使用HOD时可能碰到的多发错误的条件以及解决问题的方法</p>
<section><title>分配操作时<code>hod</code>挂起</title><anchor id="_hod_Hangs_During_Allocation"></anchor><anchor id="hod_Hangs_During_Allocation"></anchor>
<p><em>可能原因:</em>HOD或Hadoop的一个组件启动失败。这种情况下<code>hod</code>命令会在一段时间通常是2-3分钟后返回退出码是错误代码部分定义的错误码7或8。参考该部分以获得更多细节。 </p>
<p><em>可能原因:</em>使用tarball模式申请了大规模的集群。有时由于网络负载或者是分配节点上的负载tarball分发过程可能会慢的比较明显需要几分钟才能响应。等待命令完成。还可以检查一下tarball看是否不含Hadoop源码或文档。</p>
<p><em>可能原因:</em>Torque相关的问题。如果原因与Torque相关<code>hod</code>命令5分钟内是不会返回的。在调试模式下运行<code>hod</code>你会发现<code>qstat</code>命令被重复执行。在另一个shell中执行<code>qstat</code>命令你会发现作业处于<code>Q</code>排队状态。这通常说明Torque出现了问题。可能原因有个别节点宕机或者增加了新节点但Torque不知。通常需要系统管理员帮助解决此问题。</p>
</section>
<section><title>回收操作时<code>hod</code>挂起</title><anchor id="_hod_Hangs_During_Deallocation"></anchor><anchor id="hod_Hangs_During_Deallocation"></anchor>
<p><em>可能原因:</em>Torque相关的问题通常是Torque server上的负载较大或者是分配的集群非常大。一般来说你唯一能做的是等待命令执行完成。</p>
</section>
<section><title><code>hod</code>失败时的错误代码和错误信息</title><anchor id="hod_Fails_With_an_error_code_and"></anchor><anchor id="_hod_Fails_With_an_error_code_an"></anchor>
<p>如果<code>hod</code>命令的退出码不是<code>0</code>,参考下面的退出代码表确定此情况发生的原因和相应的调试方法。</p>
<p><strong>错误代码</strong></p><anchor id="Error_Codes"></anchor>
<table>
<tr>
<th>错误代码</th>
<th>含义</th>
<th>可能原因及补救方法</th>
</tr>
<tr>
<td> 1 </td>
<td>配置错误 </td>
<td>hodrc中的参数错误或者其他与HOD配置相关的错误。此类情况下错误信息已经足够帮你发现和解决问题。</td>
</tr>
<tr>
<td> 2 </td>
<td>无效操作</td>
<td>执行<code>hod help</code>查看有效的操作列表。</td>
</tr>
<tr>
<td> 3 </td>
<td>无效操作参数</td>
<td>执行<code>hod help operation</code>查看特定操作的用法。</td>
</tr>
<tr>
<td> 4 </td>
<td>调度失败</td>
<td> 1. 请求分配了过多的资源。执行<code>checknodes cluster_name</code>查看是否有足够多的可用节点。<br />
2. 请求的资源超出了资源管理器的限制。<br />
3. Torque配置错误Torque可执行文件路径配置错误或者其它Torque相关问题。联系系统管理员。</td>
</tr>
<tr>
<td> 5 </td>
<td>执行作业失败</td>
<td> 1. Torque作业被外部删除。执行Torque <code>qstat</code>命令查看是否有作业处于<code>R</code>运行状态。如果没有尝试重新运行HOD。<br/>
2. Torque的问题诸如服务器暂时性宕机或者无响应。联系系统管理员。 <br/>
3. 系统管理员可能配置了帐号核实,并且一个非法的帐号被指定。请联系系统管理员。 </td>
</tr>
<tr>
<td> 6 </td>
<td>Ringmaster故障</td>
<td> HOD会打印信息"Cluster could not be allocated because of the following errors on the ringmaster host &lt;hostname&gt;"。实际的错误信息可能指示下列情形中的一种:<br/>
1. 运行ringmaster的节点配置不合法错误信息中的hostname会指明具体的机器。<br/>
2. <code>ringmaster</code>段的配置无效,<br />
3. <code>gridservice-mapred或者gridservice-hdfs</code>段中<code>pkgs</code>项的配置无效,<br />
4. 无效的hadoop tarball或者tarball中conf目录下存在无效的配置文件<br />
5. Hadoop中的MapReduce与外部HDFS版本不匹配。<br />
Torque <code>qstat</code>命令很可能会显示一个处于<code>C</code>(Completed，已完成)状态的作业。<br/>
你可以登录到HOD失败信息中给出的ringmaster主机根据错误信息的提示解决问题。如果错误信息没有给出完整的信息ringmaster日志也可能帮助找到问题的根源。参考下面<em>定位Ringmaster日志</em>一节了解更多信息。</td>
</tr>
<tr>
<td> 7 </td>
<td> DFS故障</td>
<td> 当HOD由于DFS故障或者Job tracker失败错误码8下文有介绍分配失败时它会打印错误信息 "Hodring at &lt;hostname&gt; failed with following errors:",并给出真正的错误信息,这个信息可能表明下列情形中的一种:<br/>
1. 启动Hadoop集群时出现问题。通常错误信息会表明之前提到的主机出现错误的真正原因。你也要检查HOD配置文件中Hadoop相关的配置。按上面<em>收集和查看Hadoop日志</em>一节中介绍的方法查看Hadoop的日志。<br />
2. 运行hodring的节点上的配置无效错误信息中的hostname会指明机器<br/>
3. hodrc中<code>hodring</code>段的配置无效。<code>ssh</code>到错误信息中提到的节点，在hodring日志中grep<code>ERROR</code>或<code>CRITICAL</code>。参考下面<em>定位Hodring日志</em>部分获取更多信息。<br />
4. 指定了无效的tarball可能未正确打包。<br />
5. 无法与外部配置的HDFS通信。<br />
当DFS或Job tracker出现故障时你可以登录到HOD失败信息中提到的主机上进行debug。解决问题的时候你也应通过查看ringmaster日志中的其它日志信息来检查其他机器是否在启动jobtracker/namenode时也出现了问题而不只是检查错误信息中提到的主机。其他机器也可能发生问题是因为HOD会按照配置项<a href="hod_config_guide.html#3.4+ringmaster的配置项">ringmaster.max-master-failures</a>的设置在多个机器上连续尝试和启动hadoop守护进程。更多关于ringmaster日志的信息请参考下文<em>定位Ringmaster日志</em>
</td>
</tr>
<tr>
<td> 8 </td>
<td>Job tracker故障</td>
<td>与<em>DFS故障</em>情形中的原因类似。</td>
</tr>
<tr>
<td> 10 </td>
<td>集群死亡</td>
<td>1. 集群因为较长时间空闲被自动回收。<br />
2. 集群因系统管理员或者用户指定的时钟时间到期被自动回收。<br />
3. 无法与成功分配的JobTracker以及HDFS的NameNode通信。回收集群重新分配。</td>
</tr>
<tr>
<td> 12 </td>
<td>集群已分配</td>
<td>指定的集群目录已被用于先前的分配操作，且尚未回收。指定另外一个目录，或者先回收先前分配的集群。</td>
</tr>
<tr>
<td> 13 </td>
<td>HDFS死亡</td>
<td>无法与HDFS的NameNode通信。HDFS的NameNode停掉了。</td>
</tr>
<tr>
<td> 14 </td>
<td>Mapred死亡</td>
<td> 1. 集群因为长时间闲置被自动回收。 <br />
2. 集群因系统管理员或用户指定的时钟时间到期被自动回收。<br />
3. 无法与Map/Reduce的JobTracker通信。JobTracker节点宕机。 <br />
</td>
</tr>
<tr>
<td> 15 </td>
<td>集群未分配</td>
<td>一个需要已分配集群的操作被指定了一个没有状态信息的集群目录。</td>
</tr>
<tr>
<td>任意非0退出代码</td>
<td>HOD脚本错误</td>
<td>如果使用了hod的脚本选项，很可能这个退出代码是脚本的退出码。不幸的是，这可能会与hod自己的退出码冲突。为帮助用户区分两者，如果脚本返回了一个退出码，hod将此退出码写到了集群目录下的script.exitcode文件。你可以cat这个文件以确定脚本的退出码。如果文件不存在，则退出代码是hod命令的退出码。</td>
</tr>
</table>
</section>
<section><title>Hadoop DFSClient警告NotReplicatedYetException信息</title>
<p>有时当你申请到一个HOD集群后马上尝试上传文件到HDFS时DFSClient会警告NotReplicatedYetException。通常会有一个这样的信息 - </p><table><tr><td><code>WARN
hdfs.DFSClient: NotReplicatedYetException sleeping &lt;filename&gt; retries
left 3</code></td></tr><tr><td><code>08/01/25 16:31:40 INFO hdfs.DFSClient:
org.apache.hadoop.ipc.RemoteException: java.io.IOException: File
&lt;filename&gt; could only be replicated to 0 nodes, instead of
1</code></td></tr></table><p> 当你向一个DataNodes正在和NameNode联络的集群上传文件的时候这种现象就会发生。在上传新文件到HDFS之前多等待一段时间就可以解决这个问题因为这使得足够多的DataNode启动并且联络上了NameNode。</p>
</section>
<section><title>成功分配的集群上无法运行Hadoop作业</title><anchor id="Hadoop_Jobs_Not_Running_on_a_Suc"></anchor>
<p>这一情景通常发生在这种情形一个集群已经分配并且一段时间内处于不活跃状态之后hadoop作业试图在这个集群上运行。Hadoop作业会失败产生如下异常信息</p>
<table><tr><td><code>08/01/25 16:31:40 INFO ipc.Client: Retrying connect to server: foo.bar.com/1.1.1.1:53567. Already tried 1 time(s).</code></td></tr></table>
<p><em>可能原因:</em>相当长的时间内无hadoop作业运行集群会如<em>闲置集群的自动回收</em>一节介绍的那样被自动回收。回收该集群,然后重新分配。</p>
<p><em>可能原因:</em>从分配开始算起Torque管理员指定的或<em>指定额外的作业属性</em>一节中定义的<code>-l</code>选项指定的时间上限过期。这种情况下集群可能已被释放。回收集群,然后重新分配。</p>
<p><em>可能原因：</em>提交作业使用的hadoop版本和供应集群的Hadoop版本（通常通过tarball选项）不匹配。确保使用兼容的版本。</p>
<p><em>可能原因:</em> 提交job的hadoop客户端与提供的hadoop(通常通过tarball选项)版本不兼容。 确保所使用hadoop软件版本兼容。</p>
<p><em>可能原因：</em> 你使用了<code>-M</code>或<code>-H</code>中的一个指定Hadoop配置，其中有未正确转义的字符，比如空格或逗号。参考<em>HOD配置选项</em>一节以了解如何正确指定这些选项。</p>
</section>
<section><title>我的Hadoop作业被中止了</title><anchor id="My_Hadoop_Job_Got_Killed"></anchor>
<p><em>可能原因：</em>从分配开始算起，Torque管理员指定的或<em>指定额外的作业属性</em>一节中定义的<code>-l</code>选项指定的时间上限过期。这种情况下集群可能已被释放。回收集群，然后重新分配，这次要指定一个大点儿的时钟时间。</p>
<p><em>可能原因:</em> JobTracker节点出现问题。参考<em>收集和查看Hadoop日志</em>一节以获取更多信息。</p>
</section>
<section><title>Hadoop作业失败并返回消息Job tracker still initializing</title><anchor id="Hadoop_Job_Fails_with_Message_Jo"></anchor>
<p><em>可能原因：</em>hadoop作业是作为HOD脚本的一部分运行的，它在JobTracker完全就绪前开始了执行。分配集群时为配置选项<code>--hod.script-wait-time</code>设定一个大点儿的值。通常取120是可以工作的，尽管通常没必要这么大。</p>
</section>
<section><title>Torque的退出代码没有包含HOD的</title><anchor id="The_Exit_Codes_For_HOD_Are_Not_G"></anchor>
<p><em>可能原因:</em>此功能需要Hadoop 0.16。所用的Hadoop版本不满足这个条件。请使用合适的Hadoop版本。</p>
<p><em>可能原因:</em>没有使用<code>hod</code>命令回收集群;例如直接使用<code>qdel</code>。当使用这种方式回收集群时HOD进程被信号中止。这会导致退出码是基于signal number的而不是程序的退出码。</p>
</section>
<section><title>Hadoop日志未被上传到DFS</title><anchor id="The_Hadoop_Logs_are_Not_Uploaded"></anchor>
<p><em>可能原因：</em>上传日志使用的hadoop与外部的HDFS版本不兼容。确保<code>hodring.pkgs</code>选项指定了正确的版本。</p>
</section>
<section><title>定位Ringmaster日志</title><anchor id="Locating_Ringmaster_Logs"></anchor>
<p>遵循以下步骤定位ringmaster日志</p>
<ul>
<li>用-b选项在调试模式执行hod。这会打印出当前运行的Torque作业的标识。</li>
<li>执行<code>qstat -f torque_job_id</code>,在输出中查找<code>exec_host</code>参数的值。列表中的第一个主机就是ringmaster节点。</li>
<li>登陆该节点。</li>
<li>ringmaster日志的位置由hodrc中的<code>ringmaster.log-dir</code>项指定。日志文件的名字会是<code>username.torque_job_id/ringmaster-main.log</code></li>
<li>如果你没有获取到足够的信息你可以将ringmaster的调试级别设为4。这可通过向hod命令行传递<code>--ringmaster.debug 4</code>做到。</li>
</ul>
</section>
<section><title>定位Hodring日志</title><anchor id="Locating_Hodring_Logs"></anchor>
<p>遵循以下步骤定位hodring日志</p>
<ul>
<li>用-b选项在调试模式下运行hod。这将打印当前运行的Torque作业的标识。</li>
<li>执行<code>qstat -f torque_job_id</code>，查看输出中<code>exec_host</code>参数的值。列表中的所有节点上都有一个hodring。</li>
<li>登陆到任何一个节点。</li>
<li>hodring日志的位置由hodrc中的<code>hodring.log-dir</code>项指定。日志文件的名字会是<code>username.torque_job_id/hodring-main.log</code></li>
<li>如果你没有获得足够的信息你或许想将hodring的调试等级更改为4。这可以向hod命令行传递<code>--hodring.debug 4</code> 来做到。</li>
</ul>
</section>
</section>
</body>
</document>

View File

@ -0,0 +1,46 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop文档</title>
</header>
<body>
<p>
下面的文档是一些概念介绍和操作教程可帮助你开始使用Hadoop。如果遇到了问题你可以向<a href="ext:lists">邮件列表</a>求助或者浏览一下存档邮件。
</p>
<ul>
<li><a href="quickstart.html">Hadoop快速入门</a></li>
<li><a href="cluster_setup.html">Hadoop集群搭建</a></li>
<li><a href="hdfs_design.html">Hadoop分布式文件系统</a></li>
<li><a href="mapred_tutorial.html">Hadoop Map-Reduce教程</a></li>
<li><a href="native_libraries.html">Hadoop本地库</a></li>
<li><a href="ext:api/index">API参考</a></li>
<li><a href="ext:wiki">维基</a></li>
<li><a href="ext:faq">常见问题</a></li>
</ul>
<p>
</p>
</body>
</document>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,230 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop本地库</title>
</header>
<body>
<section>
<title>目的</title>
<p>
鉴于性能问题以及某些Java类库的缺失对于某些组件Hadoop提供了自己的本地实现。
这些组件保存在Hadoop的一个独立的动态链接的库里。这个库在*nix平台上叫<em>libhadoop.so</em>. 本文主要介绍本地库的使用方法以及如何构建本地库。
</p>
</section>
<section>
<title>组件</title>
<p>Hadoop现在已经有以下
<a href="ext:api/org/apache/hadoop/io/compress/compressioncodec">
compression codecs</a>本地组件:</p>
<ul>
<li><a href="ext:zlib">zlib</a></li>
<li><a href="ext:gzip">gzip</a></li>
<li><a href="ext:lzo">lzo</a></li>
</ul>
<p>在以上组件中lzo和gzip压缩编解码器必须使用hadoop本地库才能运行。
</p>
</section>
<section>
<title>使用方法</title>
<p>hadoop本地库的用法很简单</p>
<ul>
<li>
看一下
<a href="#支持的平台">支持的平台</a>.
</li>
<li>
<a href="ext:releases/download">下载</a> 预构建的32位i386架构的Linux本地hadoop库可以在hadoop发行版的<code>lib/native</code>目录下找到)或者自己
<a href="#构建Hadoop本地库">构建</a> 这些库。
</li>
<li>
确保你的平台已经安装了<strong>zlib-1.2</strong>以上版本或者<strong>lzo2.0</strong>以上版本的软件包或者两者均已安装(根据你的需要)。
</li>
</ul>
<p><code>bin/hadoop</code> 脚本通过系统属性
<em>-Djava.library.path=&lt;path&gt;</em>来确认hadoop本地库是否包含在库路径里。</p>
<p>检查hadoop日志文件可以查看hadoop库是否正常正常情况下会看到</p>
<p>
<code>
DEBUG util.NativeCodeLoader - Trying to load the custom-built
native-hadoop library...
</code><br/>
<code>
INFO util.NativeCodeLoader - Loaded the native-hadoop library
</code>
</p>
<p>如果出错,会看到:</p>
<p>
<code>
INFO util.NativeCodeLoader - Unable to load native-hadoop library for
your platform... using builtin-java classes where applicable
</code>
</p>
</section>
<section>
<title>支持的平台</title>
<p>Hadoop本地库只支持*nix平台，已经广泛使用在GNU/Linux平台上，但是不支持
<a href="ext:cygwin">Cygwin</a>和
<a href="ext:osx">Mac OS X</a>。
</p>
<p>已经测试过的GNU/Linux发行版本</p>
<ul>
<li>
<a href="http://www.redhat.com/rhel/">RHEL4</a>/<a href="http://fedora.redhat.com/">Fedora</a>
</li>
<li><a href="http://www.ubuntu.com/">Ubuntu</a></li>
<li><a href="http://www.gentoo.org/">Gentoo</a></li>
</ul>
<p>在上述平台上32/64位Hadoop本地库分别能和32/64位的jvm一起正常运行。
</p>
</section>
<section>
<title>构建Hadoop本地库</title>
<p>Hadoop本地库使用
<a href="http://en.wikipedia.org/wiki/ANSI_C">ANSI C</a> 编写使用GNU autotools工具链 (autoconf, autoheader, automake, autoscan, libtool)构建。也就是说构建hadoop库的平台需要有标准C的编译器和GNU autotools工具链。请参看
<a href="#支持的平台">支持的平台</a></p>
<p>你的目标平台上可能会需要的软件包:
</p>
<ul>
<li>
C 编译器 (e.g. <a href="http://gcc.gnu.org/">GNU C Compiler</a>)
</li>
<li>
GNU Autotools 工具链:
<a href="http://www.gnu.org/software/autoconf/">autoconf</a>,
<a href="http://www.gnu.org/software/automake/">automake</a>,
<a href="http://www.gnu.org/software/libtool/">libtool</a>
</li>
<li>
zlib开发包 (stable version >= 1.2.0)
</li>
<li>
lzo开发包 (stable version >= 2.0)
</li>
</ul>
<p>如果已经满足了上述先决条件,可以使用<code>build.xml</code>
文件,并把其中的<code>compile.native</code>置为
<code>true</code>这样就可以生成hadoop本地库</p>
<p><code>$ ant -Dcompile.native=true &lt;target&gt;</code></p>
<p>因为不是所有用户都需要Hadoop本地库所以默认情况下hadoop不生成该库。
</p>
<p>你可以在下面的路径查看新生成的hadoop本地库</p>
<p><code>$ build/native/&lt;platform&gt;/lib</code></p>
<p>其中&lt;platform&gt;是下列系统属性的组合
<code>${os.name}-${os.arch}-${sun.arch.data.model}</code>;例如
Linux-i386-32。</p>
<section>
<title>注意</title>
<ul>
<li>
在生成hadoop本地库的目标平台上<strong>必须</strong> 安装了zlib和lzo开发包但是如果你只希望使用其中一个的话在部署时安装其中任何一个都是足够的。
</li>
<li>
在目标平台上生成以及部署hadoop本地库时都需要根据32/64位jvm选取对应的32/64位zlib/lzo软件包。
</li>
</ul>
</section>
</section>
<!--DCCOMMENT:diff begin-->
<section>
<title> 使用DistributedCache 加载本地库</title>
<p>用户可以通过
<a href="mapred_tutorial.html#DistributedCache">DistributedCache</a>
加载本地共享库,并<em>分发</em>和建立库文件的<em>符号链接</em>
</p>
<!--DCCOMMENT:
for <em>distributing</em> and <em>symlinking</em> the library files</p>
-->
<p>这个例子描述了如何分发库文件，并从map/reduce任务中装载库文件。
</p>
<ol>
<li>首先拷贝库文件到HDFS。<br/>
<code>bin/hadoop fs -copyFromLocal mylib.so.1 /libraries/mylib.so.1</code>
</li>
<li>启动作业时包含以下代码:<br/>
<code> DistributedCache.createSymlink(conf); </code> <br/>
<code> DistributedCache.addCacheFile("hdfs://host:port/libraries/mylib.so.1#mylib.so", conf);
</code>
</li>
<li>map/reduce任务中包含以下代码<br/>
<code> System.loadLibrary("mylib.so"); </code>
</li>
</ol>
</section>
</body>
</document>
<!--DCCOMMENT:diff end
</section>
+ <section>
+ <title> Loading native libraries through DistributedCache </title>
+ <p>User can load native shared libraries through
+ <a href="mapred_tutorial.html#DistributedCache">DistributedCache</a>
+ for <em>distributing</em> and <em>symlinking</em> the library files</p>
+
+ <p>Here is an example, describing how to distribute the library and
+ load it from map/reduce task. </p>
+ <ol>
+ <li> First copy the library to the HDFS. <br/>
+ <code>bin/hadoop fs -copyFromLocal mylib.so.1 /libraries/mylib.so.1</code>
+ </li>
+ <li> The job launching program should contain the following: <br/>
+ <code> DistributedCache.createSymlink(conf); </code> <br/>
+ <code> DistributedCache.addCacheFile("hdfs://host:port/libraries/mylib.so.1#mylib.so", conf);
+ </code>
+ </li>
+ <li> The map/reduce task can contain: <br/>
+ <code> System.loadLibrary("mylib.so"); </code>
+ </li>
+ </ol>
+ </section>
</body>
-->

View File

@ -0,0 +1,252 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop快速入门</title>
</header>
<body>
<section>
<title>目的</title>
<p>这篇文档的目的是帮助你快速完成单机上的Hadoop安装与使用以便你对<a href ="hdfs_design.html">Hadoop分布式文件系统(<acronym title="Hadoop Distributed File System">HDFS</acronym>)</a>和Map-Reduce框架有所体会比如在HDFS上运行示例程序或简单作业等。</p>
</section>
<section id="PreReqs">
<title>先决条件</title>
<section>
<title>支持平台</title>
<ul>
<li>
GNU/Linux是产品开发和运行的平台。
Hadoop已在有2000个节点的GNU/Linux主机组成的集群系统上得到验证。
</li>
<li>
Win32平台是作为<em>开发平台</em>支持的。由于分布式操作尚未在Win32平台上充分测试所以还不作为一个<em>生产平台</em>被支持。
</li>
</ul>
</section>
<section>
<title>所需软件</title>
<p>Linux和Windows所需软件包括:</p>
<ol>
<li>
Java<sup>TM</sup>1.5.x必须安装建议选择Sun公司发行的Java版本。
</li>
<li>
<strong>ssh</strong> 必须安装并且保证 <strong>sshd</strong>一直运行以便用Hadoop
脚本管理远端Hadoop守护进程。
</li>
</ol>
<p>Windows下的附加软件需求</p>
<ol>
<li>
<a href="http://www.cygwin.com/">Cygwin</a> - 提供上述软件之外的shell支持。
</li>
</ol>
</section>
<section>
<title>安装软件</title>
<p>如果你的集群尚未安装所需软件,你得首先安装它们。</p>
<p>以Ubuntu Linux为例:</p>
<p>
<code>$ sudo apt-get install ssh</code><br/>
<code>$ sudo apt-get install rsync</code>
</p>
<p>在Windows平台上，如果安装cygwin时未安装全部所需软件，则需启动cygwin安装管理器安装如下软件包：</p>
<ul>
<li>openssh - <em>Net</em></li>
</ul>
</section>
</section>
<section>
<title>下载</title>
<p>
为了获取Hadoop的发行版从Apache的某个镜像服务器上下载最近的
<a href="ext:releases">稳定发行版</a></p>
</section>
<section>
<title>运行Hadoop集群的准备工作</title>
<p>
解压所下载的Hadoop发行版。编辑
<code>conf/hadoop-env.sh</code>文件,至少需要将<code>JAVA_HOME</code>设置为Java安装根路径。
</p>
<p>
尝试如下命令:<br/>
<code>$ bin/hadoop</code><br/>
将会显示<strong>hadoop</strong> 脚本的使用文档。
</p>
<p>现在你可以用以下三种支持的模式中的一种启动Hadoop集群
</p>
<ul>
<li>单机模式</li>
<li>伪分布式模式</li>
<li>完全分布式模式</li>
</ul>
</section>
<section id="Local">
<title>单机模式的操作方法</title>
<p>默认情况下Hadoop被配置成以非分布式模式运行的一个独立Java进程。这对调试非常有帮助。</p>
<p>
下面的实例将已解压的 <code>conf</code> 目录拷贝作为输入,查找并显示匹配给定正则表达式的条目。输出写入到指定的<code>output</code>目录。
<br/>
<code>$ mkdir input</code><br/>
<code>$ cp conf/*.xml input</code><br/>
<code>
$ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
</code><br/>
<code>$ cat output/*</code>
</p>
</section>
<section id="PseudoDistributed">
<title>伪分布式模式的操作方法</title>
<p>Hadoop可以在单节点上以所谓的伪分布式模式运行此时每一个Hadoop守护进程都作为一个独立的Java进程运行。</p>
<section>
<title>配置</title>
<p>使用如下的 <code>conf/hadoop-site.xml</code>:</p>
<table>
<tr><td>&lt;configuration&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;fs.default.name&lt;/name&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9000&lt;/value&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;mapred.job.tracker&lt;/name&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;localhost:9001&lt;/value&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&lt;property&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;name&gt;dfs.replication&lt;/name&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&nbsp;&nbsp;&lt;value&gt;1&lt;/value&gt;</td></tr>
<tr><td>&nbsp;&nbsp;&lt;/property&gt;</td></tr>
<tr><td>&lt;/configuration&gt;</td></tr>
</table>
</section>
<section>
<title>免密码<em>ssh</em>设置</title>
<p>
现在确认能否不输入口令就用ssh登录localhost:<br/>
<code>$ ssh localhost</code>
</p>
<p>
如果不输入口令就无法用ssh登陆localhost执行下面的命令<br/>
<code>$ ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa</code><br/>
<code>$ cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys</code>
</p>
</section>
<section>
<title>执行</title>
<p>
格式化一个新的分布式文件系统:<br/>
<code>$ bin/hadoop namenode -format</code>
</p>
<p>
启动Hadoop守护进程<br/>
<code>$ bin/start-all.sh</code>
</p>
<p>Hadoop守护进程的日志写入到
<code>${HADOOP_LOG_DIR}</code> 目录 (默认是
<code>${HADOOP_HOME}/logs</code>).</p>
<p>浏览NameNode和JobTracker的网络接口它们的地址默认为</p>
<ul>
<li>
<code>NameNode</code> -
<a href="http://localhost:50070/">http://localhost:50070/</a>
</li>
<li>
<code>JobTracker</code> -
<a href="http://localhost:50030/">http://localhost:50030/</a>
</li>
</ul>
<p>
将输入文件拷贝到分布式文件系统:<br/>
<code>$ bin/hadoop fs -put conf input</code>
</p>
<p>
运行发行版提供的示例程序:<br/>
<code>
$ bin/hadoop jar hadoop-*-examples.jar grep input output 'dfs[a-z.]+'
</code>
</p>
<p>查看输出文件:</p>
<p>
将输出文件从分布式文件系统拷贝到本地文件系统查看:<br/>
<code>$ bin/hadoop fs -get output output</code><br/>
<code>$ cat output/*</code>
</p>
<p> 或者 </p>
<p>
在分布式文件系统上查看输出文件:<br/>
<code>$ bin/hadoop fs -cat output/*</code>
</p>
<p>
完成全部操作后,停止守护进程:<br/>
<code>$ bin/stop-all.sh</code>
</p>
</section>
</section>
<section id="FullyDistributed">
<title>完全分布式模式的操作方法</title>
<p>关于搭建完全分布式模式的,有实际意义的集群的资料可以在<a href="cluster_setup.html">这里</a>找到。</p>
</section>
<p>
<em>Java与JNI是Sun Microsystems, Inc.在美国以及其他国家地区的商标或注册商标。</em>
</p>
</body>
</document>

View File

@ -0,0 +1,249 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Forrest site.xml
This file contains an outline of the site's information content. It is used to:
- Generate the website menus (though these can be overridden - see docs)
- Provide semantic, location-independent aliases for internal 'site:' URIs, eg
<link href="site:changes"> links to changes.html (or ../changes.html if in
subdir).
- Provide aliases for external URLs in the external-refs section. Eg, <link
href="ext:cocoon"> links to http://xml.apache.org/cocoon/
See http://forrest.apache.org/docs/linking.html for more info.
-->
<site label="Hadoop" href="" xmlns="http://apache.org/forrest/linkmap/1.0">
<docs label="文档">
<overview label="概述" href="index.html" />
<quickstart label="快速入门" href="quickstart.html" />
<setup label="集群搭建" href="cluster_setup.html" />
<hdfs label="HDFS构架设计" href="hdfs_design.html" />
<hdfs label="HDFS使用指南" href="hdfs_user_guide.html" />
<hdfs label="HDFS权限指南" href="hdfs_permissions_guide.html" />
<hdfs label="HDFS配额管理指南" href="hdfs_quota_admin_guide.html" />
<commands label="命令手册" href="commands_manual.html" />
<fs label="FS Shell使用指南" href="hdfs_shell.html" />
<distcp label="DistCp使用指南" href="distcp.html" />
<mapred label="Map-Reduce教程" href="mapred_tutorial.html" />
<mapred label="Hadoop本地库" href="native_libraries.html" />
<streaming label="Streaming" href="streaming.html" />
<archives label="Hadoop Archives" href="hadoop_archives.html"/>
<hod label="Hadoop On Demand" href="hod.html">
<hod-user-guide href="hod_user_guide.html"/>
<hod-admin-guide href="hod_admin_guide.html"/>
<hod-config-guide href="hod_config_guide.html"/>
</hod>
<api label="API参考" href="ext:api/index" />
<jdiff label="API Changes" href="ext:jdiff" />
<wiki label="维基" href="ext:wiki" />
<faq label="常见问题" href="ext:faq" />
<lists label="邮件列表" href="ext:lists" />
<relnotes label="发行说明" href="ext:relnotes" />
<changes label="变更日志" href="ext:changes" />
</docs>
<external-refs>
<site href="http://hadoop.apache.org/core/"/>
<lists href="http://hadoop.apache.org/core/mailing_lists.html"/>
<releases href="http://hadoop.apache.org/core/releases.html">
<download href="#Download" />
</releases>
<jira href="http://hadoop.apache.org/core/issue_tracking.html"/>
<wiki href="http://wiki.apache.org/hadoop/" />
<faq href="http://wiki.apache.org/hadoop/FAQ" />
<hadoop-default href="http://hadoop.apache.org/core/docs/current/hadoop-default.html" />
<zlib href="http://www.zlib.net/" />
<lzo href="http://www.oberhumer.com/opensource/lzo/" />
<gzip href="http://www.gzip.org/" />
<cygwin href="http://www.cygwin.com/" />
<osx href="http://www.apple.com/macosx" />
<hod href="">
<cluster-resources href="http://www.clusterresources.com" />
<torque href="http://www.clusterresources.com/pages/products/torque-resource-manager.php" />
<torque-download href="http://www.clusterresources.com/downloads/torque/" />
<torque-docs href="http://www.clusterresources.com/pages/resources/documentation.php" />
<torque-wiki href="http://www.clusterresources.com/wiki/doku.php?id=torque:torque_wiki" />
<torque-mailing-list href="http://www.clusterresources.com/pages/resources/mailing-lists.php" />
<torque-basic-config href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.2_basic_configuration" />
<torque-advanced-config href="http://www.clusterresources.com/wiki/doku.php?id=torque:1.3_advanced_configuration" />
<maui href="http://www.clusterresources.com/pages/products/maui-cluster-scheduler.php"/>
<python href="http://www.python.org" />
<twisted-python href="http://twistedmatrix.com/trac/" />
</hod>
<relnotes href="http://hadoop.apache.org/core/docs/r0.18.2/releasenotes.html" />
<changes href="http://hadoop.apache.org/core/docs/r0.18.2/changes.html" />
<jdiff href="http://hadoop.apache.org/core/docs/r0.18.2/jdiff/changes.html" />
<api href="http://hadoop.apache.org/core/docs/r0.18.2/api/">
<index href="index.html" />
<org href="org/">
<apache href="apache/">
<hadoop href="hadoop/">
<conf href="conf/">
<configuration href="Configuration.html">
<final_parameters href="#FinalParams" />
<get href="#get(java.lang.String, java.lang.String)" />
<set href="#set(java.lang.String, java.lang.String)" />
</configuration>
</conf>
<filecache href="filecache/">
<distributedcache href="DistributedCache.html">
<addarchivetoclasspath href="#addArchiveToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)" />
<addfiletoclasspath href="#addFileToClassPath(org.apache.hadoop.fs.Path,%20org.apache.hadoop.conf.Configuration)" />
<addcachefile href="#addCacheFile(java.net.URI,%20org.apache.hadoop.conf.Configuration)" />
<addcachearchive href="#addCacheArchive(java.net.URI,%20org.apache.hadoop.conf.Configuration)" />
<setcachefiles href="#setCacheFiles(java.net.URI[],%20org.apache.hadoop.conf.Configuration)" />
<setcachearchives href="#setCacheArchives(java.net.URI[],%20org.apache.hadoop.conf.Configuration)" />
<createsymlink href="#createSymlink(org.apache.hadoop.conf.Configuration)" />
</distributedcache>
</filecache>
<fs href="fs/">
<filesystem href="FileSystem.html" />
</fs>
<io href="io/">
<closeable href="Closeable.html">
<close href="#close()" />
</closeable>
<sequencefile href="SequenceFile.html" />
<sequencefilecompressiontype href="SequenceFile.CompressionType.html">
<none href="#NONE" />
<record href="#RECORD" />
<block href="#BLOCK" />
</sequencefilecompressiontype>
<writable href="Writable.html" />
<writablecomparable href="WritableComparable.html" />
<compress href="compress/">
<compressioncodec href="CompressionCodec.html" />
</compress>
</io>
<mapred href="mapred/">
<clusterstatus href="ClusterStatus.html" />
<counters href="Counters.html" />
<fileinputformat href="FileInputFormat.html">
<setinputpaths href="#setInputPaths(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path[])" />
<addinputpath href="#addInputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)" />
<setinputpathstring href="#setInputPaths(org.apache.hadoop.mapred.JobConf,%20java.lang.String)" />
<addinputpathstring href="#addInputPath(org.apache.hadoop.mapred.JobConf,%20java.lang.String)" />
</fileinputformat>
<fileoutputformat href="FileOutputFormat.html">
<getoutputpath href="#getOutputPath(org.apache.hadoop.mapred.JobConf)" />
<getworkoutputpath href="#getWorkOutputPath(org.apache.hadoop.mapred.JobConf)" />
<setoutputpath href="#setOutputPath(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.fs.Path)" />
<setcompressoutput href="#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)" />
<setoutputcompressorclass href="#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)" />
</fileoutputformat>
<filesplit href="FileSplit.html" />
<inputformat href="InputFormat.html" />
<inputsplit href="InputSplit.html" />
<isolationrunner href="IsolationRunner.html" />
<jobclient href="JobClient.html">
<runjob href="#runJob(org.apache.hadoop.mapred.JobConf)" />
<submitjob href="#submitJob(org.apache.hadoop.mapred.JobConf)" />
</jobclient>
<jobconf href="JobConf.html">
<setnummaptasks href="#setNumMapTasks(int)" />
<setnumreducetasks href="#setNumReduceTasks(int)" />
<setoutputkeycomparatorclass href="#setOutputKeyComparatorClass(java.lang.Class)" />
<setoutputvaluegroupingcomparator href="#setOutputValueGroupingComparator(java.lang.Class)" />
<setcombinerclass href="#setCombinerClass(java.lang.Class)" />
<setmapdebugscript href="#setMapDebugScript(java.lang.String)" />
<setreducedebugscript href="#setReduceDebugScript(java.lang.String)" />
<setmapspeculativeexecution href="#setMapSpeculativeExecution(boolean)" />
<setreducespeculativeexecution href="#setReduceSpeculativeExecution(boolean)" />
<setmaxmapattempts href="#setMaxMapAttempts(int)" />
<setmaxreduceattempts href="#setMaxReduceAttempts(int)" />
<setmaxmaptaskfailurespercent href="#setMaxMapTaskFailuresPercent(int)" />
<setmaxreducetaskfailurespercent href="#setMaxReduceTaskFailuresPercent(int)" />
<setjobendnotificationuri href="#setJobEndNotificationURI(java.lang.String)" />
<setcompressmapoutput href="#setCompressMapOutput(boolean)" />
<setmapoutputcompressorclass href="#setMapOutputCompressorClass(java.lang.Class)" />
<setprofileenabled href="#setProfileEnabled(boolean)" />
<setprofiletaskrange href="#setProfileTaskRange(boolean,%20java.lang.String)" />
<setprofileparams href="#setProfileParams(java.lang.String)" />
<getjoblocaldir href="#getJobLocalDir()" />
<getjar href="#getJar()" />
</jobconf>
<jobconfigurable href="JobConfigurable.html">
<configure href="#configure(org.apache.hadoop.mapred.JobConf)" />
</jobconfigurable>
<jobcontrol href="jobcontrol/">
<package-summary href="package-summary.html" />
</jobcontrol>
<mapper href="Mapper.html">
<map href="#map(K1, V1, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)" />
</mapper>
<outputcollector href="OutputCollector.html">
<collect href="#collect(K, V)" />
</outputcollector>
<outputformat href="OutputFormat.html" />
<outputformatbase href="OutputFormatBase.html">
<setcompressoutput href="#setCompressOutput(org.apache.hadoop.mapred.JobConf,%20boolean)" />
<setoutputcompressorclass href="#setOutputCompressorClass(org.apache.hadoop.mapred.JobConf,%20java.lang.Class)" />
</outputformatbase>
<outputlogfilter href="OutputLogFilter.html" />
<sequencefileoutputformat href="SequenceFileOutputFormat.html">
<setoutputcompressiontype href="#setOutputCompressionType(org.apache.hadoop.mapred.JobConf,%20org.apache.hadoop.io.SequenceFile.CompressionType)" />
</sequencefileoutputformat>
<partitioner href="Partitioner.html" />
<recordreader href="RecordReader.html" />
<recordwriter href="RecordWriter.html" />
<reducer href="Reducer.html">
<reduce href="#reduce(K2, java.util.Iterator, org.apache.hadoop.mapred.OutputCollector, org.apache.hadoop.mapred.Reporter)" />
</reducer>
<reporter href="Reporter.html">
<incrcounterEnum href="#incrCounter(java.lang.Enum, long)" />
<incrcounterString href="#incrCounter(java.lang.String, java.lang.String, long amount)" />
</reporter>
<runningjob href="RunningJob.html" />
<textinputformat href="TextInputFormat.html" />
<textoutputformat href="TextOutputFormat.html" />
<lib href="lib/">
<package-summary href="package-summary.html" />
<hashpartitioner href="HashPartitioner.html" />
</lib>
<pipes href="pipes/">
<package-summary href="package-summary.html" />
</pipes>
</mapred>
<net href="net/">
<dnstoswitchmapping href="DNSToSwitchMapping.html">
<resolve href="#resolve(java.util.List)" />
</dnstoswitchmapping>
</net>
<streaming href="streaming/">
<package-summary href="package-summary.html" />
</streaming>
<util href="util/">
<genericoptionsparser href="GenericOptionsParser.html" />
<progress href="Progress.html" />
<tool href="Tool.html" />
<toolrunner href="ToolRunner.html">
<run href="#run(org.apache.hadoop.util.Tool, java.lang.String[])" />
</toolrunner>
</util>
</hadoop>
</apache>
</org>
</api>
</external-refs>
</site>

View File

@ -0,0 +1,618 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN"
"http://forrest.apache.org/dtd/document-v20.dtd">
<document>
<header>
<title>Hadoop Streaming</title>
<meta name="http-equiv">Content-Type</meta>
<meta name="content">text/html;</meta>
<meta name="charset">utf-8</meta>
</header>
<body>
<section>
<title>Hadoop Streaming</title>
<p>
Hadoop streaming是Hadoop的一个工具
它帮助用户创建和运行一类特殊的map/reduce作业
这些特殊的map/reduce作业是由一些可执行文件或脚本文件充当mapper或者reducer。例如
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper /bin/cat \
-reducer /bin/wc
</source>
</section>
<section>
<title>Streaming工作原理</title>
<p>
在上面的例子里mapper和reducer都是可执行文件它们从标准输入读入数据一行一行读
并把计算结果发给标准输出。Streaming工具会创建一个Map/Reduce作业
并把它发送给合适的集群,同时监视这个作业的整个执行过程。
</p><p>
如果一个可执行文件被用于mapper则在mapper初始化时
每一个mapper任务会把这个可执行文件作为一个单独的进程启动。
mapper任务运行时它把输入切分成行并把每一行提供给可执行文件进程的标准输入。
同时mapper收集可执行文件进程标准输出的内容并把收到的每一行内容转化成key/value对作为mapper的输出。
默认情况下，一行中第一个tab之前的部分作为<strong>key</strong>，之后的（不包括tab）作为<strong>value</strong>。
如果没有tab，整行作为key值，value值为null。不过，这可以定制，在下文中将会讨论如何自定义key和value的切分方式。
</p>
<p>如果一个可执行文件被用于reducer每个reducer任务会把这个可执行文件作为一个单独的进程启动。
Reducer任务运行时它把输入切分成行并把每一行提供给可执行文件进程的标准输入。
同时reducer收集可执行文件进程标准输出的内容并把每一行内容转化成key/value对作为reducer的输出。
默认情况下一行中第一个tab之前的部分作为key之后的不包括tab作为value。在下文中将会讨论如何自定义key和value的切分方式。
</p><p>
这是Map/Reduce框架和streaming mapper/reducer之间的基本通信协议。
</p><p>
用户也可以使用java类作为mapper或者reducer。上面的例子与这里的代码等价
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper \
-reducer /bin/wc
</source>
<p>用户可以设定<code>stream.non.zero.exit.is.failure</code>为
<code>true</code>或<code>false</code>，来表明streaming task的返回值非零时是
<code>Failure</code>
还是<code>Success</code>。默认情况下，streaming task返回非零时表示失败。
</p>
</section>
<section>
<title>将文件打包到提交的作业中</title>
<p>
任何可执行文件都可以被指定为mapper/reducer。这些可执行文件不需要事先存放在集群上
如果在集群上还没有,则需要用-file选项让framework把可执行文件作为作业的一部分一起打包提交。例如
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper myPythonScript.py \
-reducer /bin/wc \
-file myPythonScript.py
</source>
<p>
上面的例子描述了一个用户把可执行python文件作为mapper。
其中的选项“-file myPythonScript.py”使可执行python文件作为作业提交的一部分被上传到集群的机器上。
</p>
<p>
除了可执行文件外其他mapper或reducer需要用到的辅助文件比如字典配置文件等也可以用这种方式打包上传。例如
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper myPythonScript.py \
-reducer /bin/wc \
-file myPythonScript.py \
-file myDictionary.txt
</source>
</section>
<section>
<title>Streaming选项与用法</title>
<section>
<title>只使用Mapper的作业</title>
<p>
有时只需要map函数处理输入数据。这时只需把mapred.reduce.tasks设置为零Map/reduce框架就不会创建reducer任务mapper任务的输出就是整个作业的最终输出。
</p><p>
为了做到向下兼容Hadoop Streaming也支持“-reduce None”选项它与“-jobconf mapred.reduce.tasks=0”等价。
</p>
</section>
<section>
<title>为作业指定其他插件</title>
<p>
和其他普通的Map/Reduce作业一样用户可以为streaming作业指定其他插件
</p>
<source>
-inputformat JavaClassName
-outputformat JavaClassName
-partitioner JavaClassName
-combiner JavaClassName
</source>
<p>用于处理输入格式的类要能返回Text类型的key/value对。如果不指定输入格式则默认会使用TextInputFormat。
因为TextInputFormat得到的key值是LongWritable类型的其实key值并不是输入文件中的内容而是value偏移量
所以key会被丢弃只把value用管道方式发给mapper。
</p><p>
用户提供的定义输出格式的类需要能够处理Text类型的key/value对。如果不指定输出格式则默认会使用TextOutputFormat类。
</p>
</section>
<section>
<title>Hadoop Streaming中的大文件和档案</title>
<p>任务使用-cacheFile和-cacheArchive选项在集群中分发文件和档案，选项的参数是用户已上传至HDFS的文件或档案的URI。这些文件和档案在不同的作业间缓存。用户可以通过配置参数fs.default.name的值得到文件所在的host和fs_port。
</p>
<p>
这个是使用-cacheFile选项的例子
</p>
<source>
-cacheFile hdfs://host:fs_port/user/testfile.txt#testlink
</source>
<p>在上面的例子里url中#后面的部分是建立在任务当前工作目录下的符号链接的名字。这里的任务的当前工作目录下有一个“testlink”符号链接它指向testfile.txt文件在本地的拷贝。如果有多个文件选项可以写成
</p>
<source>
-cacheFile hdfs://host:fs_port/user/testfile1.txt#testlink1 -cacheFile hdfs://host:fs_port/user/testfile2.txt#testlink2
</source>
<p>
-cacheArchive选项用于把jar文件拷贝到任务当前工作目录并自动把jar文件解压缩。例如
</p>
<source>
-cacheArchive hdfs://host:fs_port/user/testfile.jar#testlink3
</source>
<p>
在上面的例子中testlink3是当前工作目录下的符号链接它指向testfile.jar解压后的目录。
</p>
<p>
下面是使用-cacheArchive选项的另一个例子。其中input.txt文件有两行内容分别是两个文件的名字testlink/cache.txt和testlink/cache2.txt。“testlink”是指向档案目录jar文件解压后的目录的符号链接这个目录下有“cache.txt”和“cache2.txt”两个文件。
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input "/user/me/samples/cachefile/input.txt" \
-mapper "xargs cat" \
-reducer "cat" \
-output "/user/me/samples/cachefile/out" \
-cacheArchive 'hdfs://hadoop-nn1.example.com/user/me/samples/cachefile/cachedir.jar#testlink' \
-jobconf mapred.map.tasks=1 \
-jobconf mapred.reduce.tasks=1 \
-jobconf mapred.job.name="Experiment"
$ ls test_jar/
cache.txt cache2.txt
$ jar cvf cachedir.jar -C test_jar/ .
added manifest
adding: cache.txt(in = 30) (out= 29)(deflated 3%)
adding: cache2.txt(in = 37) (out= 35)(deflated 5%)
$ hadoop dfs -put cachedir.jar samples/cachefile
$ hadoop dfs -cat /user/me/samples/cachefile/input.txt
testlink/cache.txt
testlink/cache2.txt
$ cat test_jar/cache.txt
This is just the cache string
$ cat test_jar/cache2.txt
This is just the second cache string
$ hadoop dfs -ls /user/me/samples/cachefile/out
Found 1 items
/user/me/samples/cachefile/out/part-00000 &lt;r 3&gt; 69
$ hadoop dfs -cat /user/me/samples/cachefile/out/part-00000
This is just the cache string
This is just the second cache string
</source>
</section>
<section>
<title>为作业指定附加配置参数</title>
<p>
用户可以使用“-jobconf &lt;n&gt;=&lt;v&gt;”增加一些配置变量。例如:
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper\
-reducer /bin/wc \
-jobconf mapred.reduce.tasks=2
</source>
<p>
上面的例子中,-jobconf mapred.reduce.tasks=2表明用两个reducer完成作业。
</p>
<p>
关于jobconf参数的更多细节可以参考<a href="ext:hadoop-default">hadoop-default.html</a></p>
</section>
<section>
<title>其他选项</title>
<p>
Streaming 作业的其他选项如下表:
</p>
<table>
<tr><th>选项</th><th>可选/必须</th><th>描述</th></tr>
<tr><td> -cluster name </td><td> 可选 </td><td> 在本地Hadoop集群与一个或多个远程集群间切换</td></tr>
<tr><td> -dfs host:port or local </td><td> 可选 </td><td> 覆盖作业的HDFS配置</td></tr>
<tr><td> -jt host:port or local </td><td> 可选 </td><td> 覆盖作业的JobTracker配置</td></tr>
<tr><td> -additionalconfspec specfile </td><td> 可选 </td><td> 用一个类似于hadoop-site.xml的XML文件保存所有配置从而不需要用多个"-jobconf name=value"类型的选项单独为每个配置变量赋值</td></tr>
<tr><td> -cmdenv name=value </td><td> 可选 </td><td> 传递环境变量给streaming命令</td></tr>
<tr><td> -cacheFile fileNameURI </td><td> 可选 </td><td> 指定一个上传到HDFS的文件</td></tr>
<tr><td> -cacheArchive fileNameURI </td><td> 可选 </td><td> 指定一个上传到HDFS的jar文件这个jar文件会被自动解压缩到当前工作目录下</td></tr>
<tr><td> -inputreader JavaClassName </td><td> 可选 </td><td> 为了向下兼容指定一个record reader类而不是input format类</td></tr>
<tr><td> -verbose </td><td> 可选 </td><td> 详细输出 </td></tr>
</table>
<p>
使用-cluster &lt;name&gt;实现“本地”Hadoop和一个或多个远程Hadoop集群间切换。默认情况下使用hadoop-default.xml和hadoop-site.xml当使用-cluster &lt;name&gt;选项时,会使用$HADOOP_HOME/conf/hadoop-&lt;name&gt;.xml。
</p>
<p>
下面的选项改变temp目录
</p>
<source>
-jobconf dfs.data.dir=/tmp
</source>
<p>
下面的选项指定其他本地temp目录
</p>
<source>
-jobconf mapred.local.dir=/tmp/local
-jobconf mapred.system.dir=/tmp/system
-jobconf mapred.temp.dir=/tmp/temp
</source>
<p>
更多有关jobconf的细节请参考<a href="http://wiki.apache.org/hadoop/JobConfFile">http://wiki.apache.org/hadoop/JobConfFile</a>
</p><p>
在streaming命令中设置环境变量
</p>
<source>
-cmdenv EXAMPLE_DIR=/home/example/dictionaries/
</source>
</section>
</section>
<section>
<title>其他例子</title>
<section>
<title>使用自定义的方法切分行来形成Key/Value对</title>
<p>
之前已经提到当Map/Reduce框架从mapper的标准输入读取一行时它把这一行切分为key/value对。
在默认情况下每行第一个tab符之前的部分作为key之后的部分作为value不包括tab符
</p>
<p>
但是用户可以自定义：可以指定分隔符是其他字符而不是默认的tab符，或者指定在第n（n&gt;=1）个分隔符处分割而不是默认的第一个。例如：
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper \
-reducer org.apache.hadoop.mapred.lib.IdentityReducer \
-jobconf stream.map.output.field.separator=. \
-jobconf stream.num.map.output.key.fields=4
</source>
<p>
在上面的例子中，“-jobconf stream.map.output.field.separator=.”指定“.”作为map输出内容的分隔符，并且第四个“.”之前的部分作为key，之后的部分作为value（不包括这第四个“.”）。
如果一行中的“.”少于四个，则整行的内容作为key，value设为空的Text对象（就像这样创建了一个Text：new Text("")）。
</p><p>
同样，用户可以使用“-jobconf stream.reduce.output.field.separator=SEP”和“-jobconf stream.num.reduce.output.key.fields=NUM”来指定reduce输出的行中第几个分隔符处分割key和value。
</p>
</section>
<section>
<title>一个实用的Partitioner类<!--A Useful Partitioner Class--> (二次排序,-partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner 选项) </title>
<p>
Hadoop有一个工具类org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner
它在应用程序中很有用。Map/reduce框架用这个类切分map的输出
切分是基于key值的前缀而不是整个key。例如
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.IdentityMapper \
-reducer org.apache.hadoop.mapred.lib.IdentityReducer \
-partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
-jobconf stream.map.output.field.separator=. \
-jobconf stream.num.map.output.key.fields=4 \
-jobconf map.output.key.field.separator=. \
-jobconf num.key.fields.for.partition=2 \
-jobconf mapred.reduce.tasks=12
</source>
<p>
其中，<em>-jobconf stream.map.output.field.separator=.</em>和<em>-jobconf stream.num.map.output.key.fields=4</em>是前文中的例子。Streaming用这两个变量来得到mapper的key/value对。
</p><p>
上面的Map/Reduce 作业中map输出的key一般是由“.”分割成的四块。但是因为使用了
<em>-jobconf num.key.fields.for.partition=2</em>
选项所以Map/Reduce框架使用key的前两块来切分map的输出。其中
<em>-jobconf map.output.key.field.separator=.</em>
指定了这次切分使用的key的分隔符。这样可以保证在所有key/value对中
key值前两个块值相同的所有key被分到一组分配给一个reducer。
</p><p>
<em>这种高效的方法等价于指定前两块作为主键,后两块作为副键。
主键用于切分块,主键和副键的组合用于排序。</em>一个简单的示例如下:
</p>
<p>
Map的输出key</p><source>
11.12.1.2
11.14.2.3
11.11.4.1
11.12.1.1
11.14.2.2
</source>
<p>
切分给3个reducer前两块的值用于切分</p><source>
11.11.4.1
-----------
11.12.1.2
11.12.1.1
-----------
11.14.2.3
11.14.2.2
</source>
<p>
在每个切分后的组内排序(四个块的值都用于排序)
</p><source>
11.11.4.1
-----------
11.12.1.1
11.12.1.2
-----------
11.14.2.2
11.14.2.3
</source>
</section>
<section>
<title>Hadoop聚合功能包的使用（-reducer aggregate 选项）</title>
<p>
Hadoop有一个工具包“Aggregate”（
<a href="https://svn.apache.org/repos/asf/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate">https://svn.apache.org/repos/asf/hadoop/core/trunk/src/java/org/apache/hadoop/mapred/lib/aggregate</a>）。
“Aggregate”提供一个特殊的reducer类和一个特殊的combiner类，
并且有一系列的“聚合器”（“aggregator”，例如“sum”、“max”、“min”等）用于聚合一组value的序列。
用户可以使用Aggregate定义一个mapper插件类
这个类用于为mapper输入的每个key/value对产生“可聚合项”。
combiner/reducer利用适当的聚合器聚合这些可聚合项。
</p><p>
要使用Aggregate只需指定“-reducer aggregate”</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper myAggregatorForKeyCount.py \
-reducer aggregate \
-file myAggregatorForKeyCount.py \
-jobconf mapred.reduce.tasks=12
</source>
<p>
python程序myAggregatorForKeyCount.py例子
</p>
<source>
#!/usr/bin/python
import sys;
def generateLongCountToken(id):
    return "LongValueSum:" + id + "\t" + "1"
def main(argv):
    line = sys.stdin.readline();
    try:
        while line:
            line = line&#91;:-1];
            fields = line.split("\t");
            print generateLongCountToken(fields&#91;0]);
            line = sys.stdin.readline();
    except "end of file":
        return None
if __name__ == "__main__":
    main(sys.argv)
</source>
</section>
<section>
<title>字段的选取类似于unix中的 'cut' 命令) </title>
<p>
Hadoop的工具类org.apache.hadoop.mapred.lib.FieldSelectionMapReduce帮助用户高效处理文本数据
就像unix中的“cut”工具。工具类中的map函数把输入的key/value对看作字段的列表。
用户可以指定字段的分隔符默认是tab
可以选择字段列表中任意一段由列表中一个或多个字段组成作为map输出的key或者value。
同样工具类中的reduce函数也把输入的key/value对看作字段的列表用户可以选取任意一段作为reduce输出的key或value。例如
</p>
<source>
$HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input myInputDirs \
-output myOutputDir \
-mapper org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
-reducer org.apache.hadoop.mapred.lib.FieldSelectionMapReduce\
-partitioner org.apache.hadoop.mapred.lib.KeyFieldBasedPartitioner \
-jobconf map.output.key.field.separator=. \
-jobconf num.key.fields.for.partition=2 \
-jobconf mapred.data.field.separator=. \
-jobconf map.output.key.value.fields.spec=6,5,1-3:0- \
-jobconf reduce.output.key.value.fields.spec=0-2:5- \
-jobconf mapred.reduce.tasks=12
</source>
<p>
选项“-jobconf map.output.key.value.fields.spec=6,5,1-3:0-”指定了如何为map的输出选取key和value。Key选取规则和value选取规则由“:”分割。
在这个例子中，map输出的key由字段6，5，1，2和3组成。输出的value由所有字段组成（“0-”指字段0以及之后所有字段）。
</p>
<p>
选项“-jobconf reduce.output.key.value.fields.spec=0-2:0-”（译者注：此处应为“0-2:5-”）指定如何为reduce的输出选取key和value。
本例中，reduce输出的key将包含字段0，1，2（对应于原始的字段6，5，1）。
reduce输出的value将包含起自字段5的所有字段（对应于所有的原始字段）。
</p>
</section>
</section>
<section>
<title>常见问题</title>
<section>
<title>我该怎样使用Hadoop Streaming运行一组独立相关的任务呢</title>
<p>
多数情况下你不需要Map Reduce的全部功能
而只需要运行同一程序的多个实例,或者使用不同数据,或者在相同数据上使用不同的参数。
你可以通过Hadoop Streaming来实现。</p>
</section>
<section>
<title>如何处理多个文件其中每个文件一个map</title>
<p>
例如这样一个问题：在集群上压缩（zipping）一些文件，你可以使用以下几种方法：</p><ol>
<li>使用Hadoop Streaming和用户编写的mapper脚本程序<ul>
<li> 生成一个文件文件中包含所有要压缩的文件在HDFS上的完整路径。每个map 任务获得一个路径名作为输入。</li>
<li> 创建一个mapper脚本程序实现如下功能获得文件名把该文件拷贝到本地压缩该文件并把它发到期望的输出目录。</li>
</ul></li>
<li>使用现有的Hadoop框架<ul>
<li>在main函数中添加如下命令
<source>
FileOutputFormat.setCompressOutput(conf, true);
FileOutputFormat.setOutputCompressorClass(conf, org.apache.hadoop.io.compress.GzipCodec.class);
conf.setInputFormat(NonSplitableTextInputFormat.class);
conf.setNumReduceTasks(0);
</source></li>
<li>编写map函数
<source>
public void map(WritableComparable key, Writable value,
OutputCollector output,
Reporter reporter) throws IOException {
output.collect((Text)value, null);
}
</source></li>
<li>注意输出的文件名和原文件名不同</li>
</ul></li>
</ol>
</section>
<section>
<title>应该使用多少个reducer</title>
<p>
请参考Hadoop Wiki<a href="mapred_tutorial.html#Reducer">Reducer</a>
</p>
</section>
<section>
<title>
如果在Shell脚本里设置一个别名，并放在-mapper之后，Streaming会正常运行吗？
例如，alias c1='cut -f1'，-mapper "c1"会运行正常吗？
</title>
<p>
脚本里无法使用别名,但是允许变量替换,例如:
</p>
<source>
$ hadoop dfs -cat samples/student_marks
alice 50
bruce 70
charlie 80
dan 75
$ c2='cut -f2'; $HADOOP_HOME/bin/hadoop jar $HADOOP_HOME/hadoop-streaming.jar \
-input /user/me/samples/student_marks \
-mapper \"$c2\" -reducer 'cat' \
-output /user/me/samples/student_out \
-jobconf mapred.job.name='Experiment'
$ hadoop dfs -ls samples/student_out
Found 1 items
/user/me/samples/student_out/part-00000 &lt;r 3&gt; 16
$ hadoop dfs -cat samples/student_out/part-00000
50
70
75
80
</source>
</section>
<section>
<title>
我可以使用UNIX pipes吗？例如 -mapper "cut -f1 | sed s/foo/bar/g"管用么？
</title>
<p>
现在不支持而且会给出错误信息“java.io.IOException: Broken pipe”。这或许是一个bug需要进一步研究。
</p>
</section>
<section>
<title>在streaming作业中用-file选项运行一个<strong>分布式的超大可执行文件例如3.6G</strong>时,
我得到了一个错误信息“No space left on device”。如何解决
</title>
<p>
配置变量stream.tmpdir指定了一个目录在这个目录下要进行打jar包的操作。stream.tmpdir的默认值是/tmp你需要将这个值设置为一个有更大空间的目录
</p>
<source>
-jobconf stream.tmpdir=/export/bigspace/...
</source>
</section>
<section>
<title>如何设置多个输入目录?</title>
<p>
可以使用多个-input选项设置多个输入目录
</p><source>
hadoop jar hadoop-streaming.jar -input '/user/foo/dir1' -input '/user/foo/dir2'
</source>
</section>
<section>
<title>如何生成gzip格式的输出文件</title>
<p>
除了纯文本格式的输出，你还可以生成gzip文件格式的输出。你只需设置streaming作业中的选项“-jobconf mapred.output.compress=true -jobconf mapred.output.compression.codec=org.apache.hadoop.io.compress.GzipCodec”。
</p>
</section>
<section>
<title>Streaming中如何自定义input/output format</title>
<p>
至少在Hadoop 0.14版本以前不支持多个jar文件。所以当指定自定义的类时你要把他们和原有的streaming jar打包在一起并用这个自定义的jar包替换默认的hadoop streaming jar包。
</p>
</section>
<section>
<title>Streaming如何解析XML文档</title>
<p>
你可以使用StreamXmlRecordReader来解析XML文档。
</p>
<source>
hadoop jar hadoop-streaming.jar -inputreader "StreamXmlRecord,begin=BEGIN_STRING,end=END_STRING" ..... (rest of the command)
</source>
<p>
Map任务会把BEGIN_STRING和END_STRING之间的部分看作一条记录。
</p>
</section>
<section>
<title>在streaming应用程序中如何更新计数器</title>
<p>
streaming进程能够使用stderr发出计数器信息。
<code>reporter:counter:&lt;group&gt;,&lt;counter&gt;,&lt;amount&gt;</code>
应该被发送到stderr来更新计数器。
</p>
</section>
<section>
<title>如何更新streaming应用程序的状态</title>
<p>
streaming进程能够使用stderr发出状态信息。
<code>reporter:status:&lt;message&gt;</code> 要被发送到stderr来设置状态。
</p>
</section>
</section>
</body>
</document>

View File

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!DOCTYPE tabs PUBLIC "-//APACHE//DTD Cocoon Documentation Tab V1.0//EN"
"http://forrest.apache.org/dtd/tab-cocoon-v10.dtd">
<tabs software="Hadoop"
title="Hadoop"
copyright="The Apache Software Foundation"
xmlns:xlink="http://www.w3.org/1999/xlink">
<!-- The rules are:
@dir will always have /index.html added.
@href is not modified unless it is root-relative and obviously specifies a
directory (ends in '/'), in which case /index.html will be added
-->
<tab label="项目" href="http://hadoop.apache.org/core/" />
<tab label="维基" href="http://wiki.apache.org/hadoop" />
<tab label="Hadoop 0.18文档" dir="" />
</tabs>

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 6.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 766 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 125 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 9.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 40 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 16 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 29 KiB

View File

@ -0,0 +1,346 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Skin configuration file. This file contains details of your project,
which will be used to configure the chosen Forrest skin.
-->
<!DOCTYPE skinconfig PUBLIC "-//APACHE//DTD Skin Configuration V0.6-3//EN" "http://forrest.apache.org/dtd/skinconfig-v06-3.dtd">
<skinconfig>
<!-- To enable lucene search add provider="lucene" (default is google).
Add box-location="alt" to move the search box to an alternate location
(if the skin supports it) and box-location="all" to show it in all
available locations on the page. Remove the <search> element to show
no search box. @domain will enable sitesearch for the specific domain with google.
In other words google will search the @domain for the query string.
-->
<search name="Lucene" domain="hadoop.apache.org" provider="google"/>
<!-- Disable the print link? If enabled, invalid HTML 4.0.1 -->
<disable-print-link>true</disable-print-link>
<!-- Disable the PDF link? -->
<disable-pdf-link>false</disable-pdf-link>
<!-- Disable the POD link? -->
<disable-pod-link>true</disable-pod-link>
<!-- Disable the Text link? FIXME: NOT YET IMPLEMENTED. -->
<disable-txt-link>true</disable-txt-link>
<!-- Disable the xml source link? -->
<!-- The xml source link makes it possible to access the xml rendition
of the source from the html page, and to have it generated statically.
This can be used to enable other sites and services to reuse the
xml format for their uses. Keep this disabled if you don't want other
sites to easily reuse your pages.-->
<disable-xml-link>true</disable-xml-link>
<!-- Disable navigation icons on all external links? -->
<disable-external-link-image>true</disable-external-link-image>
<!-- Disable w3c compliance links?
Use e.g. align="center" to move the compliance links logos to
an alternate location default is left.
(if the skin supports it) -->
<disable-compliance-links>true</disable-compliance-links>
<!-- Render mailto: links unrecognisable by spam harvesters? -->
<obfuscate-mail-links>false</obfuscate-mail-links>
<!-- Disable the javascript facility to change the font size -->
<disable-font-script>true</disable-font-script>
<!-- project logo -->
<project-name>Hadoop</project-name>
<project-description>Scalable Computing Platform</project-description>
<project-url>http://hadoop.apache.org/core/</project-url>
<project-logo>images/core-logo.gif</project-logo>
<!-- group logo -->
<group-name>Hadoop</group-name>
<group-description>Apache Hadoop</group-description>
<group-url>http://hadoop.apache.org/</group-url>
<group-logo>images/hadoop-logo.jpg</group-logo>
<!-- optional host logo (e.g. sourceforge logo)
default skin: renders it at the bottom-left corner -->
<host-url></host-url>
<host-logo></host-logo>
<!-- relative url of a favicon file, normally favicon.ico -->
<favicon-url>images/favicon.ico</favicon-url>
<!-- The following are used to construct a copyright statement -->
<year>2009</year>
<vendor>The Apache Software Foundation.</vendor>
<copyright-link>http://www.apache.org/licenses/</copyright-link>
<!-- Some skins use this to form a 'breadcrumb trail' of links.
Use location="alt" to move the trail to an alternate location
(if the skin supports it).
Omit the location attribute to display the trail in the default location.
Use location="none" to not display the trail (if the skin supports it).
For some skins just set the attributes to blank.
-->
<trail>
<link1 name="Apache" href="http://www.apache.org/"/>
<link2 name="Hadoop" href="http://hadoop.apache.org/"/>
<link3 name="Core" href="http://hadoop.apache.org/core/"/>
</trail>
<!-- Configure the TOC, i.e. the Table of Contents.
@max-depth
how many "section" levels need to be included in the
generated Table of Contents (TOC).
@min-sections
Minimum required to create a TOC.
@location ("page","menu","page,menu", "none")
Where to show the TOC.
-->
<toc max-depth="2" min-sections="1" location="page"/>
<!-- Heading types can be clean|underlined|boxed -->
<headings type="clean"/>
<!-- The optional feedback element will be used to construct a
feedback link in the footer with the page pathname appended:
<a href="@href">{@to}</a>
<feedback to="webmaster@foo.com"
href="mailto:webmaster@foo.com?subject=Feedback&#160;" >
Send feedback about the website to:
</feedback>
-->
<!--
extra-css - here you can define custom css-elements that are
a. overriding the fallback elements or
b. adding the css definition from new elements that you may have
used in your documentation.
-->
<extra-css>
<!--Example of b.
To define the css definition of a new element that you may have used
in the class attribute of a <p> node.
e.g. <p class="quote"/>
-->
p.quote {
margin-left: 2em;
padding: .5em;
background-color: #f0f0f0;
font-family: monospace;
}
</extra-css>
<colors>
<!-- These values are used for the generated CSS files. -->
<!-- Krysalis -->
<!--
<color name="header" value="#FFFFFF"/>
<color name="tab-selected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="tab-unselected" value="#F7F7F7" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="subtab-selected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="subtab-unselected" value="#a5b6c6" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="heading" value="#a5b6c6"/>
<color name="subheading" value="#CFDCED"/>
<color name="navstrip" value="#CFDCED" font="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="toolbox" value="#a5b6c6"/>
<color name="border" value="#a5b6c6"/>
<color name="menu" value="#F7F7F7" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="dialog" value="#F7F7F7"/>
<color name="body" value="#ffffff" link="#0F3660" vlink="#009999" hlink="#000066"/>
<color name="table" value="#a5b6c6"/>
<color name="table-cell" value="#ffffff"/>
<color name="highlight" value="#ffff00"/>
<color name="fixme" value="#cc6600"/>
<color name="note" value="#006699"/>
<color name="warning" value="#990000"/>
<color name="code" value="#a5b6c6"/>
<color name="footer" value="#a5b6c6"/>
-->
<!-- Forrest -->
<!--
<color name="header" value="#294563"/>
<color name="tab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
<color name="tab-unselected" value="#b5c7e7" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
<color name="subtab-selected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
<color name="subtab-unselected" value="#4a6d8c" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
<color name="heading" value="#294563"/>
<color name="subheading" value="#4a6d8c"/>
<color name="navstrip" value="#cedfef" font="#0F3660" link="#0F3660" vlink="#0F3660" hlink="#000066"/>
<color name="toolbox" value="#4a6d8c"/>
<color name="border" value="#294563"/>
<color name="menu" value="#4a6d8c" font="#cedfef" link="#ffffff" vlink="#ffffff" hlink="#ffcf00"/>
<color name="dialog" value="#4a6d8c"/>
<color name="body" value="#ffffff" link="#0F3660" vlink="#009999" hlink="#000066"/>
<color name="table" value="#7099C5"/>
<color name="table-cell" value="#f0f0ff"/>
<color name="highlight" value="#ffff00"/>
<color name="fixme" value="#cc6600"/>
<color name="note" value="#006699"/>
<color name="warning" value="#990000"/>
<color name="code" value="#CFDCED"/>
<color name="footer" value="#cedfef"/>
-->
<!-- Collabnet -->
<!--
<color name="header" value="#003366"/>
<color name="tab-selected" value="#dddddd" link="#555555" vlink="#555555" hlink="#555555"/>
<color name="tab-unselected" value="#999999" link="#ffffff" vlink="#ffffff" hlink="#ffffff"/>
<color name="subtab-selected" value="#cccccc" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="subtab-unselected" value="#cccccc" link="#555555" vlink="#555555" hlink="#555555"/>
<color name="heading" value="#003366"/>
<color name="subheading" value="#888888"/>
<color name="navstrip" value="#dddddd" font="#555555"/>
<color name="toolbox" value="#dddddd" font="#555555"/>
<color name="border" value="#999999"/>
<color name="menu" value="#ffffff"/>
<color name="dialog" value="#eeeeee"/>
<color name="body" value="#ffffff"/>
<color name="table" value="#ccc"/>
<color name="table-cell" value="#ffffff"/>
<color name="highlight" value="#ffff00"/>
<color name="fixme" value="#cc6600"/>
<color name="note" value="#006699"/>
<color name="warning" value="#990000"/>
<color name="code" value="#003366"/>
<color name="footer" value="#ffffff"/>
-->
<!-- Lenya using pelt-->
<!--
<color name="header" value="#ffffff"/>
<color name="tab-selected" value="#4C6C8F" link="#ffffff" vlink="#ffffff" hlink="#ffffff"/>
<color name="tab-unselected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="subtab-selected" value="#000000" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="subtab-unselected" value="#E5E4D9" link="#000000" vlink="#000000" hlink="#000000"/>
<color name="heading" value="#E5E4D9"/>
<color name="subheading" value="#000000"/>
<color name="published" value="#4C6C8F" font="#FFFFFF"/>
<color name="feedback" value="#4C6C8F" font="#FFFFFF" align="center"/>
<color name="navstrip" value="#E5E4D9" font="#000000"/>
<color name="toolbox" value="#CFDCED" font="#000000"/>
<color name="border" value="#999999"/>
<color name="menu" value="#4C6C8F" font="#ffffff" link="#ffffff" vlink="#ffffff" hlink="#ffffff" current="#FFCC33" />
<color name="menuheading" value="#cfdced" font="#000000" />
<color name="searchbox" value="#E5E4D9" font="#000000"/>
<color name="dialog" value="#CFDCED"/>
<color name="body" value="#ffffff" />
<color name="table" value="#ccc"/>
<color name="table-cell" value="#ffffff"/>
<color name="highlight" value="#ffff00"/>
<color name="fixme" value="#cc6600"/>
<color name="note" value="#006699"/>
<color name="warning" value="#990000"/>
<color name="code" value="#003366"/>
<color name="footer" value="#E5E4D9"/>
-->
</colors>
<!-- Settings specific to PDF output. -->
<pdf>
<!--
Supported page sizes are a0, a1, a2, a3, a4, a5, executive,
folio, legal, ledger, letter, quarto, tabloid (default letter).
Supported page orientations are portrait, landscape (default
portrait).
Supported text alignments are left, right, justify (default left).
-->
<page size="letter" orientation="portrait" text-align="left"/>
<!--
Margins can be specified for top, bottom, inner, and outer
edges. If double-sided="false", the inner edge is always left
and the outer is always right. If double-sided="true", the
inner edge will be left on odd pages, right on even pages,
the outer edge vice versa.
Specified below are the default settings.
-->
<margins double-sided="false">
<top>1in</top>
<bottom>1in</bottom>
<inner>1.25in</inner>
<outer>1in</outer>
</margins>
<!--
Print the URL text next to all links going outside the file
-->
<show-external-urls>false</show-external-urls>
<!--
Disable the copyright footer on each page of the PDF.
A footer is composed for each page. By default, a "credit" with role=pdf
will be used, as explained below. Otherwise a copyright statement
will be generated. This latter can be disabled.
-->
<disable-copyright-footer>false</disable-copyright-footer>
</pdf>
<!-- Credits are typically rendered as a set of small clickable
images in the page footer.
Use box-location="alt" to move the credit to an alternate location
(if the skin supports it).
-->
<credits>
<credit box-location="alt">
<name>Built with Apache Forrest</name>
<url>http://forrest.apache.org/</url>
<image>images/built-with-forrest-button.png</image>
<width>88</width>
<height>31</height>
</credit>
<!-- A credit with @role="pdf" will be used to compose a footer
for each page in the PDF, using either "name" or "url" or both.
-->
<!--
<credit role="pdf">
<name>Built with Apache Forrest</name>
<url>http://forrest.apache.org/</url>
</credit>
-->
</credits>
</skinconfig>

View File

@ -0,0 +1,78 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- This is not used by Forrest but makes it possible to debug the
stylesheet in standalone editors -->
<xsl:output method = "text" omit-xml-declaration="yes" />
<!--
If the skin doesn't override this, at least aural styles
and extra-css are present
-->
<!--
Entry point for a skinconfig element. Emits, in order: the aural rules,
the a.external rule, whatever the child elements produce, and finally the
extra-css text.
-->
<xsl:template match="skinconfig">
<xsl:call-template name="aural"/>
<xsl:call-template name="a-external"/>
<xsl:apply-templates/>
<xsl:call-template name="add-extra-css"/>
</xsl:template>
<!--
Descends into the children of colors. Within this stylesheet those children
only reach the empty catch-all templates at the bottom, so nothing is
written for them (presumably a skin supplies its own color templates; not
visible here).
-->
<xsl:template match="colors">
<xsl:apply-templates/>
</xsl:template>
<!-- Literal CSS for the aural media type: voice settings for headings, lists, inline markup and links. -->
<xsl:template name="aural">
/* ==================== aural ============================ */
@media aural {
h1, h2, h3, h4, h5, h6 { voice-family: paul, male; stress: 20; richness: 90 }
h1 { pitch: x-low; pitch-range: 90 }
h2 { pitch: x-low; pitch-range: 80 }
h3 { pitch: low; pitch-range: 70 }
h4 { pitch: medium; pitch-range: 60 }
h5 { pitch: medium; pitch-range: 50 }
h6 { pitch: medium; pitch-range: 40 }
li, dt, dd { pitch: medium; richness: 60 }
dt { stress: 80 }
pre, code, tt { pitch: medium; pitch-range: 0; stress: 0; richness: 80 }
em { pitch: medium; pitch-range: 60; stress: 60; richness: 50 }
strong { pitch: medium; pitch-range: 60; stress: 90; richness: 90 }
dfn { pitch: high; pitch-range: 60; stress: 60 }
s, strike { richness: 0 }
i { pitch: medium; pitch-range: 60; stress: 60; richness: 50 }
b { pitch: medium; pitch-range: 60; stress: 90; richness: 90 }
u { richness: 0 }
:link { voice-family: harry, male }
:visited { voice-family: betty, female }
:active { voice-family: betty, female; pitch-range: 80; pitch: x-high }
}
</xsl:template>
<!-- Literal CSS for links with class "external": right padding plus a non-repeating background icon on the right edge. -->
<xsl:template name="a-external">
a.external {
padding: 0 20px 0px 0px;
display:inline;
background-repeat: no-repeat;
background-position: center right;
background-image: url(images/external-link.gif);
}
</xsl:template>
<!--
Writes a marker comment followed by the string value of the extra-css child
of the current node. It is called from the skinconfig template, so the
current node is that skinconfig element.
-->
<xsl:template name="add-extra-css">
<xsl:text>/* extra-css */</xsl:text>
<xsl:value-of select="extra-css"/>
</xsl:template>
<!-- Catch-alls: any element or text node without a more specific template produces no output. -->
<xsl:template match="*"></xsl:template>
<xsl:template match="text()"></xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1 @@
The images in this directory are used if the current skin lacks them.

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.9 KiB

View File

@ -0,0 +1,92 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
Turns a skinconfig document into a small square SVG image: a background
rectangle plus the shape drawn by the named template "figure", optionally
mirrored horizontally and/or vertically.
-->
<!-- Vertical orientation: 't' leaves the figure as drawn, any other value mirrors it top to bottom. -->
<xsl:param name="orientation-tb"/>
<!-- Horizontal orientation: 'l' leaves the figure as drawn, any other value mirrors it left to right. -->
<xsl:param name="orientation-lr"/>
<!-- Width and height of the generated svg element. -->
<xsl:param name="size"/>
<!-- Names matched against skinconfig/colors/color/@name to pick the three colours. -->
<xsl:param name="bg-color-name"/>
<xsl:param name="stroke-color-name"/>
<xsl:param name="fg-color-name"/>
<!-- if not all colors are present, don't even try to render the corners -->
<xsl:variable name="isize">
<xsl:choose>
<xsl:when test="$bg-color-name and $stroke-color-name and $fg-color-name">
<xsl:value-of select="$size"/>
</xsl:when>
<xsl:otherwise>0</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<!-- Effective size minus one and plus one; used for the figure geometry and the background rectangle respectively. -->
<xsl:variable name="smallersize" select="number($isize)-1"/>
<xsl:variable name="biggersize" select="number($isize)+1"/>
<!-- CSS fragment "fill:VALUE;" for the background colour, or empty when that colour is not defined. -->
<xsl:variable name="bg">
<xsl:if test="skinconfig/colors/color[@name=$bg-color-name]">fill:<xsl:value-of select="skinconfig/colors/color[@name=$bg-color-name]/@value"/>;</xsl:if>
</xsl:variable>
<!-- CSS fragment "fill:VALUE;" for the figure. Note that it is looked up through stroke-color-name. -->
<xsl:variable name="fill">
<xsl:if test="skinconfig/colors/color[@name=$stroke-color-name]">fill:<xsl:value-of select="skinconfig/colors/color[@name=$stroke-color-name]/@value"/>;</xsl:if>
</xsl:variable>
<!-- CSS fragment "stroke:VALUE;" for the figure outline. Note that it is looked up through fg-color-name. -->
<xsl:variable name="stroke">
<xsl:if test="skinconfig/colors/color[@name=$fg-color-name]">stroke:<xsl:value-of select="skinconfig/colors/color[@name=$fg-color-name]/@value"/>;</xsl:if>
</xsl:variable>
<!-- Builds the svg element: background first, then the figure inside a group carrying the flip transform. -->
<xsl:template match="skinconfig">
<svg width="{$isize}" height="{$isize}">
<!-- background-->
<rect x="-1" y="-1" width="{$biggersize}" height="{$biggersize}" style="{$bg}stroke-width:0"/>
<!-- 0,0 0,-4 4,0 4,-4-->
<xsl:variable name="flip-tb-scale">
<xsl:choose>
<xsl:when test="$orientation-tb='t'">1</xsl:when>
<xsl:otherwise>-1</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<xsl:variable name="flip-lr-scale">
<xsl:choose>
<xsl:when test="$orientation-lr='l'">1</xsl:when>
<xsl:otherwise>-1</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<xsl:variable name="flip-tb-translate">
<xsl:choose>
<xsl:when test="$orientation-tb='t'">0</xsl:when>
<xsl:otherwise>-<xsl:value-of select="$isize" />
</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<xsl:variable name="flip-lr-translate">
<xsl:choose>
<xsl:when test="$orientation-lr='l'">0</xsl:when>
<xsl:otherwise>-<xsl:value-of select="$isize" />
</xsl:otherwise>
</xsl:choose>
</xsl:variable>
<!-- flip transform -->
<g transform="scale({$flip-lr-scale},{$flip-tb-scale}) translate({$flip-lr-translate}, {$flip-tb-translate})">
<xsl:call-template name="figure" />
</g>
</svg>
</xsl:template>
<!-- Default shape: a circle whose centre and radius are both smallersize, shifted by half a unit. -->
<xsl:template name="figure">
<!-- Just change shape here -->
<g transform="translate(0.5 0.5)">
<ellipse cx="{$smallersize}" cy="{$smallersize}" rx="{$smallersize}" ry="{$smallersize}"
style="{$fill}{$stroke}stroke-width:1"/>
</g>
<!-- end -->
</xsl:template>
<!-- Catch-alls: any other element or text node in the input produces no output. -->
<xsl:template match="*"></xsl:template>
<xsl:template match="text()"></xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,28 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Everything except the shape itself comes from the imported stylesheet. -->
<xsl:import href="corner-imports.svg.xslt" />
<!-- Diagonal 45 degrees corner -->
<!--
Replaces the imported "figure" template. The local biggersize is twice the
size parameter and hides any stylesheet-level variable of the same name for
the duration of this template. The polygon is a square of side 2*size with
its top-left corner cut off along the line from (0,size) to (size,0).
NOTE(review): this template reads $size directly, so the polygon keeps its
dimensions regardless of any adjusted size the imported stylesheet may
compute. Confirm that this is intended.
-->
<xsl:template name="figure">
<xsl:variable name="biggersize" select="number($size)+number($size)"/>
<g transform="translate(0 0.5)">
<polygon points="0,{$size} {$size},0 {$biggersize},0 {$biggersize},{$biggersize} 0,{$biggersize}"
style="{$fill}{$stroke}stroke-width:1"/>
</g>
</xsl:template>
</xsl:stylesheet>

Binary file not shown.

After

Width:  |  Height:  |  Size: 71 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 932 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 4.5 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 743 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 457 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 856 B

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<svg width="20pt" height="20pt"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink">
<!-- 20pt x 20pt icon: a shaded page outline with a red label reading "POD". -->
<defs
id="defs550">
<!-- Two-stop gradient running from mid gray (#7f7f7f) to white. -->
<linearGradient id="gray2white">
<stop style="stop-color:#7f7f7f;stop-opacity:1;" offset="0.000000"/>
<stop style="stop-color:#ffffff;stop-opacity:1;" offset="1.000000"/>
</linearGradient>
<!-- Reuses the gray2white stops and sets the gradient vector as fractions of the filled shape's bounding box. -->
<linearGradient id="pageshade" xlink:href="#gray2white"
x1="0.95" y1="0.95"
x2="0.40" y2="0.20"
gradientUnits="objectBoundingBox" spreadMethod="pad" />
<!-- Horizontal rule 200 units long, instantiated four times below. -->
<path d="M 0 0 L 200 0" style="stroke:#000000;stroke-width:1pt;" id="hr"/>
</defs>
<!-- The artwork is drawn in large units and scaled down by a factor of 0.08. -->
<g transform="scale(0.08)">
<!-- Page: gradient-filled 230 x 300 rectangle with four ruled lines spaced 60 units apart. -->
<g transform="translate(40, 0)">
<rect width="230" height="300" x="0" y="0"
style="fill:url(#pageshade);fill-rule:evenodd;
stroke:#000000;stroke-width:1.25;"/>
<g transform="translate(15, 60)">
<use xlink:href="#hr" x="0" y="0"/>
<use xlink:href="#hr" x="0" y="60"/>
<use xlink:href="#hr" x="0" y="120"/>
<use xlink:href="#hr" x="0" y="180"/>
</g>
</g>
<!-- Label: red box with white text, moved down 70 units and stretched by 1.1 horizontally and 1.6 vertically. -->
<g transform="translate(0,70),scale(1.1,1.6)">
<rect width="200" height="100" x="0" y="0"
style="fill:#ff0000;fill-rule:evenodd;
stroke:#000000;stroke-width:2.33903;"/>
<text x="20" y="75"
style="stroke:#ffffff;stroke-width:1.0;
font-size:72;font-weight:normal;fill:#ffffff;
font-family:Arial;text-anchor:start;">POD</text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 438 B

View File

@ -0,0 +1,27 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- Everything except the shape itself comes from the imported stylesheet. -->
<xsl:import href="corner-imports.svg.xslt" />
<!-- Rounded corner -->
<!--
Replaces the imported "figure" template with a circle whose centre
coordinates and radii are all $smallersize, shifted by half a unit.
$smallersize, $fill and $stroke are not declared in this file; presumably
they come from the imported stylesheet (confirm there).
-->
<xsl:template name="figure">
<g transform="translate(0.5 0.5)">
<ellipse cx="{$smallersize}" cy="{$smallersize}" rx="{$smallersize}" ry="{$smallersize}"
style="{$fill}{$stroke}stroke-width:1"/>
</g>
</xsl:template>
</xsl:stylesheet>

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.2 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 360 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 43 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 784 B

View File

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<svg width="20pt" height="20pt"
xmlns="http://www.w3.org/2000/svg"
xmlns:xlink="http://www.w3.org/1999/xlink">
<!-- 20pt x 20pt icon: a shaded page outline with a red label reading "TXT". -->
<defs
id="defs550">
<!-- Two-stop gradient running from mid gray (#7f7f7f) to white. -->
<linearGradient id="gray2white">
<stop style="stop-color:#7f7f7f;stop-opacity:1;" offset="0.000000"/>
<stop style="stop-color:#ffffff;stop-opacity:1;" offset="1.000000"/>
</linearGradient>
<!-- Reuses the gray2white stops and sets the gradient vector as fractions of the filled shape's bounding box. -->
<linearGradient id="pageshade" xlink:href="#gray2white"
x1="0.95" y1="0.95"
x2="0.40" y2="0.20"
gradientUnits="objectBoundingBox" spreadMethod="pad" />
<!-- Horizontal rule 200 units long, instantiated four times below. -->
<path d="M 0 0 L 200 0" style="stroke:#000000;stroke-width:1pt;" id="hr"/>
</defs>
<!-- The artwork is drawn in large units and scaled down by a factor of 0.08. -->
<g transform="scale(0.08)">
<!-- Page: gradient-filled 230 x 300 rectangle with four ruled lines spaced 60 units apart. -->
<g transform="translate(40, 0)">
<rect width="230" height="300" x="0" y="0"
style="fill:url(#pageshade);fill-rule:evenodd;
stroke:#000000;stroke-width:1.25;"/>
<g transform="translate(15, 60)">
<use xlink:href="#hr" x="0" y="0"/>
<use xlink:href="#hr" x="0" y="60"/>
<use xlink:href="#hr" x="0" y="120"/>
<use xlink:href="#hr" x="0" y="180"/>
</g>
</g>
<!-- Label: red box with white text, moved down 70 units and stretched by 1.1 horizontally and 1.6 vertically. -->
<g transform="translate(0,70),scale(1.1,1.6)">
<rect width="200" height="100" x="0" y="0"
style="fill:#ff0000;fill-rule:evenodd;
stroke:#000000;stroke-width:2.33903;"/>
<text x="20" y="75"
style="stroke:#ffffff;stroke-width:1.0;
font-size:72;font-weight:normal;fill:#ffffff;
font-family:Arial;text-anchor:start;">TXT</text>
</g>
</g>
</svg>

After

Width:  |  Height:  |  Size: 2.3 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 990 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.9 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 1.1 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 647 B

View File

@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
 * Breadcrumb configuration.
 *
 * PREPREND_CRUMBS holds [name, href] pairs that are placed in front of the
 * crumbs derived from the URL. Up to three pairs are taken from the
 * @skinconfig.trail.linkN.*@ placeholders (presumably substituted when the
 * site is built). A pair is skipped when its name is empty or still starts
 * with "@", i.e. the placeholder was left untouched.
 *
 * The href placeholders are quoted: left bare, the raw "@...@" token (or an
 * unquoted URL after substitution) is not valid JavaScript and the whole
 * script fails to parse.
 */
var PREPREND_CRUMBS = new Array();
var link1 = "@skinconfig.trail.link1.name@";
var link2 = "@skinconfig.trail.link2.name@";
var link3 = "@skinconfig.trail.link3.name@";
if (link1 != "" && link1.indexOf("@") != 0) {
  PREPREND_CRUMBS.push(new Array(link1, "@skinconfig.trail.link1.href@"));
}
if (link2 != "" && link2.indexOf("@") != 0) {
  PREPREND_CRUMBS.push(new Array(link2, "@skinconfig.trail.link2.href@"));
}
if (link3 != "" && link3.indexOf("@") != 0) {
  PREPREND_CRUMBS.push(new Array(link3, "@skinconfig.trail.link3.href@"));
}
// Markup placed between crumbs, before the first crumb and after the last one.
var DISPLAY_SEPARATOR = " &gt; ";
var DISPLAY_PREPREND = " &gt; ";
var DISPLAY_POSTPREND = ":";
// CSS class names used by gettrailXHTML().
var CSS_CLASS_CRUMB = "breadcrumb";
var CSS_CLASS_TRAIL = "breadcrumbTrail";
var CSS_CLASS_SEPARATOR = "crumbSeparator";
// A last path segment containing one of these is treated as a file name.
var FILE_EXTENSIONS = new Array(".html", ".htm", ".jsp", ".php", ".php3", ".php4");
var PATH_SEPARATOR = "/";
/**
 * Sentence-case a string: lower-case everything, then upper-case the first
 * character. An empty string is returned unchanged.
 */
function sc(s) {
  var lowered = s.toLowerCase();
  var head = lowered.charAt(0).toUpperCase();
  var tail = lowered.slice(1);
  return head + tail;
}
/**
 * Split the current document path into its directory names.
 *
 * The leading empty segment (before the first separator) is always dropped.
 * The last segment is dropped as well when it is empty (path ends with a
 * separator) or contains one of FILE_EXTENSIONS, i.e. looks like a file.
 *
 * The previous version used the result of indexOf() directly as a boolean;
 * since "not found" is -1 (truthy), the last segment was removed even when
 * it was a directory name.
 *
 * @return array of directory names, possibly empty
 */
function getdirs() {
  var t = document.location.pathname.split(PATH_SEPARATOR);
  var lc = t[t.length - 1];
  // An empty last segment comes from a trailing separator and is never a crumb.
  var dropLast = (lc == "");
  for (var i = 0; !dropLast && i < FILE_EXTENSIONS.length; i++) {
    if (lc.indexOf(FILE_EXTENSIONS[i]) != -1) {
      dropLast = true;
    }
  }
  if (dropLast) {
    return t.slice(1, t.length - 1);
  }
  return t.slice(1, t.length);
}
/**
 * Build the list of crumbs for a list of directory names.
 *
 * Each crumb is a [name, href] pair where href is the cumulative path
 * "/dir1/dir2/.../" up to and including that directory. PREPREND_CRUMBS,
 * when not empty, is placed in front of the result.
 *
 * The previous version appended an undeclared variable "postfix" (the local
 * is named "post"), which raised a ReferenceError for any non-empty input.
 *
 * @param d array of directory names; may be null or omitted
 * @return array of [name, href] pairs
 */
function getcrumbs( d )
{
  var pre = "/";
  var post = "/";
  var c = new Array();
  if (d != null) {
    for (var i = 0; i < d.length; i++) {
      pre += d[i] + post;
      c.push(new Array(d[i], pre));
    }
  }
  if (PREPREND_CRUMBS.length > 0) {
    return PREPREND_CRUMBS.concat(c);
  }
  return c;
}
/**
 * Render crumbs as a plain chain of links.
 *
 * Output is DISPLAY_PREPREND, one anchor per crumb (label passed through
 * sc()) joined by DISPLAY_SEPARATOR, then DISPLAY_POSTPREND.
 *
 * @param c array of [name, href] pairs
 * @return HTML string
 */
function gettrail( c )
{
  var anchors = [];
  for (var k = 0; k < c.length; k++) {
    var crumb = c[k];
    anchors.push('<a href="' + crumb[1] + '" >' + sc(crumb[0]) + '</a>');
  }
  return DISPLAY_PREPREND + anchors.join(DISPLAY_SEPARATOR) + DISPLAY_POSTPREND;
}
/**
 * Render crumbs as a span-wrapped chain of links carrying CSS classes.
 *
 * The whole trail sits in a span of class CSS_CLASS_TRAIL, every anchor has
 * class CSS_CLASS_CRUMB, and each separator is wrapped in a span of class
 * CSS_CLASS_SEPARATOR. Labels are passed through sc().
 *
 * @param c array of [name, href] pairs
 * @return HTML string
 */
function gettrailXHTML( c )
{
  var separatorMarkup = '<span class="' + CSS_CLASS_SEPARATOR + '">' + DISPLAY_SEPARATOR + '</span>';
  var anchors = [];
  for (var k = 0; k < c.length; k++) {
    var crumb = c[k];
    anchors.push('<a href="' + crumb[1] + '" class="' + CSS_CLASS_CRUMB + '">' + sc(crumb[0]) + '</a>');
  }
  var opening = '<span class="' + CSS_CLASS_TRAIL + '">' + DISPLAY_PREPREND;
  return opening + anchors.join(separatorMarkup) + DISPLAY_POSTPREND + '</span>';
}
// Print the trail. Pages served over http or https get the crumbs derived
// from the URL path; anything else (e.g. a file opened locally) only gets the
// preconfigured crumbs. The previous test looked for "http://" only, so pages
// served over https were treated as local.
if (/^https?:\/\//.test(document.location.href.toLowerCase()))
  document.write(gettrail(getcrumbs(getdirs())));
else
  document.write(gettrail(getcrumbs()));

View File

@ -0,0 +1,237 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This script, when included in a html file, builds a neat breadcrumb trail
* based on its url. That is, if it doesn't contains bugs (I'm relatively
* sure it does).
*
* Typical usage:
* <script type="text/javascript" language="JavaScript" src="breadcrumbs.js"></script>
*/
/**
 * Fallback for browsers without Array.push (IE 5 on Mac is the known case),
 * courtesy of fritz.
 *
 * The fallback appends a single element and, unlike the built-in method,
 * returns nothing.
 */
var abc = new Array();
if (!abc.push) {
  Array.prototype.push = function (what) {
    this[this.length] = what;
  };
}
/* ========================================================================
CONSTANTS
======================================================================== */
/**
 * Extra crumbs placed at the front of the trail, stored as [name, URI]
 * pairs. Adjust for every domain or subdomain where the script is used; an
 * empty array is fine.
 *
 * Up to three pairs come from the skinconfig trail placeholders below. A
 * pair is added only when its name is non-empty and does not start with "@"
 * (a name that still starts with "@" is an unreplaced placeholder).
 */
var PREPREND_CRUMBS = [];
var link1 = "@skinconfig.trail.link1.name@";
var link2 = "@skinconfig.trail.link2.name@";
var link3 = "@skinconfig.trail.link3.name@";
var href1 = "@skinconfig.trail.link1.href@";
var href2 = "@skinconfig.trail.link2.href@";
var href3 = "@skinconfig.trail.link3.href@";
if (link1 != "" && link1.indexOf("@") != 0) {
  PREPREND_CRUMBS.push([link1, href1]);
}
if (link2 != "" && link2.indexOf("@") != 0) {
  PREPREND_CRUMBS.push([link2, href2]);
}
if (link3 != "" && link3.indexOf("@") != 0) {
  PREPREND_CRUMBS.push([link3, href3]);
}
/** Markup inserted between two crumbs. */
var DISPLAY_SEPARATOR = " &gt; ";
/** Markup inserted before the first crumb. */
var DISPLAY_PREPREND = " &gt; ";
/** Markup inserted after the last crumb. */
var DISPLAY_POSTPREND = "";
/** CSS class of a single crumb link. */
var CSS_CLASS_CRUMB = "breadcrumb";
/** CSS class of the span wrapping the complete trail. */
var CSS_CLASS_TRAIL = "breadcrumbTrail";
/** CSS class of the span wrapping each separator. */
var CSS_CLASS_SEPARATOR = "crumbSeparator";
/**
 * Common file extensions. A last URL segment containing one of these
 * strings is considered a file and left out of the trail.
 */
var FILE_EXTENSIONS = [".html", ".htm", ".jsp", ".php", ".php3", ".php4"];
/**
 * Separator between the parts of a URL path.
 * When this is no longer a slash, I'm sure I'll be old and grey.
 */
var PATH_SEPARATOR = "/";
/* ========================================================================
UTILITY FUNCTIONS
======================================================================== */
/**
 * Hook for normalising the case of a crumb label.
 *
 * Capitalisation is currently switched off: the argument is handed back
 * untouched. The disabled implementation (lower-case everything, then
 * upper-case the first letter) is kept below for reference.
 */
function sentenceCase( string )
{
  // var lower = string.toLowerCase();
  // return lower.substr(0,1).toUpperCase() + lower.substr(1);
  return string;
}
/**
 * Returns an array containing the names of all the directories in the
 * current document URL.
 *
 * The leading empty segment (before the first separator) is always dropped.
 * The last segment is dropped as well when it is empty (the path ends with a
 * separator) or contains one of FILE_EXTENSIONS, i.e. looks like a file.
 *
 * The previous version used the result of indexOf() directly as a boolean;
 * since "not found" is -1 (truthy), the last segment was removed even when
 * it was a directory name.
 */
function getDirectoriesInURL()
{
  var trail = document.location.pathname.split( PATH_SEPARATOR );
  // check whether last section is a file or a directory
  var lastcrumb = trail[trail.length-1];
  // an empty last section comes from a trailing separator and is never a crumb
  var dropLast = ( lastcrumb == "" );
  for( var i = 0; !dropLast && i < FILE_EXTENSIONS.length; i++ )
  {
    if( lastcrumb.indexOf( FILE_EXTENSIONS[i] ) != -1 )
    {
      dropLast = true;
    }
  }
  if( dropLast )
  {
    // it is a file (or empty); remove it and send results
    return trail.slice( 1, trail.length-1 );
  }
  // it's not; send the trail unmodified
  return trail.slice( 1, trail.length );
}
/* ========================================================================
BREADCRUMB FUNCTIONALITY
======================================================================== */
/**
 * Turn a list of directory names into a two-dimensional crumb array.
 *
 * Every entry is a [name, href] pair whose href is the cumulative path
 * "/dir1/dir2/.../" up to and including that directory. When
 * PREPREND_CRUMBS is not empty its entries come first.
 *
 * dirs may be null or left out, in which case only the prepended crumbs
 * (if any) are returned.
 */
function getBreadcrumbs( dirs )
{
  var crumbs = [];
  var path = "/";
  var count = ( dirs != null ) ? dirs.length : 0;
  for( var idx = 0; idx < count; idx++ )
  {
    path = path + dirs[idx] + "/";
    crumbs.push( [ dirs[idx], path ] );
  }
  // put the configured crumbs in front
  return ( PREPREND_CRUMBS.length > 0 ) ? PREPREND_CRUMBS.concat( crumbs ) : crumbs;
}
/**
 * Build a simple breadcrumb trail from a two-dimensional crumb array.
 *
 * Output is DISPLAY_PREPREND, one anchor per crumb separated by
 * DISPLAY_SEPARATOR, then DISPLAY_POSTPREND. Labels are passed through
 * unescape() so percent-encoded names read naturally.
 *
 * NOTE(review): labels and hrefs are concatenated into markup as-is, with no
 * HTML escaping.
 */
function getCrumbTrail( crumbs )
{
  var pieces = [ DISPLAY_PREPREND ];
  var last = crumbs.length - 1;
  for( var idx = 0; idx < crumbs.length; idx++ )
  {
    var crumb = crumbs[idx];
    pieces.push( '<a href="' + crumb[1] + '" >' + unescape( crumb[0] ) + '</a>' );
    if( idx != last )
    {
      pieces.push( DISPLAY_SEPARATOR );
    }
  }
  pieces.push( DISPLAY_POSTPREND );
  return pieces.join( "" );
}
/**
 * Build an XHTML breadcrumb trail from a two-dimensional crumb array.
 *
 * The trail is wrapped in a span of class CSS_CLASS_TRAIL, every anchor has
 * class CSS_CLASS_CRUMB and every separator sits in its own span of class
 * CSS_CLASS_SEPARATOR. Labels are passed through unescape().
 *
 * NOTE(review): labels and hrefs are concatenated into markup as-is, with no
 * HTML escaping.
 */
function getCrumbTrailXHTML( crumbs )
{
  var pieces = [ '<span class="' + CSS_CLASS_TRAIL + '">', DISPLAY_PREPREND ];
  var last = crumbs.length - 1;
  for( var idx = 0; idx < crumbs.length; idx++ )
  {
    var crumb = crumbs[idx];
    pieces.push( '<a href="' + crumb[1] + '" class="' + CSS_CLASS_CRUMB + '">' + unescape( crumb[0] ) + '</a>' );
    if( idx != last )
    {
      pieces.push( '<span class="' + CSS_CLASS_SEPARATOR + '">' + DISPLAY_SEPARATOR + '</span>' );
    }
  }
  pieces.push( DISPLAY_POSTPREND, '</span>' );
  return pieces.join( "" );
}
/* ========================================================================
PRINT BREADCRUMB TRAIL
======================================================================== */
// Check whether the page is served over http or https. If it is, print the
// crumbs derived from the URL path; otherwise we are local, so only print the
// PREPREND_CRUMBS. The previous test looked for "http://" only, so pages
// served over https were treated as local.
if( /^https?:\/\//.test( document.location.href.toLowerCase() ) )
{
  document.write( getCrumbTrail( getBreadcrumbs( getDirectoriesInURL() ) ) );
}
else
{
  document.write( getCrumbTrail( getBreadcrumbs() ) );
}

View File

@ -0,0 +1,166 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Page initialisation hook. Currently does nothing: the call that would
 * apply the stored text size is commented out.
 */
function init() {
  // embedded in the doc
  // ndeSetTextSize();
}
/**
 * Report whether the browser is too old for the text-size functions.
 *
 * @return true when document.getElementsByTagName is missing, false when it
 *         is available
 */
function checkBrowser() {
  var unsupported = !document.getElementsByTagName;
  return unsupported;
}
/**
 * Change the document text size and remember it in the "nde-textsize"
 * cookie for 365 days.
 *
 * @param chgsize 'incr' (current size + 2), 'decr' (current size - 2),
 *                'reset' (rs, or 16 when rs is not given); any other value
 *                restores the size stored in the cookie, falling back to the
 *                current size
 * @param rs      size used by 'reset'; optional
 *
 * The current size is read through ndeGetDocTextSize() and defaults to 16
 * when it cannot be determined. The result is never smaller than 10 and is
 * applied, in px, to both the html and the body element.
 *
 * Fixes compared with the previous version:
 * - the capability guard tested the function object (`!checkBrowser`), which
 *   is never true; it now calls checkBrowser(), which returns true when
 *   document.getElementsByTagName is missing;
 * - parseInt is given an explicit radix;
 * - the comparison with the string 'NaN' is gone: it could never match, and
 *   the falsy test already covers NaN.
 */
function ndeSetTextSize(chgsize, rs)
{
  if (checkBrowser())
  {
    return;
  }
  var startSize = parseInt(ndeGetDocTextSize(), 10);
  if (!startSize)
  {
    startSize = 16;
  }
  var newSize;
  switch (chgsize)
  {
    case 'incr':
      newSize = startSize + 2;
      break;
    case 'decr':
      newSize = startSize - 2;
      break;
    case 'reset':
      newSize = rs ? rs : 16;
      break;
    default:
      try {
        newSize = parseInt(ndeReadCookie("nde-textsize"), 10);
      }
      catch (e) {
        alert(e);
      }
      // no usable cookie value: keep the current size
      if (!newSize)
      {
        newSize = startSize;
      }
      break;
  }
  if (newSize < 10)
  {
    newSize = 10;
  }
  newSize += 'px';
  document.getElementsByTagName('html')[0].style.fontSize = newSize;
  document.getElementsByTagName('body')[0].style.fontSize = newSize;
  ndeCreateCookie("nde-textsize", newSize, 365);
}
/**
 * Read the font size of the body element.
 *
 * Sources, in order: the inline style, getComputedStyle() when the browser
 * has it, then currentStyle.
 *
 * @return the size as reported by the browser (e.g. "16px"), or 0 when the
 *         browser is unsupported, no size was found, or the size is a
 *         percentage (seen with IE), which cannot be used as a pixel value
 *
 * Fixes compared with the previous version:
 * - the capability guard tested the function object (`!checkBrowser`), which
 *   is never true; it now calls checkBrowser(), which returns true when
 *   document.getElementsByTagName is missing;
 * - the percentage case used a bare `return` (undefined); it now returns 0
 *   like the other "unknown" paths.
 */
function ndeGetDocTextSize()
{
  if (checkBrowser())
  {
    return 0;
  }
  var size = 0;
  var body = document.getElementsByTagName('body')[0];
  if (body.style && body.style.fontSize)
  {
    size = body.style.fontSize;
  }
  else if (typeof(getComputedStyle) != 'undefined')
  {
    size = getComputedStyle(body, '').getPropertyValue('font-size');
  }
  else if (body.currentStyle)
  {
    size = body.currentStyle.fontSize;
  }
  // fix IE bug: a percentage is of no use to callers expecting pixels
  if (isNaN(size) && size.substring(size.length - 1) == "%")
  {
    return 0;
  }
  return size;
}
/**
 * Store a cookie for the whole site (path=/).
 *
 * @param name  cookie name
 * @param value cookie value
 * @param days  lifetime in days; when falsy no expiry date is written
 */
function ndeCreateCookie(name, value, days)
{
  var parts = [name + "=" + value + ";"];
  if (days)
  {
    var expiry = new Date();
    expiry.setTime(expiry.getTime() + (days * 24 * 60 * 60 * 1000));
    parts.push("expires=" + expiry.toGMTString() + ";");
  }
  parts.push("path=/");
  document.cookie = parts.join(" ");
}
/**
 * Look up a cookie by name.
 *
 * @param name cookie name
 * @return the cookie value, or null when no cookie of that name exists
 *
 * Fixes compared with the previous version:
 * - the match is made on "name=" instead of the bare name, so a cookie whose
 *   name merely starts with the requested name (e.g. "nde-textsize2") is no
 *   longer returned with a garbled value;
 * - the helper variable that leaked into the global scope is gone.
 */
function ndeReadCookie(name)
{
  var nameEQ = name + "=";
  var ca = document.cookie.split(';');
  for (var i = 0; i < ca.length; i++)
  {
    var c = ca[i];
    // entries after the first are preceded by a space
    while (c.charAt(0) == ' ')
    {
      c = c.substring(1, c.length);
    }
    if (c.indexOf(nameEQ) == 0)
    {
      return c.substring(nameEQ.length, c.length);
    }
  }
  return null;
}

View File

@ -0,0 +1,40 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* getBlank script - when included in an HTML file and called from a form text field, sets the value of this field to ""
* if the text value is still the standard value.
* getPrompt script - when included in an HTML file and called from a form text field, sets the value of this field to the prompt
* if the text value is empty.
*
* Typical usage:
* <script type="text/javascript" language="JavaScript" src="getBlank.js"></script>
* <input type="text" id="query" value="Search the site:" onFocus="getBlank (this, 'Search the site:');" onBlur="getPrompt (this, 'Search the site:');"/>
*/
<!--
/**
 * Clear a text field that still shows its standard (prompt) value.
 *
 * @param form     the field; only its value property is used
 * @param stdValue the standard value to compare against
 * @return always true
 */
function getBlank (form, stdValue) {
  var stillStandard = (form.value == stdValue);
  if (stillStandard) {
    form.value = '';
  }
  return true;
}
/**
 * Put the standard (prompt) value back into a text field that is empty.
 *
 * @param form     the field; only its value property is used
 * @param stdValue the value to restore
 * @return always true
 */
function getPrompt (form, stdValue) {
  var isEmpty = (form.value == '');
  if (isEmpty) {
    form.value = stdValue;
  }
  return true;
}
//-->

View File

@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This script, when included in a html file, can be used to make collapsible menus
*
* Typical usage:
* <script type="text/javascript" language="JavaScript" src="menu.js"></script>
*/
// Hide every .menuitemgroup up front, but only in browsers that have
// document.getElementById.
if (document.getElementById) {
  document.write('<style type="text/css">.menuitemgroup{display: none;}</style>');
}
/**
 * Toggle a collapsible menu group and swap the icon of its title.
 *
 * @param obj     id of the group element; the title element is expected
 *                under the id obj + 'Title'
 * @param thePath prefix placed in front of "images/chapter_open.gif" and
 *                "images/chapter.gif"
 *
 * A group whose inline display is anything but "block" is opened; an open
 * group is closed. Does nothing in browsers without document.getElementById.
 */
function SwitchMenu(obj, thePath)
{
  var openIcon = 'url("' + thePath + 'images/chapter_open.gif")';
  var closedIcon = 'url("' + thePath + 'images/chapter.gif")';
  if (!document.getElementById) {
    return;
  }
  var group = document.getElementById(obj);
  var heading = document.getElementById(obj + 'Title');
  var isOpen = (group.style.display == "block");
  if (isOpen) {
    heading.style.backgroundImage = closedIcon;
    group.style.display = "none";
  } else {
    heading.style.backgroundImage = openIcon;
    group.style.display = "block";
  }
}

View File

@ -0,0 +1,48 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This script, when included in a html file, can be used to make collapsible menus
*
* Typical usage:
* <script type="text/javascript" language="JavaScript" src="menu.js"></script>
*/
// Hide every .menuitemgroup up front, but only in browsers that have
// document.getElementById.
if (document.getElementById) {
  document.write('<style type="text/css">.menuitemgroup{display: none;}</style>');
}
/**
 * Toggle a collapsible menu group and update the CSS class of its title.
 *
 * @param obj id of the group element; the title element is expected under
 *            the id obj + 'Title'
 *
 * A group whose id starts with "_selected_" and that has no inline display
 * value yet is first marked as open ("block"), so the toggle that follows
 * closes it. After that an open group is closed (class "pagegroup") and any
 * other group is opened (class "pagegroupopen"). Does nothing in browsers
 * without document.getElementById.
 */
function SwitchMenu(obj)
{
  if (!document.getElementById) {
    return;
  }
  var group = document.getElementById(obj);
  var heading = document.getElementById(obj + 'Title');
  var isSelectedGroup = (obj.indexOf("_selected_") == 0);
  if (isSelectedGroup && group.style.display == "") {
    group.style.display = "block";
    heading.className = "pagegroupselected";
  }
  if (group.style.display == "block") {
    group.style.display = "none";
    heading.className = "pagegroup";
  } else {
    group.style.display = "block";
    heading.className = "pagegroupopen";
  }
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,238 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
Completes a skinconfig element with default settings.

For each setting handled here a default child element is written only when
the input skinconfig has no child of that name. The input's own children are
then copied unchanged after the defaults. For the two pdf settings the
default is written as an additional pdf element; an existing pdf element is
still copied as-is with the other children.
-->
<xsl:template match="skinconfig">
  <xsl:copy>
    <!-- Attributes have to be written before any child element: XSLT 1.0
         treats adding an attribute after children as an error, and
         processors either fail or silently drop the attribute. -->
    <xsl:copy-of select="@*"/>
    <xsl:if test="not(disable-print-link)">
      <disable-print-link>true</disable-print-link>
    </xsl:if>
    <xsl:if test="not(disable-pdf-link)">
      <disable-pdf-link>true</disable-pdf-link>
    </xsl:if>
    <xsl:if test="not(disable-txt-link)">
      <disable-txt-link>true</disable-txt-link>
    </xsl:if>
    <xsl:if test="not(disable-pod-link)">
      <disable-pod-link>true</disable-pod-link>
    </xsl:if>
    <xsl:if test="not(disable-xml-link)">
      <disable-xml-link>true</disable-xml-link>
    </xsl:if>
    <xsl:if test="not(disable-external-link-image)">
      <disable-external-link-image>false</disable-external-link-image>
    </xsl:if>
    <xsl:if test="not(disable-compliance-links)">
      <disable-compliance-links>false</disable-compliance-links>
    </xsl:if>
    <xsl:if test="not(obfuscate-mail-links)">
      <obfuscate-mail-links>true</obfuscate-mail-links>
    </xsl:if>
    <xsl:if test="not(obfuscate-mail-value)">
      <obfuscate-mail-value>.at.</obfuscate-mail-value>
    </xsl:if>
    <xsl:if test="not(disable-font-script)">
      <disable-font-script>true</disable-font-script>
    </xsl:if>
<!--
<xsl:if test="not(project-name)">
<project-name>MyProject</project-name>
</xsl:if>
<xsl:if test="not(project-description)">
<project-description>MyProject Description</project-description>
</xsl:if>
<xsl:if test="not(project-url)">
<project-url>http://myproj.mygroup.org/</project-url>
</xsl:if>
<xsl:if test="not(project-logo)">
<project-logo>images/project.png</project-logo>
</xsl:if>
<xsl:if test="not(group-name)">
<group-name>MyGroup</group-name>
</xsl:if>
<xsl:if test="not(group-description)">
<group-description>MyGroup Description</group-description>
</xsl:if>
<xsl:if test="not(group-url)">
<group-url>http://mygroup.org</group-url>
</xsl:if>
<xsl:if test="not(group-logo)">
<group-logo>images/group.png</group-logo>
</xsl:if>
<xsl:if test="not(host-url)">
<host-url/>
</xsl:if>
<xsl:if test="not(host-logo)">
<host-logo/>
</xsl:if>
<xsl:if test="not(year)">
<year>2006</year>
</xsl:if>
<xsl:if test="not(vendor)">
<vendor>The Acme Software Foundation.</vendor>
</xsl:if>
-->
    <xsl:if test="not(trail)">
      <trail>
        <link1 name="" href=""/>
        <link2 name="" href=""/>
        <link3 name="" href=""/>
      </trail>
    </xsl:if>
    <xsl:if test="not(toc)">
      <toc level="2" location="page"/>
    </xsl:if>
    <xsl:if test="not(pdf/page-numbering-format)">
      <pdf>
        <page-numbering-format>Page 1</page-numbering-format>
      </pdf>
    </xsl:if>
    <xsl:if test="not(pdf/show-external-urls)">
      <pdf>
        <show-external-urls>true</show-external-urls>
      </pdf>
    </xsl:if>
<!--
<xsl:if test="not(colors)">
<colors>
<color name="header" value="#294563"/>
<color name="tab-selected" value="#4a6d8c"/>
<color name="tab-unselected" value="#b5c7e7"/>
<color name="subtab-selected" value="#4a6d8c"/>
<color name="subtab-unselected" value="#4a6d8c"/>
<color name="heading" value="#294563"/>
<color name="subheading" value="#4a6d8c"/>
<color name="navstrip" value="#cedfef"/>
<color name="toolbox" value="#294563"/>
<color name="menu" value="#4a6d8c"/>
<color name="dialog" value="#4a6d8c"/>
<color name="body" value="#ffffff"/>
<color name="table" value="#7099C5"/>
<color name="table-cell" value="#f0f0ff"/>
<color name="highlight" value="#ffff00"/>
<color name="fixme" value="#c60"/>
<color name="note" value="#069"/>
<color name="warning" value="#900"/>
<color name="code" value="#CFDCED"/>
<color name="footer" value="#cedfef"/>
</colors>
</xsl:if>
-->
    <xsl:if test="not(extra-css)">
      <extra-css/>
    </xsl:if>
    <xsl:if test="not(credits)">
      <credits>
        <credit>
          <name>Built with Apache Forrest</name>
          <url>http://forrest.apache.org/</url>
          <image>images/built-with-forrest-button.png</image>
          <width>88</width>
          <height>31</height>
        </credit>
        <!-- A credit with @role='pdf' will have its name and url displayed in the
        PDF page's footer. -->
      </credits>
    </xsl:if>
    <!-- The project's own settings follow the defaults written above. -->
    <xsl:copy-of select="node()"/>
<!--
<xsl:copy-of select="node()[not(name(.)='colors')]"/>
<xsl:apply-templates select="colors"/>-->
  </xsl:copy>
</xsl:template>
<!--
<xsl:template match="colors">
<colors>
<xsl:if test="not(color[@name='header'])">
<color name="header" value="#294563"/>
</xsl:if>
<xsl:if test="not(color[@name='tab-selected'])">
<color name="tab-selected" value="#4a6d8c"/>
</xsl:if>
<xsl:if test="not(color[@name='tab-unselected'])">
<color name="tab-unselected" value="#b5c7e7"/>
</xsl:if>
<xsl:if test="not(color[@name='subtab-selected'])">
<color name="subtab-selected" value="#4a6d8c"/>
</xsl:if>
<xsl:if test="not(color[@name='subtab-unselected'])">
<color name="subtab-unselected" value="#4a6d8c"/>
</xsl:if>
<xsl:if test="not(color[@name='heading'])">
<color name="heading" value="#294563"/>
</xsl:if>
<xsl:if test="not(color[@name='subheading'])">
<color name="subheading" value="#4a6d8c"/>
</xsl:if>
<xsl:if test="not(color[@name='navstrip'])">
<color name="navstrip" value="#cedfef"/>
</xsl:if>
<xsl:if test="not(color[@name='toolbox'])">
<color name="toolbox" value="#294563"/>
</xsl:if>
<xsl:if test="not(color[@name='menu'])">
<color name="menu" value="#4a6d8c"/>
</xsl:if>
<xsl:if test="not(color[@name='dialog'])">
<color name="dialog" value="#4a6d8c"/>
</xsl:if>
<xsl:if test="not(color[@name='body'])">
<color name="body" value="#ffffff"/>
</xsl:if>
<xsl:if test="not(color[@name='table'])">
<color name="table" value="#7099C5"/>
</xsl:if>
<xsl:if test="not(color[@name='table-cell'])">
<color name="table-cell" value="#f0f0ff"/>
</xsl:if>
<xsl:if test="not(color[@name='highlight'])">
<color name="highlight" value="#ffff00"/>
</xsl:if>
<xsl:if test="not(color[@name='fixme'])">
<color name="fixme" value="#c60"/>
</xsl:if>
<xsl:if test="not(color[@name='note'])">
<color name="note" value="#069"/>
</xsl:if>
<xsl:if test="not(color[@name='warning'])">
<color name="warning" value="#900"/>
</xsl:if>
<xsl:if test="not(color[@name='code'])">
<color name="code" value="#CFDCED"/>
</xsl:if>
<xsl:if test="not(color[@name='footer'])">
<color name="footer" value="#cedfef"/>
</xsl:if>
<xsl:copy>
<xsl:copy-of select="@*"/>
<xsl:copy-of select="node()[name(.)='color']"/>
</xsl:copy>
</colors>
</xsl:template>
-->
</xsl:stylesheet>

View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<catalogue>
<message key="Font size:">Schriftgrösse:</message>
<message key="Last Published:">Zuletzt veröffentlicht:</message>
<message key="Search">Suche:</message>
<message key="Search the site with">Suche auf der Seite mit</message>
</catalogue>

View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<catalogue>
<message key="Font size:">Font size:</message>
<message key="Last Published:">Last Published:</message>
<message key="Search">Search</message>
<message key="Search the site with">Search site with</message>
</catalogue>

View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<catalogue>
<message key="Font size:">Tamaño del texto:</message>
<message key="Last Published:">Fecha de publicación:</message>
<message key="Search">Buscar</message>
<message key="Search the site with">Buscar en</message>
</catalogue>

View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="ISO-8859-1"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<catalogue>
<message key="Font size:">Taille :</message>
<message key="Last Published:">Dernière publication :</message>
<message key="Search">Rechercher</message>
<message key="Search the site with">Rechercher sur le site avec</message>
</catalogue>

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,70 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:fo="http://www.w3.org/1999/XSL/Format"
version="1.0">
<!--
Named template to generate a short message in the PDF footer, from text in
skinconf.xml. By default, the message is a copyright statement. If a credit
with @role='pdf' is present, that is used instead. Eg:
<credit role="pdf">
<name>Generated by Apache FOP 1.0-dev</name>
<url>http://xml.apache.org/fop/dev/</url>
</credit>
-->
<!--
Named template "info": writes the short footer message as an absolutely
positioned, italic, 10pt block container.

Message text: the name of the skinconfig credit with role 'pdf' when there is
one; otherwise a copyright line built from skinconfig year and vendor, unless
pdf/disable-copyright-footer is 'true' (then the text is empty).
When the pdf credit carries a url, the text and the url are each written as a
link to that url; otherwise the text is written as a plain block.
-->
<xsl:template name="info">
  <xsl:variable name="disable-copyright-footer" select="//skinconfig/pdf/disable-copyright-footer"/>
  <xsl:variable name="pdfcredit" select="//skinconfig/credits/credit[@role = 'pdf']"/>
  <xsl:variable name="url" select="$pdfcredit/url"/>
  <xsl:variable name="text">
    <xsl:choose>
      <xsl:when test="$pdfcredit">
        <xsl:value-of select="$pdfcredit/name"/>
      </xsl:when>
      <xsl:when test="not($disable-copyright-footer = 'true')">
        <xsl:text>Copyright &#169; </xsl:text>
        <xsl:value-of select="//skinconfig/year"/>
        <xsl:text>&#160;</xsl:text>
        <xsl:value-of select="//skinconfig/vendor"/>
        <xsl:text> All rights reserved.</xsl:text>
      </xsl:when>
    </xsl:choose>
  </xsl:variable>
  <fo:block-container font-style="italic" absolute-position="absolute"
                      left="0pt" top="0pt" right="6.25in" bottom="150pt"
                      font-size="10pt">
    <xsl:choose>
      <xsl:when test="$url">
        <!-- Credit with a url: linked text, then the linked url itself. -->
        <fo:block text-align="center">
          <fo:basic-link color="lightgrey" external-destination="{$url}">
            <xsl:value-of select="$text"/>
          </fo:basic-link>
        </fo:block>
        <fo:block text-align="center">
          <fo:basic-link color="lightgrey" external-destination="{$url}">
            <xsl:value-of select="$url"/>
          </fo:basic-link>
        </fo:block>
      </xsl:when>
      <xsl:otherwise>
        <fo:block text-align="center" color="lightgrey">
          <xsl:value-of select="$text"/>
        </fo:block>
      </xsl:otherwise>
    </xsl:choose>
  </fo:block-container>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,45 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:fo="http://www.w3.org/1999/XSL/Format"
xmlns:fox="http://xml.apache.org/fop/extensions"
version="1.0">
<!-- Outline mode, document: one outline entry per top-level body section. -->
<xsl:template match="document" mode="outline">
  <xsl:apply-templates select="body/section" mode="outline"/>
</xsl:template>
<!--
Outline mode, section: writes a fox:outline whose destination is the section
id (or a generated id when the id is missing or blank) and whose label is the
multi-level section number followed by the normalised title. Child sections
are nested inside it.
-->
<xsl:template match="section" mode="outline">
  <xsl:variable name="destination">
    <xsl:choose>
      <xsl:when test="normalize-space(@id)!=''">
        <xsl:value-of select="@id"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="generate-id()"/>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <fox:outline internal-destination="{$destination}">
    <fox:label>
      <xsl:number format="1.1.1.1.1.1.1" count="section" level="multiple"/>
      <xsl:text> </xsl:text>
      <xsl:value-of select="normalize-space(title)"/>
    </fox:label>
    <xsl:apply-templates select="section" mode="outline"/>
  </fox:outline>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,139 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
book-to-menu.xsl generates the HTML menu. It outputs XML/HTML of the form:
<div class="menu">
...
</div>
which is then merged with other HTML by site-to-xhtml.xsl
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- ================================================================ -->
<!-- These templates SHOULD be overridden -->
<!-- ================================================================ -->
<!-- Label of the entry for the current page: plain text, no link. -->
<xsl:template name="selected">
  <xsl:value-of select="@label"/>
</xsl:template>
<!-- Any other entry: a link to @href labelled with @label; @description,
     when present, becomes the link's title attribute. -->
<xsl:template name="unselected">
  <a>
    <xsl:attribute name="href">
      <xsl:value-of select="@href"/>
    </xsl:attribute>
    <xsl:for-each select="@description">
      <xsl:attribute name="title">
        <xsl:value-of select="."/>
      </xsl:attribute>
    </xsl:for-each>
    <xsl:value-of select="@label"/>
  </a>
</xsl:template>
<!-- Label of an external entry that counts as selected. -->
<xsl:template name="print-external">
  <!-- Use apply-imports when overriding -->
  <xsl:value-of select="@label"/>
</xsl:template>
<!-- ================================================================ -->
<!-- These templates CAN be overridden -->
<!-- ================================================================ -->
<!-- Eg, if tab href is 'index.html#foo', this will be called when index.html
is selected -->
<xsl:template name="selected-anchor">
  <!-- Rendered through "unselected" by default so the entry stays clickable
       and takes the user to the anchor. -->
  <xsl:call-template name="unselected"/>
</xsl:template>
<xsl:template name="unselected-anchor">
  <xsl:call-template name="unselected"/>
</xsl:template>
<!-- A book is rendered as its menus. -->
<xsl:template match="book">
  <xsl:apply-templates select="menu"/>
</xsl:template>
<!-- Each menu becomes a div of class "menu" filled by the base-menu template. -->
<xsl:template match="menu">
  <div class="menu">
    <xsl:call-template name="base-menu"/>
  </div>
</xsl:template>
<!--
Picks one of the four label templates for a menu item, depending on whether
the item points at the current page and whether its href contains a '#'.
-->
<xsl:template match="menu-item">
  <!-- Use apply-imports when overriding -->
  <xsl:variable name="href-nofrag">
    <xsl:call-template name="path-nofrag">
      <xsl:with-param name="path" select="@href"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="node-path">
    <xsl:call-template name="normalize">
      <xsl:with-param name="path" select="concat($dirname, $href-nofrag)"/>
    </xsl:call-template>
  </xsl:variable>
  <!-- The item is current when its normalised path equals the requested
       path with the fragment removed. -->
  <xsl:variable name="is-current" select="$node-path = $path-nofrag"/>
  <xsl:variable name="has-fragment" select="contains(@href, '#')"/>
  <xsl:choose>
    <xsl:when test="$is-current and $has-fragment">
      <xsl:call-template name="selected-anchor"/>
    </xsl:when>
    <xsl:when test="$is-current">
      <xsl:call-template name="selected"/>
    </xsl:when>
    <xsl:when test="$has-fragment">
      <xsl:call-template name="unselected-anchor"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="unselected"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- ================================================================ -->
<!-- These templates SHOULD NOT be overridden -->
<!-- ================================================================ -->
<!-- Stylesheet parameter: path of the page being rendered. It has no default
     here, so it is the empty string unless the caller sets it. -->
<xsl:param name="path"/>
<!-- NOTE(review): presumably pathutils.xsl supplies the filename, path-nofrag,
     dirname and normalize named templates called in this stylesheet; confirm. -->
<xsl:include href="pathutils.xsl"/>
<!-- Global: result of the "filename" template applied to $path. -->
<xsl:variable name="filename">
<xsl:call-template name="filename">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!-- Global: result of the "path-nofrag" template applied to $path; the
     menu-item and external templates compare hrefs against it. -->
<xsl:variable name="path-nofrag">
<xsl:call-template name="path-nofrag">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!-- Global: result of the "dirname" template applied to $path; the menu-item
     template prefixes each href with it before comparing. -->
<xsl:variable name="dirname">
<xsl:call-template name="dirname">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!--
External entry, written as a list item. An href that starts with the current
path (fragment removed) is shown as a span of class "externalSelected"; any
other href becomes a link that opens in a new window.
-->
<xsl:template match="external">
  <li>
    <xsl:choose>
      <xsl:when test="not(starts-with(@href, $path-nofrag))">
        <a href="{@href}" target="_blank">
          <xsl:value-of select="@label"/>
        </a>
      </xsl:when>
      <xsl:otherwise>
        <span class="externalSelected">
          <xsl:call-template name="print-external"/>
        </span>
      </xsl:otherwise>
    </xsl:choose>
  </li>
</xsl:template>
<!-- Entries of type "hidden" produce no output. -->
<xsl:template match="menu-item[@type='hidden']"/>
<xsl:template match="external[@type='hidden']"/>
<!-- Default menu body: process the children of the current node. -->
<xsl:template name="base-menu">
  <xsl:apply-templates/>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,374 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This stylesheet contains the majority of templates for converting documentv11
to HTML. It renders XML as HTML in this form:
<div class="content">
...
</div>
..which site-to-xhtml.xsl then combines with HTML from the index (book-to-menu.xsl)
and tabs (tab-to-menu.xsl) to generate the final HTML.
Section handling
- <a name/> anchors are added if the id attribute is specified
-->
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- When 'false' (the default) the document template also writes the
     skinconf-pdflink and skinconf-xmllink placeholders. -->
<xsl:param name="dynamic-page" select="'false'"/>
<!-- When 'true', the section template adds a "^" link to third-level section
     headings and indents their content. -->
<xsl:param name="notoc"/>
<!-- Path of the page being rendered; used below to compute $root. -->
<xsl:param name="path"/>
<!-- <xsl:include href="split.xsl"/> -->
<xsl:include href="dotdots.xsl"/>
<xsl:include href="pathutils.xsl"/>
<!-- Path to site root, eg '../../' -->
<xsl:variable name="root">
<xsl:call-template name="dotdots">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!-- $root followed by 'skin/images'. -->
<xsl:variable name="skin-img-dir" select="concat(string($root), 'skin/images')"/>
<!-- Entry point: the input is processed twice, first in "toc" mode and then
     in the default mode, so the toc output precedes the page content. -->
<xsl:template match="/">
<xsl:apply-templates mode="toc"/>
<xsl:apply-templates/>
</xsl:template>
<!--
Page body wrapper: title table, optional subtitle, header type/notice/abstract,
the body, and an attribution line (authors and version, separated by "; " when
both are present).
NOTE(review): the empty skinconf-* divs look like placeholders filled in by a
later stylesheet; confirm before changing their ids or position.
-->
<xsl:template match="document">
<div class="content">
<table summary="" class="title">
<tr>
<td valign="middle">
<xsl:if test="normalize-space(header/title)!=''">
<h1>
<xsl:value-of select="header/title"/>
</h1>
</xsl:if>
</td>
<div id="skinconf-printlink"/>
<xsl:if test="$dynamic-page='false'">
<div id="skinconf-pdflink"/>
<div id="skinconf-xmllink"/>
</xsl:if>
</tr>
</table>
<xsl:if test="normalize-space(header/subtitle)!=''">
<h3>
<xsl:value-of select="header/subtitle"/>
</h3>
</xsl:if>
<xsl:apply-templates select="header/type"/>
<xsl:apply-templates select="header/notice"/>
<xsl:apply-templates select="header/abstract"/>
<xsl:apply-templates select="body"/>
<div class="attribution">
<xsl:apply-templates select="header/authors"/>
<xsl:if test="header/authors and header/version">
<xsl:text>; </xsl:text>
</xsl:if>
<xsl:apply-templates select="header/version"/>
</div>
</div>
</xsl:template>
<!-- Body: an empty skinconf-toc-page div followed by the body's content. -->
<xsl:template match="body">
<div id="skinconf-toc-page"/>
<xsl:apply-templates/>
</xsl:template>
<!-- Generate a <a name="..."> tag for an @id; a blank id produces nothing. -->
<xsl:template match="@id">
<xsl:if test="normalize-space(.)!=''"><a name="{.}"/>
</xsl:if>
</xsl:template>
<!--
Renders a section: a generated-id anchor, an anchor for @id (if any), a
heading whose level follows the nesting depth, and then every child except
the title inside a wrapper div.

When $notoc is 'true', a third-level section also gets a "^" link to
#<id>-menu in its heading, and its content is indented by 15 pixels.
-->
<xsl:template match="section">
  <!-- count the number of section in the ancestor-or-self axis to compute
       the title element name later on -->
  <xsl:variable name="sectiondepth" select="count(ancestor-or-self::section)"/>
  <a name="{generate-id()}"/>
  <xsl:apply-templates select="@id"/>
  <!-- generate a title element, level 1 -> h3, level 2 -> h4 and so on... -->
  <xsl:element name="{concat('h',$sectiondepth + 2)}">
    <xsl:value-of select="title"/>
    <xsl:if test="$notoc='true' and $sectiondepth = 3">
      <span style="float: right"><a href="#{@id}-menu">^</a></span>
    </xsl:if>
  </xsl:element>
  <!-- Indent FAQ entry text 15 pixels. The unit is required: a non-zero CSS
       length without one is invalid and the declaration is dropped outside
       quirks mode. -->
  <xsl:variable name="indent">
    <xsl:choose>
      <xsl:when test="$notoc='true' and $sectiondepth = 3">
        <xsl:text>15px</xsl:text>
      </xsl:when>
      <xsl:otherwise>
        <xsl:text>0</xsl:text>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:variable>
  <div style="margin-left: {$indent} ; border: 2px">
    <xsl:apply-templates select="*[not(self::title)]"/>
  </div>
</xsl:template>
<!--
Admonitions (note, warning, fixme): a div whose class is the element name
(plus any class already on the element), holding a label div and a content
div. The label is @label when given, otherwise a fixed word per element; for
fixme it also shows @author.
-->
<xsl:template match="note | warning | fixme">
  <xsl:variable name="kind" select="local-name()"/>
  <xsl:apply-templates select="@id"/>
  <div>
    <xsl:call-template name="add.class">
      <xsl:with-param name="class" select="$kind"/>
    </xsl:call-template>
    <div class="label">
      <xsl:choose>
        <!-- FIXME: i18n Transformer here -->
        <xsl:when test="@label">
          <xsl:value-of select="@label"/>
        </xsl:when>
        <xsl:when test="$kind = 'note'">Note</xsl:when>
        <xsl:when test="$kind = 'warning'">Warning</xsl:when>
        <xsl:otherwise>Fixme (<xsl:value-of select="@author"/>)</xsl:otherwise>
      </xsl:choose>
    </div>
    <div class="content">
      <xsl:apply-templates/>
    </div>
  </div>
</xsl:template>
<!-- Header notice: its content prefixed with "Notice: " in a div of class
     "notice". -->
<xsl:template match="notice">
  <div class="notice">
    <!-- FIXME: i18n Transformer here -->
    <xsl:text>Notice: </xsl:text>
    <xsl:apply-templates/>
  </div>
</xsl:template>
<!--
link: an anchor carrying all of the link's attributes. A class of 'jump' or
'fork' first sets target to _top or _blank; since the element's own
attributes are copied afterwards, an explicit target attribute on the link
replaces that value.
-->
<xsl:template match="link">
  <xsl:apply-templates select="@id"/>
  <a>
    <xsl:choose>
      <xsl:when test="@class='jump'">
        <xsl:attribute name="target">_top</xsl:attribute>
      </xsl:when>
      <xsl:when test="@class='fork'">
        <xsl:attribute name="target">_blank</xsl:attribute>
      </xsl:when>
    </xsl:choose>
    <xsl:copy-of select="@*"/>
    <xsl:apply-templates/>
  </a>
</xsl:template>
<!-- jump: link to @href that targets _top. -->
<xsl:template match="jump">
  <xsl:apply-templates select="@id"/>
  <a href="{@href}" target="_top">
    <xsl:apply-templates/>
  </a>
</xsl:template>
<!-- fork: link to @href that targets _blank. -->
<xsl:template match="fork">
  <xsl:apply-templates select="@id"/>
  <a href="{@href}" target="_blank">
    <xsl:apply-templates/>
  </a>
</xsl:template>
<!-- Paragraph marked xml:space='preserve': written as a div of class "pre"
     (keeping @id) instead of a p. -->
<xsl:template match="p[@xml:space='preserve']">
<xsl:apply-templates select="@id"/>
<div class="pre">
<xsl:copy-of select="@id"/>
<xsl:apply-templates/>
</div>
</xsl:template>
<!-- source: a pre of class "code" holding the processed content. -->
<xsl:template match="source">
<xsl:apply-templates select="@id"/>
<pre class="code">
<!-- Temporarily removed long-line-splitter ... gives out-of-memory problems -->
<xsl:copy-of select="@id"/>
<xsl:apply-templates/>
<!--
<xsl:call-template name="format">
<xsl:with-param select="." name="txt" />
<xsl:with-param name="width">80</xsl:with-param>
</xsl:call-template>
-->
</pre>
</xsl:template>
<!-- anchor: an empty named anchor that carries @id both as name and as id. -->
<xsl:template match="anchor"><a name="{@id}">
<xsl:copy-of select="@id"/></a>
</xsl:template>
<!-- icon: an img of class "icon" with the size, source, alt and id attributes
     copied over. -->
<xsl:template match="icon">
<xsl:apply-templates select="@id"/>
<img class="icon">
<xsl:copy-of select="@height | @width | @src | @alt | @id"/>
</img>
</xsl:template>
<!-- code: a span of class "codefrag" (plus any existing class) holding the
     element's string value; child markup is not processed. -->
<xsl:template match="code">
<xsl:apply-templates select="@id"/>
<span>
<xsl:call-template name="add.class">
<xsl:with-param name="class">codefrag</xsl:with-param>
</xsl:call-template>
<xsl:copy-of select="@id"/>
<xsl:value-of select="."/>
</span>
</xsl:template>
<!-- figure: a centred div wrapping an img of class "figure"; @id is copied to
     both the div and the img. -->
<xsl:template match="figure">
<xsl:apply-templates select="@id"/>
<div align="center">
<xsl:copy-of select="@id"/>
<img class="figure">
<xsl:copy-of select="@height | @width | @src | @alt | @id"/>
</img>
</div>
</xsl:template>
<!--
Tables. A table with a non-empty class is passed through with all of its
attributes. Any other table is written with the Forrest defaults
(cellpadding 4, cellspacing 1, class "ForrestTable"), after which the listed
attributes of the source table are copied over them.
NOTE(review): an empty class attribute on the source table therefore replaces
"ForrestTable" with an empty class; confirm whether that is intended.
-->
<xsl:template match="table">
  <xsl:apply-templates select="@id"/>
  <xsl:choose>
    <xsl:when test="@class != ''">
      <!-- Tables with class are passed without change -->
      <xsl:copy>
        <xsl:copy-of select="@*"/>
        <xsl:apply-templates/>
      </xsl:copy>
    </xsl:when>
    <xsl:otherwise>
      <!-- Forrest specific processing is limited to tables without class -->
      <table cellpadding="4" cellspacing="1" class="ForrestTable">
        <xsl:copy-of select="@cellspacing | @cellpadding | @border | @class | @bgcolor |@id"/>
        <xsl:apply-templates/>
      </table>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Title attribute of an acronym: copied with whitespace normalised. -->
<xsl:template match="acronym/@title">
  <xsl:attribute name="title">
    <xsl:value-of select="normalize-space(.)"/>
  </xsl:attribute>
</xsl:template>
<!-- Author list: "by" before the first person, "," before each later one,
     each followed by a space and the person's name. -->
<xsl:template match="header/authors">
  <xsl:for-each select="person">
    <xsl:if test="position() = 1">by</xsl:if>
    <xsl:if test="position() != 1">,</xsl:if>
    <xsl:text> </xsl:text>
    <xsl:value-of select="@name"/>
  </xsl:for-each>
</xsl:template>
<!--
Version: a span of class "version" holding the major/minor/fix/tag attributes
followed by the element text. Text that starts with '$Revision: ' is written
as "version N", where N is the text with that 11-character prefix and its
last two characters removed.
-->
<xsl:template match="version">
  <span class="version">
    <xsl:apply-templates select="@major"/>
    <xsl:apply-templates select="@minor"/>
    <xsl:apply-templates select="@fix"/>
    <xsl:apply-templates select="@tag"/>
    <xsl:choose>
      <xsl:when test="starts-with(., '$Revision: ')">
        <!-- The line break before "version" is part of the output. -->
        <xsl:text>&#10;version </xsl:text>
        <xsl:value-of select="substring(., 12, string-length(.) - 11 - 2)"/>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="."/>
      </xsl:otherwise>
    </xsl:choose>
  </span>
</xsl:template>
<!-- Major number: a line break, "v", then the value. -->
<xsl:template match="@major">
  <xsl:text>&#10;v</xsl:text>
  <xsl:value-of select="."/>
</xsl:template>
<!-- Minor and fix numbers: the value preceded by a dot. -->
<xsl:template match="@minor | @fix">
  <xsl:text>.</xsl:text>
  <xsl:value-of select="."/>
</xsl:template>
<!-- Tag: the value preceded by a hyphen. -->
<xsl:template match="@tag">
  <xsl:text>-</xsl:text>
  <xsl:value-of select="."/>
</xsl:template>
<!-- Header type: "Type: " followed by the element's text, in a paragraph of
     class "type". -->
<xsl:template match="type">
  <p class="type">
    <!-- FIXME: i18n Transformer here -->
    <xsl:value-of select="concat('Type: ', .)"/>
  </p>
</xsl:template>
<!-- Header abstract: its processed content in a paragraph. -->
<xsl:template match="abstract">
  <p>
    <xsl:apply-templates/>
  </p>
</xsl:template>
<!-- Named template: a mailto link built from the current node's @email,
     labelled with its @name. -->
<xsl:template name="email">
  <a href="mailto:{@email}">
    <xsl:value-of select="@name"/>
  </a>
</xsl:template>
<!--
Named template: identifier for the current node. Uses @id when present, else
@title when present, else an id generated by the processor.
-->
<xsl:template name="generate-id">
  <xsl:choose>
    <xsl:when test="not(@id | @title)">
      <xsl:value-of select="generate-id(.)"/>
    </xsl:when>
    <xsl:when test="@id">
      <xsl:value-of select="@id"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="@title"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Templates for "toc" mode. This will generate a complete
     Table of Contents for the document. This will then be used
     by the site2xhtml to generate a Menu ToC and a Page ToC -->
<xsl:template match="document" mode="toc">
  <xsl:apply-templates mode="toc"/>
</xsl:template>
<!-- Body: a tocitems element holding one entry per top-level section. It is
     flagged force="true" when the header has a meta named
     'forrest.force-toc' whose value is 'true'. -->
<xsl:template match="body" mode="toc">
  <tocitems>
    <xsl:if test="../header/meta[@name='forrest.force-toc'] = 'true'">
      <xsl:attribute name="force">true</xsl:attribute>
    </xsl:if>
    <xsl:apply-templates select="section" mode="toc">
      <xsl:with-param name="level" select="1"/>
    </xsl:apply-templates>
  </tocitems>
</xsl:template>
<!-- Section: a tocitem carrying its level, a '#' link to the section's
     identifier and its title; children are processed one level deeper. -->
<xsl:template match="section" mode="toc">
  <xsl:param name="level"/>
  <xsl:variable name="anchor">
    <xsl:call-template name="generate-id"/>
  </xsl:variable>
  <tocitem level="{$level}" href="#{$anchor}" title="{title}">
    <xsl:apply-templates mode="toc">
      <xsl:with-param name="level" select="$level+1"/>
    </xsl:apply-templates>
  </tocitem>
</xsl:template>
<!--
Named template: sets the class attribute on the element being built.
  class - the class name to set.
If the current source node already has a class attribute, its value is
appended after a single space.
-->
<xsl:template name="add.class">
  <xsl:param name="class"/>
  <xsl:attribute name="class">
    <xsl:value-of select="$class"/>
    <xsl:if test="@class">
      <xsl:text> </xsl:text>
      <xsl:value-of select="@class"/>
    </xsl:if>
  </xsl:attribute>
</xsl:template>
<!-- "toc" mode fallback: any node without a more specific toc template
     produces no output. -->
<xsl:template match="node()|@*" mode="toc"/>
<!-- End of "toc" mode templates -->
<!-- Lowest-priority fallback for the default mode: copies the node, processing
     its non-id attributes and its children through the templates above. -->
<xsl:template match="node()|@*" priority="-1">
<!-- id processing will create its own a-element so processing has to
happen outside the copied element
-->
<xsl:apply-templates select="@id"/>
<xsl:copy>
<xsl:apply-templates select="@*[name(.) != 'id']"/>
<xsl:copy-of select="@id"/>
<xsl:apply-templates/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,73 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Contains the 'dotdots' template, which, given a path, will output a set of
directory traversals to get back to the source directory. Handles both '/' and
'\' directory separators.
Examples:
Input Output
index.html ""
dir/index.html "../"
dir/subdir/index.html "../../"
dir//index.html "../"
dir/ "../"
dir// "../"
\some\windows\path "../../"
\some\windows\path\ "../../../"
\Program Files\mydir "../"
Cannot handle ..'s in the path, so don't expect 'dir/subdir/../index.html' to
work.
-->
<xsl:stylesheet
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template name="dotdots">
  <!-- Outputs one '../' per directory level contained in $path, i.e. the
       relative prefix that leads from the file at $path back to the root.
       path: a relative or absolute path using '/' or '\' as separator. -->
  <xsl:param name="path"/>
  <!-- The replacement string must be as long as the search string: space
       becomes '_', while '/' and '\' BOTH become a space. The two spaces are
       written as character references so they cannot be collapsed by
       accident; with only one space, translate() would delete '\' instead
       of treating it as a separator. -->
  <xsl:variable name="dirs" select="normalize-space(translate(concat($path, 'x'), ' /\', '_&#32;&#32;'))"/>
  <!-- The above does the following:
       o Adds a trailing character to the path. This prevents us having to deal
         with the special case of ending with '/'
       o Translates all directory separators to ' ', and normalize spaces,
         cunningly eliminating duplicate '//'s. We also translate any real
         spaces into _ to preserve them.
  -->
  <!-- Everything after the first separator; empty when no directory is left. -->
  <xsl:variable name="remainder" select="substring-after($dirs, ' ')"/>
  <xsl:if test="$remainder">
    <xsl:text>../</xsl:text>
    <xsl:call-template name="dotdots">
      <!-- Translate back to /'s because that's what the template expects. -->
      <xsl:with-param name="path" select="translate($remainder, ' ', '/')"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!--
Uncomment to test.
Usage: saxon dotdots.xsl dotdots.xsl path='/my/test/path'
<xsl:param name="path"/>
<xsl:template match="/">
<xsl:message>Path: <xsl:value-of select="$path"/></xsl:message>
<xsl:call-template name="dotdots">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:template>
-->
</xsl:stylesheet>

View File

@ -0,0 +1,231 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
PathUtils.xsl
A set of XSLT templates useful for parsing URI paths:
dirname: return the directory part of a path
filename: return the file part of a path
ext: return the last extension of the filename in a path
filename-noext: return the file part of a path without its last extension
-->
<!-- Returns the directory part of a path. Equivalent to Unix 'dirname'.
Examples:
'' -> ''
'foo/index.html' -> 'foo/'
-->
<xsl:template name="dirname">
  <!-- Outputs everything up to and including the last '/' of $path,
       or nothing when $path contains no '/'. Works by emitting the first
       segment plus '/' and recursing on the rest. -->
  <xsl:param name="path"/>
  <xsl:variable name="head" select="substring-before($path, '/')"/>
  <xsl:variable name="tail" select="substring-after($path, '/')"/>
  <xsl:if test="contains($path, '/')">
    <xsl:value-of select="$head"/>
    <xsl:text>/</xsl:text>
    <xsl:call-template name="dirname">
      <xsl:with-param name="path" select="$tail"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- Normalized (..'s eliminated) version of 'dirname' -->
<xsl:template name="dirname-nz">
  <!-- Directory part of $path, passed through the 'normalize' template. -->
  <xsl:param name="path"/>
  <xsl:variable name="dir">
    <xsl:call-template name="dirname">
      <xsl:with-param name="path" select="$path"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:call-template name="normalize">
    <xsl:with-param name="path" select="string($dir)"/>
  </xsl:call-template>
</xsl:template>
<!-- Returns the filename part of a path. Equivalent to Unix 'basename'
Examples:
'index.html' -> 'index.html'
'foo/bar/' -> ''
'foo/bar/index.html' -> 'index.html'
-->
<xsl:template name="filename">
  <!-- Outputs the part of $path after its last '/', or all of $path when
       it has no '/'. A path ending in '/' yields the empty string. -->
  <xsl:param name="path"/>
  <!-- No separator left: what remains is the file name. -->
  <xsl:if test="not(contains($path, '/'))">
    <xsl:value-of select="$path"/>
  </xsl:if>
  <!-- Otherwise drop the leading segment and try again. -->
  <xsl:if test="contains($path, '/')">
    <xsl:call-template name="filename">
      <xsl:with-param name="path" select="substring-after($path, '/')"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<!-- Returns the last extension of a filename in a path.
Examples:
'index.html' -> '.html'
'index.dtdx.html' -> '.html'
'foo/bar/' -> ''
'foo/bar/index.html' -> '.html'
'foo/bar/index' -> ''
-->
<xsl:template name="ext">
  <!-- Outputs the last extension (including the leading '.') of the file
       name in $path, or nothing when the file name has no '.'.
       path:    the path or file name to inspect.
       subflag: internal; non-empty on recursive calls made after a '.' has
                already been consumed. Callers should not pass it.
       Only the part after the last '/' is examined, so dots in directory
       names ('foo.bar/index', '../foo/index') no longer produce a bogus
       extension. -->
  <xsl:param name="path"/>
  <xsl:param name="subflag"/>
  <xsl:choose>
    <!-- Skip directory components first; subflag is intentionally not
         forwarded because dots seen so far belonged to a directory. -->
    <xsl:when test="contains($path, '/')">
      <xsl:call-template name="ext">
        <xsl:with-param name="path" select="substring-after($path, '/')"/>
      </xsl:call-template>
    </xsl:when>
    <!-- More dots ahead: keep only what follows the first one. -->
    <xsl:when test="contains($path, '.')">
      <xsl:call-template name="ext">
        <xsl:with-param name="path" select="substring-after($path, '.')"/>
        <xsl:with-param name="subflag" select="'sub'"/>
      </xsl:call-template>
    </xsl:when>
    <!-- Outermost call and no dot at all: extension-less name, return '' -->
    <xsl:when test="not($subflag)">
      <xsl:text/>
    </xsl:when>
    <!-- $path is now the text after the last dot. -->
    <xsl:otherwise>
      <xsl:value-of select="concat('.', $path)"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!-- Returns a filename of a path stripped of its last extension.
Examples:
'foo/bar/index.dtdx.html' -> 'index.dtdx'
-->
<xsl:template name="filename-noext">
  <!-- Outputs the file name of $path with its last extension removed. -->
  <xsl:param name="path"/>
  <!-- File part of the path. -->
  <xsl:variable name="base">
    <xsl:call-template name="filename">
      <xsl:with-param name="path" select="$path"/>
    </xsl:call-template>
  </xsl:variable>
  <!-- Its last extension, including the dot (may be empty). -->
  <xsl:variable name="suffix">
    <xsl:call-template name="ext">
      <xsl:with-param name="path" select="$base"/>
    </xsl:call-template>
  </xsl:variable>
  <!-- Keep as many leading characters as remain once the suffix is cut. -->
  <xsl:variable name="keep" select="string-length($base) - string-length($suffix)"/>
  <xsl:value-of select="substring($base, 1, $keep)"/>
</xsl:template>
<!-- Returns a path with the filename stripped of its last extension.
Examples:
'foo/bar/index.dtdx.html' -> 'foo/bar/index.dtdx'
-->
<xsl:template name="path-noext">
  <!-- Outputs $path shortened by the length of whatever the 'ext' template
       returns for it. -->
  <xsl:param name="path"/>
  <xsl:variable name="suffix">
    <xsl:call-template name="ext">
      <xsl:with-param name="path" select="$path"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:variable name="keep" select="string-length($path) - string-length($suffix)"/>
  <xsl:value-of select="substring($path, 1, $keep)"/>
</xsl:template>
<!-- Normalized (..'s eliminated) version of 'path-noext' -->
<xsl:template name="path-noext-nz">
  <!-- Result of 'path-noext' for $path, passed through 'normalize'. -->
  <xsl:param name="path"/>
  <xsl:variable name="stripped">
    <xsl:call-template name="path-noext">
      <xsl:with-param name="path" select="$path"/>
    </xsl:call-template>
  </xsl:variable>
  <xsl:call-template name="normalize">
    <xsl:with-param name="path" select="string($stripped)"/>
  </xsl:call-template>
</xsl:template>
<!-- Returns a path with any fragment identifier ('#...') stripped off
Examples:
'foo/bar/index.dtdx.html#blah' -> 'foo/bar/index.dtdx.html'
-->
<xsl:template name="path-nofrag">
  <!-- Outputs $path up to (not including) its first '#'; the whole path
       when it has none. Appending a sentinel '#' lets one substring-before()
       call cover both cases. -->
  <xsl:param name="path"/>
  <xsl:value-of select="substring-before(concat($path, '#'), '#')"/>
</xsl:template>
<!-- Normalizes a path, converting '\' to '/' and eliminating ..'s
Examples:
'foo/bar/../baz/index.html' -> 'foo/baz/index.html'
-->
<xsl:template name="normalize">
  <!-- Outputs $path with every '\' replaced by '/' and each 'dir/../' pair
       removed, e.g. 'foo/bar/../baz/index.html' becomes 'foo/baz/index.html'.
       A leading '..' has no directory to cancel against, so it is kept:
       '../../foo' stays '../../foo' instead of collapsing to 'foo'.
       path: the path to normalize. -->
  <xsl:param name="path"/>
  <!-- replace all \ with / -->
  <xsl:variable name="path-" select="translate($path, '\', '/')"/>
  <xsl:choose>
    <!-- process relative refs here -->
    <xsl:when test="contains($path-, '/../')">
      <!-- put part before /../ into $pa: "foo/bar" -->
      <xsl:variable name="pa" select="substring-before($path-, '/../')"/>
      <!-- put part after first occurrence /../ into $th: "baz/index.html" -->
      <xsl:variable name="th" select="substring-after($path-, '/../')"/>
      <xsl:choose>
        <!-- The segment in front of /../ is itself '..': nothing can be
             cancelled, so emit both steps unchanged and normalize the rest. -->
        <xsl:when test="$pa = '..'">
          <xsl:text>../../</xsl:text>
          <xsl:call-template name="normalize">
            <xsl:with-param name="path" select="$th"/>
          </xsl:call-template>
        </xsl:when>
        <xsl:otherwise>
          <!-- cut last real directory name before /../ and put rest in $pa- : "foo/" -->
          <xsl:variable name="pa-">
            <xsl:call-template name="dirname">
              <xsl:with-param name="path" select="$pa"/>
            </xsl:call-template>
          </xsl:variable>
          <!-- recombine pieces thus eliminating one .. and one dir step before it
               and recurse into this template to eliminate more /../
          -->
          <xsl:variable name="pa-th" select="concat($pa-, $th)"/>
          <xsl:call-template name="normalize">
            <xsl:with-param name="path" select="$pa-th"/>
          </xsl:call-template>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$path-"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
Uncomment this to test.
Usage: saxon pathutils.xsl pathutils.xsl path=foo/bar
<xsl:param name="path" select="'/foo/bar/../baz/index.html'"/>
<xsl:template match="/">
<xsl:message>
path = <xsl:value-of select="$path"/>
normalize = <xsl:call-template name="normalize">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
dirname = <xsl:call-template name="dirname">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
dirname-nz = <xsl:call-template name="dirname-nz">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
filename = <xsl:call-template name="filename">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
ext = <xsl:call-template name="ext">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
filename-noext = <xsl:call-template name="filename-noext">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
path-noext = <xsl:call-template name="path-noext">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
path-noext-nz = <xsl:call-template name="path-noext-nz">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
path-nofrag = <xsl:call-template name="path-nofrag">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:message>
</xsl:template>
-->
</xsl:stylesheet>

View File

@ -0,0 +1,67 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
A simple callable template that renders a logo for an entity. The logo will
be a hyperlink and may include an image (with width and height if specified)
or else it will just include the specified text.
Note that text and image are mandatory parts of the template.
-->
<xsl:stylesheet
version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:template name="renderlogo">
  <!-- Renders a hyperlink to $url containing either a logo image or, when
       no logo is given, the plain $name text.
       name:        alt text of the image, or the link text without a logo.
       url:         target of the hyperlink.
       logo:        image location; $root is prefixed unless it starts with
                    'http://' or 'https://'.
       width:       optional image width attribute.
       height:      optional image height attribute.
       root:        relative path prefix that leads to the site root.
       description: optional title attribute for the image. -->
  <xsl:param name="name"/>
  <xsl:param name="url"/>
  <xsl:param name="logo"/>
  <xsl:param name="width"/>
  <xsl:param name="height"/>
  <xsl:param name="root"/>
  <xsl:param name="description"/><a href="{$url}">
    <xsl:choose>
      <xsl:when test="$logo and not($logo = '')">
        <img alt="{$name}" class="logoImage">
          <xsl:attribute name="src">
            <!-- Absolute logo URLs (plain or TLS) must not get the
                 site-relative prefix. -->
            <xsl:if test="not(starts-with($logo, 'http://') or starts-with($logo, 'https://'))">
              <xsl:value-of select="$root"/>
            </xsl:if>
            <xsl:value-of select="$logo"/>
          </xsl:attribute>
          <xsl:if test="$width">
            <xsl:attribute name="width">
              <xsl:value-of select="$width"/>
            </xsl:attribute>
          </xsl:if>
          <xsl:if test="$height">
            <xsl:attribute name="height">
              <xsl:value-of select="$height"/>
            </xsl:attribute>
          </xsl:if>
          <xsl:if test="$description">
            <xsl:attribute name="title">
              <xsl:value-of select="$description"/>
            </xsl:attribute>
          </xsl:if>
        </img>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$name"/>
      </xsl:otherwise>
    </xsl:choose></a>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,388 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
site-to-xhtml.xsl is the final stage in HTML page production. It merges HTML from
document-to-html.xsl, tab-to-menu.xsl and book-to-menu.xsl, and adds the site header,
footer, searchbar, css etc. As input, it takes XML of the form:
<site>
<div class="menu">
...
</div>
<div class="tab">
...
</div>
<div class="content">
...
</div>
</site>
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
xmlns:java="http://xml.apache.org/xslt/java" exclude-result-prefixes="java">
<!-- Shortcut to the skinconfig element(s) found anywhere in the input -->
<xsl:variable name="config" select="//skinconfig"/>
<!-- If true, a txt link for this page will not be generated -->
<xsl:variable name="disable-txt-link" select="//skinconfig/disable-txt-link"/>
<!-- If true, a PDF link for this page will not be generated -->
<xsl:variable name="disable-pdf-link" select="//skinconfig/disable-pdf-link"/>
<!-- If true, a "print" link for this page will not be generated -->
<xsl:variable name="disable-print-link" select="//skinconfig/disable-print-link"/>
<!-- If true, an XML link for this page will not be generated -->
<xsl:variable name="disable-xml-link" select="//skinconfig/disable-xml-link"/>
<!-- If true, a POD link for this page will not be generated -->
<xsl:variable name="disable-pod-link" select="//skinconfig/disable-pod-link"/>
<!-- Get the location where to generate the minitoc -->
<xsl:variable name="minitoc-location" select="//skinconfig/toc/@location"/>
<!-- Path handed in by the caller; it feeds $root, $filename and
     $filename-noext below. NOTE(review): presumably the path of the page
     being rendered, relative to the site root. Confirm against the caller. -->
<xsl:param name="path"/>
<!-- Helper templates: dotdots, the path utilities and renderlogo -->
<xsl:include href="dotdots.xsl"/>
<xsl:include href="pathutils.xsl"/>
<xsl:include href="renderlogo.xsl"/>
<!-- Path (..'s) to the root directory -->
<xsl:variable name="root">
<xsl:call-template name="dotdots">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!-- Source filename (eg 'foo.xml') of current page -->
<xsl:variable name="filename">
<xsl:call-template name="filename">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!-- Path of Lucene search results page (relative to $root) -->
<xsl:param name="lucene-search" select="'lucene-search.html'"/>
<!-- File name of the current page without its last extension -->
<xsl:variable name="filename-noext">
<xsl:call-template name="filename-noext">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!-- Whether to obfuscate email links -->
<xsl:variable name="obfuscate-mail-links" select="//skinconfig/obfuscate-mail-links"/>
<!-- If true, the font size script will not be rendered -->
<xsl:variable name="disable-font-script" select="//skinconfig/disable-font-script"/>
<!-- If true, the images on all external links will not be added -->
<xsl:variable name="disable-external-link-image" select="//skinconfig/disable-external-link-image"/>
<!-- Skin image directory and spacer image, both prefixed with $root -->
<xsl:variable name="skin-img-dir" select="concat(string($root), 'skin/images')"/>
<xsl:variable name="spacer" select="concat($root, 'skin/images/spacer.gif')"/>
<xsl:template name="breadcrumbs">
  <!-- Renders up to three configured trail links (link1 to link3 below
       skinconfig/trail), separating the second and third with ' > ', and
       then references the breadcrumbs script of the skin. A link is
       rendered only when its name attribute is present and non-empty. -->
  <xsl:variable name="trail" select="//skinconfig/trail"/>
  <xsl:if test="$trail/link1/@name != ''">
    <a href="{$trail/link1/@href}">
      <xsl:value-of select="$trail/link1/@name"/>
    </a>
  </xsl:if>
  <xsl:if test="$trail/link2/@name != ''">
    <xsl:text> &gt; </xsl:text>
    <a href="{$trail/link2/@href}">
      <xsl:value-of select="$trail/link2/@name"/>
    </a>
  </xsl:if>
  <xsl:if test="$trail/link3/@name != ''">
    <xsl:text> &gt; </xsl:text>
    <a href="{$trail/link3/@href}">
      <xsl:value-of select="$trail/link3/@name"/>
    </a>
  </xsl:if>
  <script src="{$root}skin/breadcrumbs.js" language="JavaScript" type="text/javascript"/>
</xsl:template>
<xsl:template match="site">
  <!-- Builds the final HTML page from the merged <site> input: head with
       title and optional favicon, group and project logos, the tab, menu
       and content divs, and a footer with copyright, host logo, credits
       and the W3C validator badge.
       The host logo is rendered by 'renderlogo' alone; that template already
       wraps its output in a link to the given url, so no additional anchor
       is placed around the call (nested anchors are invalid HTML). -->
  <html>
    <head>
      <!-- Page title is taken from the h1 of the content table. -->
      <title><xsl:value-of select="div[@class='content']/table/tr/td/h1"/></title>
      <xsl:if test="//skinconfig/favicon-url"><link rel="shortcut icon">
          <xsl:attribute name="href">
            <xsl:value-of select="concat($root,//skinconfig/favicon-url)"/>
          </xsl:attribute></link>
      </xsl:if>
    </head>
    <body>
      <!-- The group logo is optional, the project logo is always rendered. -->
      <xsl:if test="//skinconfig/group-url">
        <xsl:call-template name="renderlogo">
          <xsl:with-param name="name" select="//skinconfig/group-name"/>
          <xsl:with-param name="url" select="//skinconfig/group-url"/>
          <xsl:with-param name="logo" select="//skinconfig/group-logo"/>
          <xsl:with-param name="root" select="$root"/>
          <xsl:with-param name="description" select="//skinconfig/group-description"/>
        </xsl:call-template>
      </xsl:if>
      <xsl:call-template name="renderlogo">
        <xsl:with-param name="name" select="//skinconfig/project-name"/>
        <xsl:with-param name="url" select="//skinconfig/project-url"/>
        <xsl:with-param name="logo" select="//skinconfig/project-logo"/>
        <xsl:with-param name="root" select="$root"/>
        <xsl:with-param name="description" select="//skinconfig/project-description"/>
      </xsl:call-template>
      <xsl:comment>================= start Tabs ==================</xsl:comment>
      <xsl:apply-templates select="div[@class='tab']"/>
      <xsl:comment>================= end Tabs ==================</xsl:comment>
      <xsl:comment>================= start Menu items ==================</xsl:comment>
      <xsl:apply-templates select="div[@class='menu']"/>
      <xsl:comment>================= end Menu items ==================</xsl:comment>
      <xsl:comment>================= start Content==================</xsl:comment>
      <xsl:apply-templates select="div[@class='content']"/>
      <xsl:comment>================= end Content==================</xsl:comment>
      <xsl:comment>================= start Footer ==================</xsl:comment>
      <!-- The copyright line becomes a link when a copyright-link is configured. -->
      <xsl:choose>
        <xsl:when test="$config/copyright-link"><a>
            <xsl:attribute name="href">
              <xsl:value-of select="$config/copyright-link"/>
            </xsl:attribute>
Copyright &#169; <xsl:value-of select="$config/year"/>
            <xsl:call-template name="current-year">
              <xsl:with-param name="copyrightyear" select="$config/year"/>
            </xsl:call-template>&#160;
<xsl:value-of select="$config/vendor"/></a>
        </xsl:when>
        <xsl:otherwise>
Copyright &#169; <xsl:value-of select="$config/year"/>
          <xsl:call-template name="current-year">
            <xsl:with-param name="copyrightyear" select="$config/year"/>
          </xsl:call-template>&#160;
<xsl:value-of select="$config/vendor"/>
        </xsl:otherwise>
      </xsl:choose>
All rights reserved.
<script language="JavaScript" type="text/javascript"><![CDATA[<!--
document.write(" - "+"Last Published: " + document.lastModified);
// -->]]></script>
      <!-- renderlogo emits the surrounding link itself. -->
      <xsl:if test="//skinconfig/host-logo and not(//skinconfig/host-logo = '')">
        <xsl:call-template name="renderlogo">
          <xsl:with-param name="name" select="//skinconfig/host-name"/>
          <xsl:with-param name="url" select="//skinconfig/host-url"/>
          <xsl:with-param name="logo" select="//skinconfig/host-logo"/>
          <xsl:with-param name="root" select="$root"/>
        </xsl:call-template>
      </xsl:if>
      <!-- Credits (except those with role 'pdf') appear on index.html only. -->
      <xsl:if test="$filename = 'index.html' and //skinconfig/credits">
        <xsl:for-each select="//skinconfig/credits/credit[not(@role='pdf')]">
          <xsl:call-template name="renderlogo">
            <xsl:with-param name="name" select="name"/>
            <xsl:with-param name="url" select="url"/>
            <xsl:with-param name="logo" select="image"/>
            <xsl:with-param name="root" select="$root"/>
            <xsl:with-param name="width" select="width"/>
            <xsl:with-param name="height" select="height"/>
          </xsl:call-template>
        </xsl:for-each>
      </xsl:if><a href="http://validator.w3.org/check/referer">
        <img class="skin" border="0"
             src="http://www.w3.org/Icons/valid-html401"
             alt="Valid HTML 4.01!" height="31" width="88"/></a>
    </body>
  </html>
</xsl:template>
<!-- Add links to any standards-compliance logos -->
<xsl:template name="compliancy-logos">
  <!-- Emits the W3C HTML and CSS validator badges. They are shown only on
       index.html and only when skinconfig/disable-compliance-links is
       'false'. -->
  <xsl:variable name="show-badges" select="$filename = 'index.html' and //skinconfig/disable-compliance-links = 'false'"/>
  <xsl:if test="$show-badges">
    <a href="http://validator.w3.org/check/referer">
      <img class="logoImage"
           src="{concat($skin-img-dir, '/valid-html401.png')}"
           alt="Valid HTML 4.01!" title="Valid HTML 4.01!" height="31" width="88" border="0"/>
    </a>
    <a href="http://jigsaw.w3.org/css-validator/check/referer">
      <img class="logoImage"
           src="{concat($skin-img-dir, '/vcss.png')}"
           alt="Valid CSS!" title="Valid CSS!" height="31" width="88" border="0"/>
    </a>
  </xsl:if>
</xsl:template>
<!-- Generates the PDF link -->
<xsl:template match="div[@id='skinconf-pdflink']">
  <!-- Replaces the pdflink placeholder div with a table cell linking to the
       PDF rendition of the current page. Shown when the configuration has
       no disable-pdf-link element at all, or when its value is 'false'. -->
  <xsl:variable name="enabled" select="not($config/disable-pdf-link) or $disable-pdf-link = 'false'"/>
  <xsl:if test="$enabled">
    <td align="center" width="40" nowrap="nowrap">
      <a href="{concat($filename-noext, '.pdf')}" class="dida">
        <img class="skin" src="{concat($skin-img-dir, '/pdfdoc.gif')}" alt="PDF"/>
        <br/>
        <xsl:text>&#10;PDF</xsl:text>
      </a>
    </td>
  </xsl:if>
</xsl:template>
<!-- Generates the TXT link -->
<xsl:template match="div[@id='skinconf-txtlink']">
  <!-- Replaces the txtlink placeholder div with a table cell linking to the
       plain-text rendition of the current page. Shown only when
       disable-txt-link is 'false'. -->
  <xsl:variable name="enabled" select="$disable-txt-link = 'false'"/>
  <xsl:if test="$enabled">
    <td align="center" width="40" nowrap="nowrap">
      <a href="{concat($filename-noext, '.txt')}" class="dida">
        <img class="skin" src="{concat($skin-img-dir, '/txtdoc.png')}" alt="TXT"/>
        <br/>
        <xsl:text>&#10;TXT</xsl:text>
      </a>
    </td>
  </xsl:if>
</xsl:template>
<!-- Generates the POD link -->
<xsl:template match="div[@id='skinconf-podlink']">
  <!-- Replaces the podlink placeholder div with a table cell linking to the
       POD rendition of the current page. Shown only when disable-pod-link
       is 'false'. -->
  <xsl:variable name="enabled" select="$disable-pod-link = 'false'"/>
  <xsl:if test="$enabled">
    <td align="center" width="40" nowrap="nowrap">
      <a href="{concat($filename-noext, '.pod')}" class="dida">
        <img class="skin" src="{concat($skin-img-dir, '/poddoc.png')}" alt="POD"/>
        <br/>
        <xsl:text>&#10;POD</xsl:text>
      </a>
    </td>
  </xsl:if>
</xsl:template>
<!-- Generates the XML link -->
<xsl:template match="div[@id='skinconf-xmllink']">
  <!-- Replaces the xmllink placeholder div with a table cell linking to the
       XML source of the current page. Shown only when disable-xml-link is
       'false'. -->
  <xsl:variable name="enabled" select="$disable-xml-link = 'false'"/>
  <xsl:if test="$enabled">
    <td align="center" width="40" nowrap="nowrap">
      <a href="{concat($filename-noext, '.xml')}" class="dida">
        <img class="skin" src="{concat($skin-img-dir, '/xmldoc.gif')}" alt="XML"/>
        <br/>
        <xsl:text>&#10;XML</xsl:text>
      </a>
    </td>
  </xsl:if>
</xsl:template>
<!-- Generates the "printer friendly version" link -->
<xsl:template match="div[@id='skinconf-printlink']">
<!-- Replaces the printlink placeholder div with a script that defines
     printit() and writes a "print" table cell via document.write().
     Rendered only when disable-print-link is 'false'.
     The markup inside the document.write() calls is not wrapped in CDATA,
     so the stylesheet parser reads the td, a, img and br tags as literal
     result elements nested in the script element; that is also why the
     {$skin-img-dir} attribute value template on the img is expanded.
     NOTE(review): presumably relies on the serializer writing those
     elements verbatim inside the script. Confirm before restructuring. -->
<xsl:if test="$disable-print-link = 'false'">
<script type="text/javascript" language="Javascript">
function printit() {
if (window.print) {
window.focus();
window.print();
}
}
var NS = (navigator.appName == "Netscape");
var VERSION = parseInt(navigator.appVersion);
if (VERSION > 3) {
document.write('<td align="center" width="40" nowrap="nowrap">');
document.write(' <a href="javascript:printit()" class="dida">');
document.write(' <img class="skin" src="{$skin-img-dir}/printer.gif" alt="Print this Page"/><br />');
document.write(' print</a>');
document.write('</td>');
}
</script>
</xsl:if>
</xsl:template>
<!-- handle all obfuscating mail links and disabling external link images -->
<xsl:template match="a">
  <!-- Rewrites hyperlinks in three ways:
       1. mailto: links get the configured obfuscation text inserted in
          place of the '@' when obfuscate-mail-links is 'true';
       2. links whose href contains ':' and does not contain the group or
          project URL are re-emitted with class="external", unless
          disable-external-link-image is 'true';
       3. everything else is copied unchanged.
       An absent or empty group-url / project-url is ignored in step 2:
       contains(x, '') is always true in XPath, which would otherwise stop
       every link from being treated as external. -->
  <xsl:variable name="group-url" select="string(//skinconfig/group-url)"/>
  <xsl:variable name="project-url" select="string(//skinconfig/project-url)"/>
  <xsl:choose>
    <xsl:when test="$obfuscate-mail-links='true' and starts-with(@href, 'mailto:') and contains(@href, '@')">
      <xsl:variable name="mailto-1" select="substring-before(@href,'@')"/>
      <xsl:variable name="mailto-2" select="substring-after(@href,'@')"/>
      <xsl:variable name="obfuscation" select="normalize-space(//skinconfig/obfuscate-mail-value)"/><a href="{$mailto-1}{$obfuscation}{$mailto-2}">
        <xsl:apply-templates/></a>
    </xsl:when>
    <xsl:when test="not($disable-external-link-image='true') and contains(@href, ':') and not($group-url != '' and contains(@href, $group-url)) and not($project-url != '' and contains(@href, $project-url))"><a href="{@href}" class="external">
        <xsl:apply-templates/></a>
    </xsl:when>
    <xsl:otherwise>
      <!-- xsl:copy-of makes sure we copy <a href> as well as <a name>
           or any other <a ...> forms -->
      <xsl:copy-of select="."/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<xsl:template match="div[@id='skinconf-toc-page']">
  <!-- Replaces the toc-page placeholder div with the page-level mini ToC.
       All of the following must hold: a toc element is configured, its
       location contains 'page', and the document either has at least
       min-sections top-level entries or forces the ToC. -->
  <xsl:variable name="wanted" select="$config/toc and contains($minitoc-location, 'page')"/>
  <xsl:if test="$wanted and ((count(//tocitems/tocitem) >= $config/toc/@min-sections) or (//tocitems/@force = 'true'))">
    <xsl:call-template name="minitoc">
      <xsl:with-param name="tocroot" select="//tocitems"/>
    </xsl:call-template>
  </xsl:if>
</xsl:template>
<xsl:template name="minitoc">
  <!-- Renders the tocitem children of $tocroot as a nested <ul class="minitoc">.
       tocroot: the tocitems element, or a tocitem when recursing.
       A list is produced only when $tocroot has at least min-sections
       entries (or carries force="true") and the configured toc location
       contains 'page'. -->
  <xsl:param name="tocroot"/>
  <xsl:variable name="enough" select="count($tocroot/tocitem) >= $config/toc/@min-sections"/>
  <xsl:variable name="forced" select="$tocroot/@force = 'true'"/>
  <xsl:if test="($enough or $forced) and contains($config/toc/@location, 'page')">
    <ul class="minitoc">
      <xsl:for-each select="$tocroot/tocitem">
        <li>
          <a href="{@href}">
            <xsl:value-of select="@title"/>
          </a>
          <!-- Descend only while this entry is within the configured max-depth. -->
          <xsl:if test="@level &lt; //skinconfig/toc/@max-depth + 1">
            <xsl:call-template name="minitoc">
              <xsl:with-param name="tocroot" select="."/>
            </xsl:call-template>
          </xsl:if>
        </li>
      </xsl:for-each>
    </ul>
  </xsl:if>
</xsl:template>
<xsl:template name="html-meta">
  <!-- Emits the generator meta tags: a fixed "Generator" entry plus the
       Forrest version and skin name read from the info element. -->
  <meta name="Generator" content="Apache Forrest"/>
  <meta name="Forrest-version" content="{//info/forrest-version}"/>
  <meta name="Forrest-skin-name" content="{//info/project-skin}"/>
</xsl:template>
<!-- meta information from v 2.0 documents
FIXME: the match is really inefficient -->
<xsl:template name="meta-data">
  <!-- Turns every meta child of a meta-data element into an HTML meta tag:
       name comes from the name attribute, content from the element text,
       and an xml:lang attribute, when present, is emitted as lang. -->
  <xsl:for-each select="//meta-data/meta">
    <meta name="{@name}" content="{.}">
      <xsl:if test="@xml:lang">
        <xsl:attribute name="lang">
          <xsl:value-of select="@xml:lang"/>
        </xsl:attribute>
      </xsl:if>
    </meta>
  </xsl:for-each>
</xsl:template>
<xsl:template name="feedback">
  <!-- Renders the feedback block: the configured feedback text followed by
       the "to" label. The label becomes a link (configured href with the
       current $path appended) when a non-empty href is configured. -->
  <xsl:variable name="fb" select="$config/feedback"/>
  <div id="feedback">
    <xsl:value-of select="$fb"/>
    <xsl:choose>
      <xsl:when test="$fb/@href and not($fb/@href = '')">
        <a id="feedbackto" href="{$fb/@href}{$path}">
          <xsl:value-of select="$fb/@to"/>
        </a>
      </xsl:when>
      <xsl:otherwise>
        <xsl:value-of select="$fb/@to"/>
      </xsl:otherwise>
    </xsl:choose>
  </div>
</xsl:template>
<xsl:template priority="-1" match="@*|node()">
  <!-- Low-priority identity rule: copies any node that no other template
       claims, processing its attributes first and then its children. -->
  <xsl:copy>
    <xsl:apply-templates select="@*|node()"/>
  </xsl:copy>
</xsl:template>
<!-- inception year copyright management -->
<xsl:template name="current-year">
<!-- Displays the current year after the inception year (in the copyright i.e: 2002-2005)
- the copyright year (2005 by default) can be indicated in the copyrightyear parameter,
- the year format (yyyy by default) can be indicated in the dateformat parameter,
- the dates separator (- by default) can be indicated in the dateseparator parameter.
For instance the following call will format the year on 2 digits and separates the dates
with /
(copyright 02/05)
<xsl:call-template name="current-year">
<xsl:with-param name="copyrightyear" select="'02'"/>
<xsl:with-param name="dateformat" select="'yy'"/>
<xsl:with-param name="dateseparator" select="'/'"/>
</xsl:call-template>
Warning, to enable inception year, inception attribute must be set to "true" in copyright/year/@inception
-->
<xsl:param name="copyrightyear">2005</xsl:param>
<xsl:param name="dateformat">yyyy</xsl:param>
<xsl:param name="dateseparator">-</xsl:param>
<!-- Nothing is output unless the passed year node has inception="true".
     NOTE(review): the predicate needs $copyrightyear to be a node-set; the
     default value above is a result tree fragment, which strict XSLT 1.0
     processors may reject here. Confirm callers always pass a node. -->
<xsl:if test="$copyrightyear[@inception = 'true']">
<!-- Java extension calls (java: namespace): format the current date with
     $dateformat in the GMT time zone. -->
<xsl:variable name="tz" select='java:java.util.SimpleTimeZone.new(0,"GMT+00:00")' />
<xsl:variable name="formatter" select="java:java.text.SimpleDateFormat.new($dateformat)"/>
<!-- $settz is never read afterwards; the variable exists only so that
     setTimeZone is invoked on the formatter. -->
<xsl:variable name="settz" select="java:setTimeZone($formatter, $tz)" />
<xsl:variable name="date" select="java:java.util.Date.new()"/>
<xsl:variable name="year" select="java:format($formatter, $date)"/>
<!-- Append separator and current year only when it differs from the
     copyright year. -->
<xsl:if test="$copyrightyear != $year">
<xsl:value-of select="$dateseparator"/>
<xsl:value-of select="$year"/>
</xsl:if>
</xsl:if>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,124 @@
<?xml version="1.0"?>
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="1.0">
<!--
This stylesheet was taken from the XSLT FAQ http://www.dpawson.co.uk/xsl/
Comments and adaption to be used without normalize-space()
by forrest-dev@xml.apache.org
-->
<!--
Input:
<doc>
<para>
123456 2345 343434 545454 43434 343
12345 343434 545454 43434 343
32345645 343434 545454 43434 343
3422222225 343434 545454 43434 343
llllllllllllllllllllllooooooooooooooonnnnnnnnnnnggggggggg
345 343434 545454 43434 343
</para>
</doc>
Output:
<HTML>
<BODY>
<PRE>123456 2345 343434 545454
43434 343 12345 343434 545454
43434 343 32345645 343434
545454 43434 343 3422222225
343434 545454 43434 343
lllllllllllllllllllllloooooooo
ooooooonnnnnnnnnnnggggggggg
345 343434 545454 43434
343
</PRE>
</BODY>
</HTML>
Fragment used:
<xsl:template match="/doc">
<HTML><BODY><PRE>
<xsl:call-template name="format">
<xsl:with-param select="normalize-space(para)" name="txt" />
<xsl:with-param name="width">30</xsl:with-param>
</xsl:call-template>
</PRE></BODY></HTML>
</xsl:template>
-->
<xsl:template match="/body">
  <!-- Wraps the text of the source child of the root body element to a
       width of 40 characters and emits it inside a new body element. -->
  <body>
    <xsl:call-template name="format">
      <xsl:with-param name="txt" select="source"/>
      <xsl:with-param name="width" select="40"/>
    </xsl:call-template>
  </body>
</xsl:template>
<xsl:template name="format">
  <!-- Outputs $txt broken into lines of at most $width characters.
       txt:   the text still to be emitted.
       width: maximum line length.
       An existing newline inside the next $width characters is honoured;
       otherwise the line is broken at the position chosen by 'tune-width'
       (the last space at or before $width, or $width itself when there is
       no space). Each emitted chunk is followed by a newline. -->
  <xsl:param name="txt" />
  <xsl:param name="width" />
  <!-- if there is still text left -->
  <xsl:if test="$txt">
    <!-- XPath string positions start at 1; substring($txt, 0, $width) would
         yield only $width - 1 characters and miss a newline sitting exactly
         at position $width. -->
    <xsl:variable name="pretxt" select="substring($txt, 1, $width)" />
    <xsl:choose>
      <xsl:when test="contains($pretxt, '&#xA;')">
        <!-- keep the existing line break and continue after it -->
        <xsl:value-of select="substring-before($pretxt, '&#xA;')"/>
        <xsl:text>&#xA;</xsl:text>
        <xsl:call-template name="format">
          <xsl:with-param name="txt" select="substring-after($txt,'&#xA;')"/>
          <xsl:with-param select="$width" name="width" />
        </xsl:call-template>
      </xsl:when>
      <xsl:otherwise>
        <!-- get the width at which to break-->
        <xsl:variable name="real-width">
          <xsl:call-template name="tune-width">
            <xsl:with-param select="$txt" name="txt" />
            <xsl:with-param select="$width" name="width" />
            <xsl:with-param select="$width" name="def" />
          </xsl:call-template>
        </xsl:variable>
        <!-- output the first part of the broken string -->
        <xsl:value-of select="substring($txt, 1, $real-width)" />
        <!-- output a newline -->
        <xsl:text>&#xA;</xsl:text>
        <!-- call itself with the remaining part of the text -->
        <xsl:call-template name="format">
          <xsl:with-param select="substring($txt,$real-width + 1)" name="txt" />
          <xsl:with-param select="$width" name="width" />
        </xsl:call-template>
      </xsl:otherwise>
    </xsl:choose>
  </xsl:if>
</xsl:template>
<!-- used by template "format", it calculates the width at the given def
It starts at def length and comes back till it finds a space -->
<xsl:template name="tune-width">
  <!-- Helper of 'format': outputs the position at which $txt should be
       broken.
       txt:   the text to inspect.
       width: candidate position, decreased by one on every recursion.
       def:   value returned when no space is found down to position 1.
       Walks backwards from $width until the character at the candidate
       position is a space. -->
  <xsl:param name="txt"/>
  <xsl:param name="width"/>
  <xsl:param name="def"/>
  <xsl:variable name="candidate" select="substring($txt, $width, 1)"/>
  <xsl:choose>
    <!-- ran out of positions: fall back to the default -->
    <xsl:when test="$width = 0">
      <xsl:value-of select="$def"/>
    </xsl:when>
    <!-- found a space: break here -->
    <xsl:when test="' ' = $candidate">
      <xsl:value-of select="$width"/>
    </xsl:when>
    <!-- otherwise try one position further left -->
    <xsl:otherwise>
      <xsl:call-template name="tune-width">
        <xsl:with-param name="txt" select="$txt"/>
        <xsl:with-param name="width" select="$width - 1"/>
        <xsl:with-param name="def" select="$def"/>
      </xsl:call-template>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- FIXME: FOR-555. This might not be the best solution though, but it sure works.
     Comments and processing instructions are passed through to the output unchanged. -->
<xsl:template match="processing-instruction()|comment()">
  <!-- Neither node type can carry attributes or children, so a bare copy
       reproduces the whole node. -->
  <xsl:copy/>
</xsl:template>
<!-- End fixme FOR-555 -->
<!--
  Rebuilds every element under its local name only, which drops any
  namespace prefix. Attributes are rebuilt the same way, then the children
  are processed recursively.
-->
<xsl:template match="*">
  <xsl:variable name="bare-name" select="local-name()"/>
  <xsl:element name="{$bare-name}">
    <!-- re-create each attribute under its unprefixed name -->
    <xsl:for-each select="@*">
      <xsl:attribute name="{local-name(.)}">
        <xsl:value-of select="string(.)"/>
      </xsl:attribute>
    </xsl:for-each>
    <!-- descend into child nodes (elements, text, comments, PIs) -->
    <xsl:apply-templates select="node()"/>
  </xsl:element>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,195 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
This stylesheet generates 'tabs' at the top left of the screen. Tabs are
visual indicators that a certain subsection of the URI space is being browsed.
For example, if we had tabs with paths:
Tab1: ''
Tab2: 'community'
Tab3: 'community/howto'
Tab4: 'community/howto/xmlform/index.html'
Then if the current path was 'community/howto/foo', Tab3 would be highlighted.
The rule is: the tab with the longest path that forms a prefix of the current
path is enabled.
The output of this stylesheet is HTML of the form:
<div class="tab">
...
</div>
which is then merged by site-to-xhtml.xsl
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- ================================================================ -->
<!-- These templates SHOULD be overridden -->
<!-- ================================================================ -->
<!-- Hook: emitted by base-tabs before the first level 1 tab. Empty by default. -->
<xsl:template name="pre-separator"/>

<!-- Hook: emitted by base-tabs after the last level 1 tab. Empty by default. -->
<xsl:template name="post-separator"/>

<!-- Hook: emitted by base-tabs between two level 1 tabs. -->
<xsl:template name="separator"><xsl:text> | </xsl:text></xsl:template>

<!-- Hook: emitted by level2tabs before the first level 2 tab. Empty by default. -->
<xsl:template name="level2-pre-separator"/>

<!-- Hook: emitted by level2tabs after the last level 2 tab. Empty by default. -->
<xsl:template name="level2-post-separator"/>

<!-- Hook: emitted by level2tabs between two level 2 tabs. -->
<xsl:template name="level2-separator"><xsl:text> | </xsl:text></xsl:template>
<!--
Note: sub-stylesheets can't do apply-imports here, because it would choose
the 'tabs' template and infinitely recurse. Hence call-template is used instead.
The four templates below are override points: each one simply delegates to
the matching base-* template, with the current tab as context node.
-->
<!-- Display a selected level 1 tab node (delegates to base-selected) -->
<xsl:template name="selected">
<xsl:call-template name="base-selected"/>
</xsl:template>
<!-- Display an unselected level 1 tab node (delegates to base-not-selected) -->
<xsl:template name="not-selected">
<xsl:call-template name="base-not-selected"/>
</xsl:template>
<!-- Display a selected second level tab node (delegates to base-selected) -->
<xsl:template name="level2-selected">
<xsl:call-template name="base-selected"/>
</xsl:template>
<!-- Display an unselected second level tab node (delegates to base-not-selected) -->
<xsl:template name="level2-not-selected">
<xsl:call-template name="base-not-selected"/>
</xsl:template>
<!-- ================================================================ -->
<!-- These templates CAN be overridden -->
<!-- ================================================================ -->
<!-- Entry point: wraps the level 1 tabs produced by base-tabs in a
     div with class "tab". The context node is the 'tabs' element. -->
<xsl:template match="tabs">
<div class="tab">
<xsl:call-template name="base-tabs"/>
</div>
</xsl:template>
<!-- ================================================================ -->
<!-- These templates SHOULD NOT be overridden -->
<!-- ================================================================ -->
<!-- Stylesheet parameter; presumably the path of the page being rendered,
     relative to the site root (TODO confirm against the caller). -->
<xsl:param name="path"/>
<!-- dotdots.xsl is expected to supply the 'dotdots' template called below;
     tabutils.xsl supplies calculate-tab-href, matching-id, longest-dir and
     level2-longest-dir. -->
<xsl:include href="dotdots.xsl"/>
<xsl:include href="tabutils.xsl"/>
<!-- NOTE: Xalan has a bug (race condition?) where sometimes $root is only half-evaluated -->
<!-- Result of the 'dotdots' template for $path; used as the prefix of
     $skin-img-dir, so presumably a relative path back to the site root. -->
<xsl:variable name="root">
<xsl:call-template name="dotdots">
<xsl:with-param name="path" select="$path"/>
</xsl:call-template>
</xsl:variable>
<!-- $root with 'skin/images' appended. string() is applied explicitly to $root
     before concatenation. -->
<xsl:variable name="skin-img-dir" select="concat(string($root), 'skin/images')"/>
<!--
The longest path of any level 1 tab, whose path is a prefix of the current URL. Ie,
the path of the 'current' level 1 tab.
-->
<xsl:variable name="longest-dir">
<xsl:call-template name="longest-dir">
<xsl:with-param name="tabfile" select="/"/>
</xsl:call-template>
</xsl:variable>
<!--
The longest path of any level 2 tab, whose path is a prefix of the current URL. Ie,
the path of the 'current' level 2 tab.
-->
<xsl:variable name="level2-longest-dir">
<xsl:call-template name="level2-longest-dir">
<xsl:with-param name="tabfile" select="/"/>
</xsl:call-template>
</xsl:variable>
<!-- Output of the 'matching-id' template; compared against the @id of tabs
     when deciding which tab is selected. -->
<xsl:variable name="matching-id">
<xsl:call-template name="matching-id"/>
</xsl:variable>
<!--
  base-tabs: renders the level 1 tabs. Called from the 'tabs' template once
  it has opened its wrapper element. The context node is expected to be the
  element whose 'tab' children are to be rendered.
  Output order: pre-separator, the tabs with a separator between each pair,
  post-separator.
-->
<xsl:template name="base-tabs">
  <xsl:call-template name="pre-separator"/>
  <xsl:for-each select="tab">
    <!-- every tab except the first is preceded by a separator -->
    <xsl:if test="position() &gt; 1">
      <xsl:call-template name="separator"/>
    </xsl:if>
    <xsl:apply-templates mode="level1" select="."/>
  </xsl:for-each>
  <xsl:call-template name="post-separator"/>
</xsl:template>
<!--
  level2tabs: renders the level 2 tabs that belong to the current level 1 tab.
  A level 1 tab counts as current when its @dir or @href equals $longest-dir,
  or when one of its child tabs has an @id equal to $matching-id.
  Output order: level2-pre-separator, the child tabs with a level2-separator
  between each pair, level2-post-separator.
  No caller is visible in this stylesheet; presumably it is invoked by a skin
  that includes it (TODO confirm).
-->
<xsl:template name="level2tabs">
  <xsl:call-template name="level2-pre-separator"/>
  <xsl:for-each select="tab[@dir = $longest-dir or @href = $longest-dir or tab/@id = $matching-id]/tab">
    <!-- every tab except the first is preceded by a separator -->
    <xsl:if test="position() &gt; 1">
      <xsl:call-template name="level2-separator"/>
    </xsl:if>
    <xsl:apply-templates mode="level2" select="."/>
  </xsl:for-each>
  <xsl:call-template name="level2-post-separator"/>
</xsl:template>
<!--
  Level 1 tab: decides between the 'selected' and 'not-selected' renderings.
  The tab is selected when any of these holds:
    1. it has an @id equal to $matching-id;
    2. it has no @id and its @dir equals $longest-dir;
    3. its @href equals $longest-dir (whether or not it has an @id; this is
       how XPath groups "a and b or c", written out explicitly here);
    4. one of its child tabs has an @id equal to $matching-id.
-->
<xsl:template match="tab" mode="level1">
  <xsl:choose>
    <xsl:when test="(@id and @id = $matching-id)
                    or (not(@id) and @dir = $longest-dir)
                    or @href = $longest-dir
                    or tab[@id = $matching-id]">
      <xsl:call-template name="selected"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="not-selected"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  Level 2 tab: decides between the 'level2-selected' and
  'level2-not-selected' renderings. The tab is selected when it has an @id
  equal to $matching-id, or when its @dir equals $level2-longest-dir.
-->
<xsl:template match="tab" mode="level2">
  <xsl:choose>
    <xsl:when test="(@id and @id = $matching-id) or @dir = $level2-longest-dir">
      <xsl:call-template name="level2-selected"/>
    </xsl:when>
    <xsl:otherwise>
      <xsl:call-template name="level2-not-selected"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
  base-selected: default rendering of a selected tab. Called from 'selected'
  and 'level2-selected' with the tab as context node. Emits an anchor with
  class "selected", an href computed by calculate-tab-href, and the tab's
  @label as link text.
-->
<xsl:template name="base-selected">
  <xsl:variable name="target">
    <xsl:call-template name="calculate-tab-href">
      <xsl:with-param name="tab" select="."/>
      <xsl:with-param name="path" select="$path"/>
    </xsl:call-template>
  </xsl:variable>
  <a class="selected" href="{$target}"><xsl:value-of select="@label"/></a>
</xsl:template>
<!--
  base-not-selected: default rendering of an unselected tab. Called from
  'not-selected' and 'level2-not-selected' with the tab as context node.
  Emits an anchor with class "unselected", an href computed by
  calculate-tab-href, and the tab's @label as link text.
-->
<xsl:template name="base-not-selected">
  <xsl:variable name="target">
    <xsl:call-template name="calculate-tab-href">
      <xsl:with-param name="tab" select="."/>
      <xsl:with-param name="path" select="$path"/>
    </xsl:call-template>
  </xsl:variable>
  <a class="unselected" href="{$target}"><xsl:value-of select="@label"/></a>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,98 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<!--
Some callable templates useful when dealing with tab paths. Mostly used in
tab-to-menu.xsl
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!-- URI of the document loaded into $site; defaults to the
     'cocoon://abs-menulinks' pipeline and can be overridden by the caller. -->
<xsl:param name="site-file" select="'cocoon://abs-menulinks'"/>
<!-- The document loaded from $site-file. The 'matching-id' template searches
     it for elements carrying @href and @tab attributes. -->
<xsl:variable name="site" select="document($site-file)"/>
<!--
  calculate-tab-href: given the current path and a tabs.xml entry, writes the
  URL of that tab. When rendering a set of tabs this template is called once
  per tab.

  Parameters:
    dir_index : file name appended to directory links when the tab has no
                @indexfile (default 'index.html')
    tab       : the 'tab' node to link to
    path      : the current path (defaults to the global $path)

  Result:
    * If the tab's @href starts with 'http' it is written out unchanged.
    * Otherwise the output of the 'dotdots' template for $path (defined
      outside this file) is joined with the tab's @dir or @href. When the
      tab has a @dir, or its @href ends in '/', an index file name is
      appended as well. Runs of slashes and any surrounding whitespace are
      then collapsed.
-->
<xsl:template name="calculate-tab-href">
  <xsl:param name="dir_index" select="'index.html'"/>
  <xsl:param name="tab"/>
  <xsl:param name="path" select="$path"/>
  <xsl:choose>
    <!-- Absolute URL: use as is -->
    <xsl:when test="starts-with($tab/@href, 'http')">
      <xsl:value-of select="$tab/@href"/>
    </xsl:when>
    <!-- Root-relative path -->
    <xsl:otherwise>
      <xsl:variable name="backpath">
        <xsl:call-template name="dotdots">
          <xsl:with-param name="path" select="$path"/>
        </xsl:call-template>
        <xsl:text>/</xsl:text>
        <xsl:value-of select="$tab/@dir | $tab/@href"/>
        <!-- If we obviously have a directory, add the index file. The
             substring() call picks out the last character of @href. -->
        <xsl:if test="$tab/@dir or substring($tab/@href, string-length($tab/@href), string-length($tab/@href)) = '/'">
          <xsl:text>/</xsl:text>
          <xsl:choose>
            <!-- Both branches look at $tab. The earlier not(@indexfile) test
                 read the caller's context node instead, which gives the wrong
                 answer whenever $tab is not the context node. -->
            <xsl:when test="$tab/@indexfile">
              <xsl:value-of select="$tab/@indexfile"/>
            </xsl:when>
            <xsl:otherwise>
              <xsl:value-of select="$dir_index"/>
            </xsl:otherwise>
          </xsl:choose>
        </xsl:if>
      </xsl:variable>
      <!-- Link to backpath, normalizing slashes: swap '/' and ' ', let
           normalize-space() collapse and trim the runs, then swap back. -->
      <xsl:value-of
          select="translate(normalize-space(translate($backpath, ' /', '/ ')), ' /', '/ ')"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
<!--
Writes the @tab attribute of an element in $site whose @href starts with the
current $path. When several elements match, xsl:value-of outputs only the
first one in document order. The result is used as the id of the 'current' tab.
NOTE(review): the 'l' namespace prefix declared on this template is not used
by the select expression below.
-->
<xsl:template name="matching-id" xmlns:l="http://apache.org/forrest/linkmap/1.0">
<xsl:value-of select="$site//*[starts-with(@href, $path)]/@tab"/>
</xsl:template>
<!--
The longest path of any level 1 tab, whose path is a prefix of the current URL. Ie,
the path of the 'current' tab.
Parameter tabfile: the node whose tabs/tab children are examined.
Writes nothing when no tab path is a prefix of $path.
-->
<xsl:template name="longest-dir">
<xsl:param name="tabfile"/>
<!-- keep only the tabs whose @dir or @href is a prefix of $path -->
<xsl:for-each select="$tabfile/tabs/tab[starts-with($path, @dir|@href)]">
<!-- longest path first, so the best match ends up at position 1 -->
<xsl:sort select="string-length(@dir|@href)"
data-type="number" order="descending"/>
<xsl:if test="position()=1">
<xsl:value-of select="@dir|@href"/>
</xsl:if>
</xsl:for-each>
</xsl:template>
<!--
The longest path of any level 2 tab, whose path is a prefix of the current URL. Ie,
the path of the 'current' tab.
Parameter tabfile: the node whose tabs/tab/tab descendants are examined.
Writes nothing when no level 2 tab path is a prefix of $path.
-->
<xsl:template name="level2-longest-dir">
<xsl:param name="tabfile"/>
<!-- keep only the level 2 tabs whose @dir or @href is a prefix of $path -->
<xsl:for-each select="$tabfile/tabs/tab/tab[starts-with($path, @dir|@href)]">
<!-- longest path first, so the best match ends up at position 1 -->
<xsl:sort select="string-length(@dir|@href)"
data-type="number" order="descending"/>
<xsl:if test="position()=1">
<xsl:value-of select="@dir|@href"/>
</xsl:if>
</xsl:for-each>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,45 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform" >
<!-- Serialise as indented XML without an XML declaration.
     NOTE(review): the registered media type for SVG is image/svg+xml; confirm
     whether anything reads the value declared here before changing it. -->
<xsl:output method="xml" media-type="image/svg" omit-xml-declaration="yes" indent="yes"/>
<!-- the skinconf file -->
<xsl:param name="config-file"/>
<!-- The skinconfig root element of the document named by $config-file -->
<xsl:variable name="config" select="document($config-file)/skinconfig"/>
<!-- Get the section depth to use when generating the minitoc (default is 2) -->
<!-- NOTE(review): no default is applied on this line; when toc/@max-depth is
     absent number() yields NaN. None of the declarations in this group is
     referenced by the two templates of this stylesheet. -->
<xsl:variable name="toc-max-depth" select="number($config/toc/@max-depth)"/>
<xsl:param name="numbersections" select="'true'"/>
<!-- Section depth at which we stop numbering and just indent -->
<xsl:param name="numbering-max-depth" select="'3'"/>
<xsl:param name="ctxbasedir" select="."/>
<xsl:param name="xmlbasedir"/>
<!-- Root template: emits a fixed-size (1305 x 1468) SVG canvas with a single
     group at the origin, and renders the input document inside that group. -->
<xsl:template match="/">
<svg width="1305" height="1468" xmlns="http://www.w3.org/2000/svg">
<g transform="translate(0 0)">
<xsl:apply-templates/>
</g>
</svg>
</xsl:template>
<!-- 'document' element: writes header/title as a text element at y=30px
     (font size 20), then the result of processing the document's children
     as a second text element at y=50px (font size 12). -->
<xsl:template match="document">
<text x="00px" y="30px" style="font-size:20;">
<xsl:value-of select="header/title"/>
</text>
<text x="0px" y="50px" style="font-size:12;">
<xsl:apply-templates/>
</text>
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,166 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * General
 */
img {
  border: 0;
}

#content table {
  border: 0;
  width: 100%;
}

/* Hack to get IE to render the table at 100% */
* html #content table {
  margin-left: -3px;
}

#content th,
#content td {
  margin: 0;
  padding: 0;
  vertical-align: top;
}

.clearboth {
  clear: both;
}

/* Admonition boxes: a shared frame, then a label and a body per kind */
.note,
.warning,
.fixme {
  border: solid black 1px;
  margin: 1em 3em;
}

.note .label {
  background: #369;
  color: white;
  font-weight: bold;
  padding: 5px 10px;
}

.note .content {
  background: #F0F0FF;
  color: black;
  line-height: 120%;
  font-size: 90%;
  padding: 5px 10px;
}

.warning .label {
  background: #C00;
  color: white;
  font-weight: bold;
  padding: 5px 10px;
}

.warning .content {
  background: #FFF0F0;
  color: black;
  line-height: 120%;
  font-size: 90%;
  padding: 5px 10px;
}

.fixme .label {
  background: #C6C600;
  color: black;
  font-weight: bold;
  padding: 5px 10px;
}

/* The fixme body only sets padding; it has no colours of its own */
.fixme .content {
  padding: 5px 10px;
}
/**
 * Typography
 */
body {
  font-family: verdana, "Trebuchet MS", arial, helvetica, sans-serif;
  font-size: 100%;
}

#content {
  font-family: Georgia, Palatino, Times, serif;
  font-size: 95%;
}

/* Page furniture is set smaller than the body text */
#tabs   { font-size: 70%; }
#menu   { font-size: 80%; }
#footer { font-size: 70%; }

/* Headings: shared face and spacing first, sizes further down */
h1, h2, h3, h4, h5, h6 {
  font-family: "Trebuchet MS", verdana, arial, helvetica, sans-serif;
  font-weight: bold;
  margin-top: 1em;
  margin-bottom: .5em;
}

h1 {
  margin-top: 0;
  margin-bottom: 1em;
  font-size: 1.4em;
}

#content h1 {
  font-size: 160%;
  margin-bottom: .5em;
}

#menu h1 {
  margin: 0;
  padding: 10px;
  background: #336699;
  color: white;
}

h2 {
  font-size: 120%;
}

h3 {
  font-size: 100%;
}

h4 {
  font-size: 90%;
}

h5 {
  font-size: 80%;
}

h6 {
  font-size: 75%;
}

p {
  line-height: 120%;
  text-align: left;
  margin-top: .5em;
  margin-bottom: 1em;
}

/* Vertical rhythm for lists and table cells inside the content area */
#content li,
#content th,
#content td,
#content li ul,
#content li ol {
  margin-top: .5em;
  margin-bottom: .5em;
}

/* Nested list items and minitoc entries sit flush against each other */
#content li li,
#minitoc-area li {
  margin-top: 0em;
  margin-bottom: 0em;
}

#content .attribution {
  text-align: right;
  font-style: italic;
  font-size: 85%;
  margin-top: 1em;
}

.codefrag {
  font-family: "Courier New", Courier, monospace;
  font-size: 110%;
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
body {
  font-family: Georgia, Palatino, serif;
  font-size: 12pt;
  background: white;
}

/* Navigation and the table of contents are hidden in this stylesheet */
#tabs,
#menu,
#content .toc {
  display: none;
}

/* Let the content take the full width; the float is switched off with !important */
#content {
  width: auto;
  padding: 0;
  float: none !important;
  color: black;
  background: inherit;
}

a:link,
a:visited {
  color: #336699;
  background: inherit;
  text-decoration: underline;
}

#top .logo {
  padding: 0;
  margin: 0 0 2em 0;
}

#footer {
  margin-top: 4em;
}

acronym {
  border: 0;
}

View File

@ -0,0 +1,182 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:import href="../../common/css/forrest.css.xslt"/>
<!-- xsl:output is not used by Forrest but makes it possible to debug the
stylesheet in standalone editors -->
<xsl:output method = "text" omit-xml-declaration="yes" />
<!-- ==================== main block colors ============================ -->
<!-- Every template below matches one 'color' element by its @name and writes
     CSS rules as plain text, filling in the element's attributes (@value,
     @font, @link, @vlink, @hlink, @align). Because the output is text, the
     whitespace inside each template is part of the result. -->
<!-- header: background of #top -->
<xsl:template match="color[@name='header']">
#top { background-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- tab-selected: background and link colors of the current tab -->
<xsl:template match="color[@name='tab-selected']">
#top .header .current { background-color: <xsl:value-of select="@value"/>;}
#top .header .current a:link { color: <xsl:value-of select="@link"/>; }
#top .header .current a:visited { color: <xsl:value-of select="@vlink"/>; }
#top .header .current a:hover { color: <xsl:value-of select="@hlink"/>; }
</xsl:template>
<!-- tab-unselected: background and link colors of the tab list items -->
<xsl:template match="color[@name='tab-unselected']">
#tabs li { background-color: <xsl:value-of select="@value"/> ;}
#tabs li a:link { color: <xsl:value-of select="@link"/>; }
#tabs li a:visited { color: <xsl:value-of select="@vlink"/>; }
#tabs li a:hover { color: <xsl:value-of select="@hlink"/>; }
</xsl:template>
<!-- subtab-selected: background of the selected level 2 tab; the link colors
     are written for every anchor inside #level2tabs -->
<xsl:template match="color[@name='subtab-selected']">
#level2tabs a.selected { background-color: <xsl:value-of select="@value"/> ;}
#level2tabs a:link { color: <xsl:value-of select="@link"/>; }
#level2tabs a:visited { color: <xsl:value-of select="@vlink"/>; }
#level2tabs a:hover { color: <xsl:value-of select="@hlink"/>; }
</xsl:template>
<!-- subtab-unselected: background of the #level2tabs bar and link colors of
     its unselected anchors -->
<xsl:template match="color[@name='subtab-unselected']">
#level2tabs { background-color: <xsl:value-of select="@value"/>;}
#level2tabs a.unselected:link { color: <xsl:value-of select="@link"/>; }
#level2tabs a.unselected:visited { color: <xsl:value-of select="@vlink"/>; }
#level2tabs a.unselected:hover { color: <xsl:value-of select="@hlink"/>; }
</xsl:template>
<!-- heading: background of .heading -->
<xsl:template match="color[@name='heading']">
.heading { background-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- subheading: @value is used for .boxed, both .underlined_* borders and the
     table caption background; @font is the caption text color -->
<xsl:template match="color[@name='subheading']">
.boxed { background-color: <xsl:value-of select="@value"/>;}
.underlined_5 {border-bottom: solid 5px <xsl:value-of select="@value"/>;}
.underlined_10 {border-bottom: solid 10px <xsl:value-of select="@value"/>;}
table caption {
background-color: <xsl:value-of select="@value"/>;
color: <xsl:value-of select="@font"/>;
}
</xsl:template>
<!-- feedback: text color, background and alignment of #feedback -->
<xsl:template match="color[@name='feedback']">
#feedback {
color: <xsl:value-of select="@font"/>;
background: <xsl:value-of select="@value"/>;
text-align: <xsl:value-of select="@align"/>;
}
#feedback #feedbackto {
color: <xsl:value-of select="@font"/>;
}
</xsl:template>
<!-- breadtrail: the same colors are written twice, once for the breadtrail
     inside #main and once for the one inside #top -->
<xsl:template match="color[@name='breadtrail']">
#main .breadtrail {
background: <xsl:value-of select="@value"/>;
color: <xsl:value-of select="@font"/>;
}
#main .breadtrail a:link { color: <xsl:value-of select="@link"/>; }
#main .breadtrail a:visited { color: <xsl:value-of select="@vlink"/>; }
#main .breadtrail a:hover { color: <xsl:value-of select="@hlink"/>; }
#top .breadtrail {
background: <xsl:value-of select="@value"/>;
color: <xsl:value-of select="@font"/>;
}
#top .breadtrail a:link { color: <xsl:value-of select="@link"/>; }
#top .breadtrail a:visited { color: <xsl:value-of select="@vlink"/>; }
#top .breadtrail a:hover { color: <xsl:value-of select="@hlink"/>; }
</xsl:template>
<!--Fix for other (old) profiles-->
<!-- navstrip: writes the same #publishedStrip rule as 'published' below -->
<xsl:template match="color[@name='navstrip']">
#publishedStrip {
color: <xsl:value-of select="@font"/>;
background: <xsl:value-of select="@value"/>;
}
</xsl:template>
<!--has to go after the nav-strip (no 'navstrip')-->
<!-- published: text color and background of #publishedStrip -->
<xsl:template match="color[@name='published']">
#publishedStrip {
color: <xsl:value-of select="@font"/>;
background: <xsl:value-of select="@value"/>;
}
</xsl:template>
<!-- toolbox: background and text color of the menu page title -->
<xsl:template match="color[@name='toolbox']">
#menu .menupagetitle { background-color: <xsl:value-of select="@value"/>;
color: <xsl:value-of select="@font"/>;}
</xsl:template>
<!-- border: border color of the menu, its page title and its page item group -->
<xsl:template match="color[@name='border']">
#menu { border-color: <xsl:value-of select="@value"/>;}
#menu .menupagetitle { border-color: <xsl:value-of select="@value"/>;}
#menu .menupageitemgroup { border-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- menu: background, text and link colors of #menu; a hovered link gets the
     same background as the menu itself -->
<xsl:template match="color[@name='menu']">
#menu { background-color: <xsl:value-of select="@value"/>;}
#menu { color: <xsl:value-of select="@font"/>;}
#menu a:link { color: <xsl:value-of select="@link"/>;}
#menu a:visited { color: <xsl:value-of select="@vlink"/>;}
#menu a:hover {
background-color: <xsl:value-of select="@value"/>;
color: <xsl:value-of select="@hlink"/>;}
</xsl:template>
<!-- dialog: background of the menu page item group, plus text and link
     colors of its items -->
<xsl:template match="color[@name='dialog']">
#menu .menupageitemgroup {
background-color: <xsl:value-of select="@value"/>;
}
#menu .menupageitem {
color: <xsl:value-of select="@font"/>;
}
#menu .menupageitem a:link { color: <xsl:value-of select="@link"/>;}
#menu .menupageitem a:visited { color: <xsl:value-of select="@vlink"/>;}
#menu .menupageitem a:hover {
background-color: <xsl:value-of select="@value"/>;
color: <xsl:value-of select="@hlink"/>;
}
</xsl:template>
<!-- menuheading: text color and background of h1 inside the menu -->
<xsl:template match="color[@name='menuheading']">
#menu h1 {
color: <xsl:value-of select="@font"/>;
background-color: <xsl:value-of select="@value"/>;
}
</xsl:template>
<!-- searchbox: background and text color of the search box in #top -->
<xsl:template match="color[@name='searchbox']">
#top .searchbox {
background-color: <xsl:value-of select="@value"/> ;
color: <xsl:value-of select="@font"/>;
}
</xsl:template>
<!-- body: page background and text color, plus the default link colors -->
<xsl:template match="color[@name='body']">
body{
background-color: <xsl:value-of select="@value"/>;
color: <xsl:value-of select="@font"/>;
}
a:link { color:<xsl:value-of select="@link"/>}
a:visited { color:<xsl:value-of select="@vlink"/>}
a:hover { color:<xsl:value-of select="@hlink"/>}
</xsl:template>
<!-- footer: background of #footer -->
<xsl:template match="color[@name='footer']">
#footer { background-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- ==================== other colors ============================ -->
<!-- Each template below writes a single rule that uses only @value. -->
<!-- highlight: background of .highlight -->
<xsl:template match="color[@name='highlight']">
.highlight { background-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- fixme: border color of .fixme boxes -->
<xsl:template match="color[@name='fixme']">
.fixme { border-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- note: border color of .note boxes -->
<xsl:template match="color[@name='note']">
.note { border-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- warning: border color of .warning boxes -->
<xsl:template match="color[@name='warning']">
.warning { border-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- code: border color of .code blocks -->
<xsl:template match="color[@name='code']">
.code { border-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- table: background of .ForrestTable -->
<xsl:template match="color[@name='table']">
.ForrestTable { background-color: <xsl:value-of select="@value"/>;}
</xsl:template>
<!-- table-cell: background of the cells of .ForrestTable -->
<xsl:template match="color[@name='table-cell']">
.ForrestTable td { background-color: <xsl:value-of select="@value"/>;}
</xsl:template>
</xsl:stylesheet>

View File

@ -0,0 +1,587 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Page body and heading scale */
body {
  margin: 0px 0px 0px 0px;
  font-family: Verdana, Helvetica, sans-serif;
}

h1 {
  font-size: 160%;
  margin: 0px 0px 0px 0px;
  padding: 0px;
}

h2 {
  font-size: 140%;
  margin: 1em 0px 0.8em 0px;
  padding: 0px;
  font-weight: bold;
}

h3 {
  font-size: 130%;
  margin: 0.8em 0px 0px 0px;
  padding: 0px;
  font-weight: bold;
}

.h3 {
  margin: 22px 0px 3px 0px;
}

h4 {
  font-size: 120%;
  margin: 0.7em 0px 0px 0px;
  padding: 0px;
  font-weight: normal;
  text-align: left;
}

.h4 {
  margin: 18px 0px 0px 0px;
}

h4.faq {
  font-size: 120%;
  margin: 18px 0px 0px 0px;
  padding: 0px;
  font-weight: bold;
  text-align: left;
}

h5 {
  font-size: 100%;
  margin: 14px 0px 0px 0px;
  padding: 0px;
  font-weight: normal;
  text-align: left;
}
/**
 * table
 */
table .title {
  background-color: #000000;
}

.ForrestTable {
  color: #ffffff;
  background-color: #7099C5;
  width: 100%;
  font-size: 100%;
  empty-cells: show;
}

table caption {
  padding-left: 5px;
  color: white;
  text-align: left;
  font-weight: bold;
  background-color: #000000;
}

.ForrestTable td {
  color: black;
  background-color: #f0f0ff;
}

.ForrestTable th {
  text-align: center;
}
/**
 * Page Header
 */
#top {
  position: relative;
  float: left;
  width: 100%;
  background: #294563; /* if you want a background in the header, put it here */
}

#top .breadtrail {
  background: #CFDCED;
  color: black;
  border-bottom: solid 1px white;
  padding: 3px 10px;
  font-size: 75%;
}

#top .breadtrail a {
  color: black;
}

#top .header {
  float: left;
  width: 100%;
  background: url("images/header_white_line.gif") repeat-x bottom;
}

/* Logos */
#top .grouplogo {
  padding: 7px 0 10px 10px;
  float: left;
  text-align: left;
}

#top .projectlogo {
  padding: 7px 0 10px 10px;
  float: left;
  width: 33%;
  text-align: right;
}

#top .projectlogoA1 {
  padding: 7px 0 10px 10px;
  float: right;
}

/* Search box. Browsers that understand the child selector use bottom: 0px;
   the plain rule below keeps bottom: -1px for the others. */
html>body #top .searchbox {
  bottom: 0px;
}

#top .searchbox {
  position: absolute;
  right: 10px;
  height: 42px;
  font-size: 70%;
  white-space: nowrap;
  text-align: right;
  color: white;
  background-color: #000000;
  z-index: 0;
  background-image: url(images/rc-t-l-5-1header-2searchbox-3searchbox.png);
  background-repeat: no-repeat;
  background-position: top left;
  bottom: -1px; /* compensate for IE rendering issue */
}

#top .searchbox form {
  padding: 5px 10px;
  margin: 0;
}

#top .searchbox p {
  padding: 0 0 2px 0;
  margin: 0;
}

#top .searchbox input {
  font-size: 100%;
}
/* Level 1 tab strip */
#tabs {
  clear: both;
  padding-left: 10px;
  margin: 0;
  list-style: none;
}

/* background: #CFDCED url("images/tab-right.gif") no-repeat right top;*/
/* Each list item carries the right-hand corner image of a tab */
#tabs li {
  float: left;
  background-image: url(images/rc-t-r-5-1header-2tab-unselected-3tab-unselected.png);
  background-repeat: no-repeat;
  background-position: top right;
  background-color: #000000;
  margin: 0 3px 0 0;
  padding: 0;
}

/*background: url("images/tab-left.gif") no-repeat left top;*/
/* The anchor inside carries the left-hand corner image */
#tabs li a {
  float: left;
  display: block;
  font-family: verdana, arial, sans-serif;
  text-decoration: none;
  color: black;
  white-space: nowrap;
  background-image: url(images/rc-t-l-5-1header-2tab-unselected-3tab-unselected.png);
  background-repeat: no-repeat;
  background-position: top left;
  padding: 5px 15px 4px;
  width: .1em; /* IE/Win fix */
}

#tabs li a:hover {
  cursor: pointer;
  text-decoration: underline;
}

#tabs > li a {
  width: auto; /* Rest of IE/Win fix */
}

/* Commented Backslash Hack hides rule from IE5-Mac \*/
#tabs a { float: none; }
/* End IE5-Mac hack */

/* The current tab swaps in the "selected" corner images */
#top .header .current {
  background-color: #4C6C8F;
  background-image: url(images/rc-t-r-5-1header-2tab-selected-3tab-selected.png);
  background-repeat: no-repeat;
  background-position: top right;
}

#top .header .current a {
  font-weight: bold;
  padding-bottom: 5px;
  color: white;
  background-image: url(images/rc-t-l-5-1header-2tab-selected-3tab-selected.png);
  background-repeat: no-repeat;
  background-position: top left;
}
/* Strip holding the "published" text */
#publishedStrip {
  padding-right: 10px;
  padding-left: 20px;
  padding-top: 3px;
  padding-bottom: 3px;
  color: #ffffff;
  font-size: 60%;
  font-weight: bold;
  background-color: #4C6C8F;
  text-align: right;
}

/* Level 2 tabs */
#level2tabs {
  margin: 0;
  float: left;
  position: relative;
}

#level2tabs a:hover {
  cursor: pointer;
  text-decoration: underline;
}

/* Comes after the :hover rule with lower specificity, so hovered links stay underlined */
#level2tabs a {
  cursor: pointer;
  text-decoration: none;
  background-image: url('images/chapter.gif');
  background-repeat: no-repeat;
  background-position: center left;
  padding-left: 6px;
  margin-left: 6px;
}
/*
 * border-top: solid #4C6C8F 15px;
 */
#main {
  position: relative;
  background: white;
  clear: both;
}

/* Breadcrumb trail inside the main area; the 180px side padding matches the
   left padding of #content */
#main .breadtrail {
  clear: both;
  position: relative;
  background: #CFDCED;
  color: black;
  border-bottom: solid 1px black;
  border-top: solid 1px black;
  padding: 0px 180px;
  font-size: 75%;
  z-index: 10;
}
/**
 * Round corner
 */
/* 15px corners */
#roundtop {
  background-image: url(images/rc-t-r-15-1body-2menu-3menu.png);
  background-repeat: no-repeat;
  background-position: top right;
}

#roundbottom {
  background-image: url(images/rc-b-r-15-1body-2menu-3menu.png);
  background-repeat: no-repeat;
  background-position: top right;
}

img.corner {
  width: 15px;
  height: 15px;
  border: none;
  display: block !important;
}

/* 5px corners. The top one is a class selector, the bottom one an id selector. */
.roundtopsmall {
  background-image: url(images/rc-t-r-5-1header-2searchbox-3searchbox.png);
  background-repeat: no-repeat;
  background-position: top right;
}

#roundbottomsmall {
  background-image: url(images/rc-b-r-5-1header-2tab-selected-3tab-selected.png);
  background-repeat: no-repeat;
  background-position: top right;
}

img.cornersmall {
  width: 5px;
  height: 5px;
  border: none;
  display: block !important;
}
/**
 * Side menu
 */

/* Menu links keep normal weight in every link state. */
#menu a,
#menu a:visited,
#menu a:active,
#menu a:hover {
    font-weight: normal;
}

/* Underline only while hovered. */
#menu a {
    text-decoration: none;
}

#menu a:hover {
    text-decoration: underline;
}

#menuarea {
    width: 10em;
}

/* Menu column: 160px wide, floated left, then shifted 18px up and 10px
   right of its normal position; z-index 20. */
#menu {
    position: relative;
    top: -18px;
    left: 10px;
    z-index: 20;
    float: left;
    width: 160px;
    padding-top: 0;
    background-color: #f90;
    font-size: 70%;
}
/* Menu section title: bold, pointer cursor, with a chapter icon at the
   vertical centre of the left edge. */
.menutitle {
    margin-left: 10px;
    padding: 3px 12px;
    background-image: url('images/chapter.gif');
    background-position: center left;
    background-repeat: no-repeat;
    cursor: pointer;
    font-weight: bold;
}

.menutitle:hover {
    cursor: pointer;
    text-decoration: underline;
}

/* Item group: bold text, 8px left margin, 6px bottom margin. */
#menu .menuitemgroup {
    margin: 0 0 6px 8px;
    padding: 0;
    font-weight: bold;
}

/* Selected item group: normal weight and no bottom margin. */
#menu .selectedmenuitemgroup {
    margin: 0 0 0 8px;
    padding: 0;
    font-weight: normal;
}
/* Menu items and menu pages share the same page icon, drawn once at the
   vertical centre of the left edge. */
#menu .menuitem,
#menu .menupage {
    background-image: url('images/page.gif');
    background-repeat: no-repeat;
    background-position: center left;
}

#menu .menuitem {
    margin-left: 10px;
    padding: 2px 0 1px 13px;
    font-weight: normal;
}

#menu .menupage {
    margin: 2px 0 1px 10px;
    padding: 0 3px 0 12px;
    font-style: normal;
}

/* Page title: 1px solid border on all four sides. Border colour is not
   set here, so longhands are kept rather than a "border" shorthand. */
#menu .menupagetitle {
    margin-right: 10px;
    padding: 0 0 0 1px;
    border-style: solid;
    border-width: 1px;
    font-style: normal;
}

/* Page item group: bordered on the bottom, left and right only; there
   is no top border declaration. */
#menu .menupageitemgroup {
    margin-right: 10px;
    padding: 3px 0 4px 6px;
    border-right: 1px solid;
    border-bottom: 1px solid;
    border-left: 1px solid;
    font-style: normal;
}

/* Individual page items: slightly smaller text, zero border width. */
#menu .menupageitem {
    border-width: 0;
    font-size: 90%;
    font-style: normal;
    font-weight: normal;
}
/* The credit blocks and the search box are all centre-aligned. */
#menu #credit,
#menu #credit2,
#menu .searchbox {
    text-align: center;
}

/* The second credit block additionally gets a padded white box. */
#menu #credit2 {
    padding: 3px;
    background-color: #fff;
}

#menu .searchbox form {
    margin: 0;
    padding: 3px;
}

#menu .searchbox input {
    font-size: 100%;
}
/* Content area: 180px of left padding, 20px on the other sides. */
#content {
    margin: 0;
    padding: 20px 20px 20px 180px;
    /* Order matters: the "font" shorthand sets the family (and a size),
       then "font-size" overrides the size. */
    font: small Verdana, Helvetica, sans-serif;
    font-size: 80%;
}

#content ul {
    margin: 0;
    padding: 0 25px;
}

#content li {
    padding: 0 5px;
}
/* Feedback bar: centred black text on light blue, 5px top margin. */
#feedback {
    margin-top: 5px;
    background: #CFDCED;
    color: black;
    text-align: center;
}

#feedback #feedbackto {
    color: black;
    font-size: 90%;
}
/* "position: relative" on the footer and its two columns is an IE
   bugfix (http://www.dracos.co.uk/web/css/ie6floatbug/). */
#footer,
#footer .copyright,
#footer .lastmodified {
    position: relative;
}

/* Footer: full-width light-blue band below all floats. */
#footer {
    clear: both;
    width: 100%;
    background: #CFDCED;
    border-top: solid 1px #4C6C8F;
    color: black;
}

/* Both footer columns are 45% wide with 5px padding and no margin. */
#footer .copyright,
#footer .lastmodified {
    width: 45%;
    margin: 0;
    padding: 5px;
}

/* The last-modified column floats right with right-aligned text. */
#footer .lastmodified {
    float: right;
    text-align: right;
}

#footer a {
    color: white;
}

#footer #logos {
    text-align: left;
}
/**
 * Misc Styles
 */

/* Help cursor over acronyms. */
acronym {
    cursor: help;
}

.boxed {
    background-color: #a5b6c6;
}

/* Blue underline helpers, 5px and 10px thick. */
.underlined_5 {
    border-bottom: solid 5px #4C6C8F;
}

.underlined_10 {
    border-bottom: solid 10px #4C6C8F;
}
/* ==================== snail trail ============================ */
/* Small right-floated, right-aligned text pulled up by a negative top
   margin. The relative positioning continues the IE bugfix series used
   elsewhere in this sheet. */
.trail {
    position: relative;
    float: right;
    margin: -10px 5px 0;
    padding: 0;
    font-size: 70%;
    text-align: right;
}
/* Both boxes are framed by a thin blue rule above and below. */
#motd-area,
#minitoc-area {
    border-top: solid 1px #4C6C8F;
    border-bottom: solid 1px #4C6C8F;
}

/* MOTD box: right-floated, 35% wide, pale-blue background. The relative
   positioning continues the IE bugfix series. No top margin is set, so
   the margins stay as individual longhands. */
#motd-area {
    position: relative;
    float: right;
    width: 35%;
    margin-right: 10%;
    margin-bottom: 15px;
    margin-left: 15px;
    padding-top: 5px;
    padding-bottom: 5px;
    background-color: #f0f0ff;
}

/* Mini table-of-contents box; only vertical padding is set. */
#minitoc-area {
    margin: 15px 10% 5px 15px;
    padding-top: 5px;
    padding-bottom: 7px;
}
/* Mini TOC lists use an image as the list marker. */
.minitoc {
    font-weight: normal;
    list-style-image: url('images/current.gif');
}

/* Paragraphs inside list items carry no margin or padding. */
li p {
    margin: 0;
    padding: 0;
}
/* Document link boxes (.pdflink, .xmllink, .podlink, .printlink).
   All four float right; the relative positioning continues the IE
   bugfix series used elsewhere in this sheet. */
.pdflink,
.xmllink,
.podlink,
.printlink {
    position: relative;
    float: right;
}

/* .printlink deliberately has no margin/padding declarations. */
.pdflink,
.xmllink,
.podlink {
    margin: 0 5px;
    padding: 0;
}

.pdflink br,
.xmllink br,
.podlink br,
.printlink br {
    margin-top: -10px;
    padding-left: 1px;
}

/* Link text: small, centred, block-level. */
.pdflink a,
.xmllink a,
.podlink a,
.printlink a {
    display: block;
    margin: 0;
    padding: 0;
    font-size: 70%;
    text-align: center;
}

/* Icons: 16x16, block-level. */
.pdflink img,
.xmllink img,
.podlink img,
.printlink img {
    display: block;
    width: 16px;
    height: 16px;
}
/* Instruction paragraphs render as list items with an image marker
   placed outside the text box, indented by 2em. */
p.instruction {
    display: list-item;
    margin-left: 2em;
    list-style-position: outside;
    list-style-image: url('../images/instruction_arrow.png');
}

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 49 B

Binary file not shown.

After

Width:  |  Height:  |  Size: 54 B

Some files were not shown because too many files have changed in this diff Show More