YARN-4591. YARN Web UIs should provide a robots.txt. (Sidharta Seethana via wangda)

(cherry picked from commit 5a58bfee30)
This commit is contained in:
Wangda Tan 2016-09-20 17:20:50 -07:00
parent 11ed4f5d40
commit 6f6e62ad86
4 changed files with 77 additions and 1 deletions

View File

@ -35,6 +35,7 @@ import org.apache.hadoop.http.HtmlQuoting;
import org.apache.hadoop.yarn.webapp.Controller.RequestContext; import org.apache.hadoop.yarn.webapp.Controller.RequestContext;
import org.apache.hadoop.yarn.webapp.Router.Dest; import org.apache.hadoop.yarn.webapp.Router.Dest;
import org.apache.hadoop.yarn.webapp.view.ErrorPage; import org.apache.hadoop.yarn.webapp.view.ErrorPage;
import org.apache.hadoop.yarn.webapp.view.RobotsTextPage;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -117,6 +118,14 @@ public class Dispatcher extends HttpServlet {
} }
Controller.RequestContext rc = Controller.RequestContext rc =
injector.getInstance(Controller.RequestContext.class); injector.getInstance(Controller.RequestContext.class);
//short-circuit robots.txt serving for all YARN webapps.
if (uri.equals(RobotsTextPage.ROBOTS_TXT_PATH)) {
rc.setStatus(HttpServletResponse.SC_FOUND);
render(RobotsTextPage.class);
return;
}
if (setCookieParams(rc, req) > 0) { if (setCookieParams(rc, req) > 0) {
Cookie ec = rc.cookies().get(ERROR_COOKIE); Cookie ec = rc.cookies().get(ERROR_COOKIE);
if (ec != null) { if (ec != null) {

View File

@ -29,6 +29,7 @@ import java.util.Map;
import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.http.HttpServer2; import org.apache.hadoop.http.HttpServer2;
import org.apache.hadoop.yarn.webapp.view.RobotsTextPage;
import org.slf4j.Logger; import org.slf4j.Logger;
import org.slf4j.LoggerFactory; import org.slf4j.LoggerFactory;
@ -158,7 +159,8 @@ public abstract class WebApp extends ServletModule {
public void configureServlets() { public void configureServlets() {
setup(); setup();
serve("/", "/__stop").with(Dispatcher.class); serve("/", "/__stop", RobotsTextPage.ROBOTS_TXT_PATH)
.with(Dispatcher.class);
for (String path : this.servePathSpecs) { for (String path : this.servePathSpecs) {
serve(path).with(Dispatcher.class); serve(path).with(Dispatcher.class);

View File

@ -0,0 +1,39 @@
/*
* *
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* /
*/
package org.apache.hadoop.yarn.webapp.view;
/**
 * Text page that serves a {@code robots.txt} body asking every user agent
 * not to crawl any path of the web application.
 */
public class RobotsTextPage extends TextPage {
  /** File name of the robots exclusion file. */
  public static final String ROBOTS_TXT = "robots.txt";
  /** Request path of the robots exclusion file: the file name with a leading slash. */
  public static final String ROBOTS_TXT_PATH = "/" + ROBOTS_TXT;

  /** Rule line that addresses all user agents. */
  static final String USER_AGENT_LINE = "User-agent: *";
  /** Rule line that disallows the root path. */
  static final String DISALLOW_LINE = "Disallow: /";

  /**
   * Emits the user-agent rule followed by the disallow rule, handing each
   * one to {@code putWithoutEscapeHtml} in that order.
   */
  @Override
  public void render() {
    final String[] rules = {USER_AGENT_LINE, DISALLOW_LINE};
    for (String rule : rules) {
      putWithoutEscapeHtml(rule);
    }
  }
}

View File

@ -38,6 +38,7 @@ import org.apache.commons.lang.ArrayUtils;
import org.apache.hadoop.yarn.MockApps; import org.apache.hadoop.yarn.MockApps;
import org.apache.hadoop.yarn.webapp.view.HtmlPage; import org.apache.hadoop.yarn.webapp.view.HtmlPage;
import org.apache.hadoop.yarn.webapp.view.JQueryUI; import org.apache.hadoop.yarn.webapp.view.JQueryUI;
import org.apache.hadoop.yarn.webapp.view.RobotsTextPage;
import org.apache.hadoop.yarn.webapp.view.TextPage; import org.apache.hadoop.yarn.webapp.view.TextPage;
import org.junit.Test; import org.junit.Test;
import org.slf4j.Logger; import org.slf4j.Logger;
@ -260,6 +261,31 @@ public class TestWebApp {
} }
} }
/**
 * Starts a minimal test web app, fetches its robots.txt and checks that the
 * response consists of exactly the user-agent rule and the disallow rule.
 */
@Test public void testRobotsText() throws Exception {
  WebApp webApp = WebApps.$for("test", TestWebApp.class, this, "ws")
      .start(new WebApp() {
        @Override
        public void setup() {
          bind(MyTestJAXBContextResolver.class);
          bind(MyTestWebService.class);
        }
      });
  String rootUrl = baseUrl(webApp);
  try {
    String body = getContent(rootUrl + RobotsTextPage.ROBOTS_TXT).trim();
    // Split on the system line separator, since that is what TextView
    // (via PrintWriter) seems to use when writing each line.
    String lineSeparator = System.getProperty("line.separator");
    String[] lines = body.split(lineSeparator);
    assertEquals(2, lines.length);
    assertEquals("User-agent: *", lines[0]);
    assertEquals("Disallow: /", lines[1]);
  } finally {
    // Always shut the embedded server down, even if an assertion fails.
    webApp.stop();
  }
}
// This is to test the GuiceFilter should only be applied to webAppContext, // This is to test the GuiceFilter should only be applied to webAppContext,
// not to logContext; // not to logContext;
@Test public void testYARNWebAppContext() throws Exception { @Test public void testYARNWebAppContext() throws Exception {