From 5a58bfee30a662b1b556048504f66f9cf00d182a Mon Sep 17 00:00:00 2001
From: Wangda Tan
Date: Tue, 20 Sep 2016 17:20:50 -0700
Subject: [PATCH] YARN-4591. YARN Web UIs should provide a robots.txt. (Sidharta Seethana via wangda)

---
 .../apache/hadoop/yarn/webapp/Dispatcher.java |  9 +++++
 .../org/apache/hadoop/yarn/webapp/WebApp.java |  4 +-
 .../yarn/webapp/view/RobotsTextPage.java      | 39 +++++++++++++++++++
 .../apache/hadoop/yarn/webapp/TestWebApp.java | 26 +++++++++++++
 4 files changed, 77 insertions(+), 1 deletion(-)
 create mode 100644 hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/RobotsTextPage.java

diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java
index 66dd21bbacc..d519dbb4c0d 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/Dispatcher.java
@@ -35,6 +35,7 @@ import org.apache.hadoop.http.HtmlQuoting;
 import org.apache.hadoop.yarn.webapp.Controller.RequestContext;
 import org.apache.hadoop.yarn.webapp.Router.Dest;
 import org.apache.hadoop.yarn.webapp.view.ErrorPage;
+import org.apache.hadoop.yarn.webapp.view.RobotsTextPage;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -117,6 +118,14 @@ public class Dispatcher extends HttpServlet {
     }
     Controller.RequestContext rc =
         injector.getInstance(Controller.RequestContext.class);
+
+    // Short-circuit: every YARN webapp serves the same robots.txt (200 OK).
+    if (uri.equals(RobotsTextPage.ROBOTS_TXT_PATH)) {
+      rc.setStatus(HttpServletResponse.SC_OK);
+      render(RobotsTextPage.class);
+      return;
+    }
+
     if (setCookieParams(rc, req) > 0) {
       Cookie ec = rc.cookies().get(ERROR_COOKIE);
       if (ec != null) {
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java
index 2c21d1b3129..fe800f08522 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/WebApp.java
@@ -29,6 +29,7 @@ import java.util.Map;
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.http.HttpServer2;
+import org.apache.hadoop.yarn.webapp.view.RobotsTextPage;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -158,7 +159,8 @@ public abstract class WebApp extends ServletModule {
   public void configureServlets() {
     setup();
 
-    serve("/", "/__stop").with(Dispatcher.class);
+    serve("/", "/__stop", RobotsTextPage.ROBOTS_TXT_PATH)
+        .with(Dispatcher.class);
 
     for (String path : this.servePathSpecs) {
       serve(path).with(Dispatcher.class);
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/RobotsTextPage.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/RobotsTextPage.java
new file mode 100644
index 00000000000..b15d492d2f3
--- /dev/null
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/java/org/apache/hadoop/yarn/webapp/view/RobotsTextPage.java
@@ -0,0 +1,39 @@
+/*
+ * *
+ *  Licensed to the Apache Software Foundation (ASF) under one
+ *  or more contributor license agreements.  See the NOTICE file
+ *  distributed with this work for additional information
+ *  regarding copyright ownership.  The ASF licenses this file
+ *  to you under the Apache License, Version 2.0 (the
+ *  "License"); you may not use this file except in compliance
+ *  with the License.  You may obtain a copy of the License at
+ *
+ *       http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ * /
+ */
+
+package org.apache.hadoop.yarn.webapp.view;
+
+/**
+ * Text page that renders a robots.txt which disallows all crawling.
+ */
+
+public class RobotsTextPage extends TextPage {
+  public static final String ROBOTS_TXT = "robots.txt";
+  public static final String ROBOTS_TXT_PATH = "/" + ROBOTS_TXT;
+
+  static final String USER_AGENT_LINE = "User-agent: *";
+  static final String DISALLOW_LINE = "Disallow: /";
+
+  @Override
+  public void render() {
+    putWithoutEscapeHtml(USER_AGENT_LINE);
+    putWithoutEscapeHtml(DISALLOW_LINE);
+  }
+}
diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java
index acec20524bf..deef85590f1 100644
--- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java
+++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/test/java/org/apache/hadoop/yarn/webapp/TestWebApp.java
@@ -38,6 +38,7 @@ import org.apache.commons.lang.ArrayUtils;
 import org.apache.hadoop.yarn.MockApps;
 import org.apache.hadoop.yarn.webapp.view.HtmlPage;
 import org.apache.hadoop.yarn.webapp.view.JQueryUI;
+import org.apache.hadoop.yarn.webapp.view.RobotsTextPage;
 import org.apache.hadoop.yarn.webapp.view.TextPage;
 import org.junit.Test;
 import org.slf4j.Logger;
@@ -260,6 +261,31 @@ public class TestWebApp {
     }
   }
 
+  @Test public void testRobotsText() throws Exception {
+    WebApp app =
+        WebApps.$for("test", TestWebApp.class, this, "ws").start(new WebApp() {
+          @Override
+          public void setup() {
+            bind(MyTestJAXBContextResolver.class);
+            bind(MyTestWebService.class);
+          }
+        });
+    String baseUrl = baseUrl(app);
+    try {
+      // Split on the system line separator, since that is what
+      // TextView (via PrintWriter) seems to use.
+      String[] robotsTxtOutput =
+          getContent(baseUrl + RobotsTextPage.ROBOTS_TXT).trim()
+              .split(System.lineSeparator());
+
+      assertEquals(2, robotsTxtOutput.length);
+      assertEquals("User-agent: *", robotsTxtOutput[0]);
+      assertEquals("Disallow: /", robotsTxtOutput[1]);
+    } finally {
+      app.stop();
+    }
+  }
+
   // This is to test the GuiceFilter should only be applied to webAppContext,
   // not to logContext;
   @Test public void testYARNWebAppContext() throws Exception {