From 3cd94a86f3920738fac0ddf48b4a8a5617b58924 Mon Sep 17 00:00:00 2001
From: Oleg Kalnichevski
Date: Thu, 28 Aug 2014 19:08:09 +0000
Subject: [PATCH] Factored Public Suffix matching code into a separate utility class

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1621182 13f79535-47bb-0310-9956-ffa450edef68
---
Notes (free-form commentary; not part of the commit message):

  * PublicSuffixMatcher.match() carries over the matching logic that this
    patch removes from PublicSuffixFilter.isForPublicSuffix(): strip one
    leading dot, apply IDN.toUnicode(), consult the exceptions first, then
    test the name and, label by label, its "*.<rest>" wildcard forms
    against the rules. Unlike the removed code it returns false for a
    null domain.
  * PublicSuffixFilter now stores the collections passed to its setters
    as-is (no HashSet copy), creates the matcher on first use and discards
    it whenever either setter is called.
  * NOTE(review): the removed code returned false when no suffixes had been
    set; the replacement passes this.suffixes straight to the
    PublicSuffixMatcher constructor, which calls Args.notNull(rules, ...).
    Presumably that rejects null -- confirm that a filter without a suffix
    list still behaves as before.
  * NOTE(review): this copy was rebuilt from a rendering in which line
    breaks were collapsed and angle-bracket tokens (generic type arguments,
    Javadoc markup, the licence URL) were missing. They have been
    re-inserted so that hunk line counts and the diffstat add up; exact
    whitespace and the removed Javadoc line in PublicSuffixList.java should
    be checked against the repository. The author e-mail address was also
    missing and has not been restored.

 .../http/conn/util/PublicSuffixList.java      |   6 +-
 .../http/conn/util/PublicSuffixMatcher.java   | 101 ++++++++++++++++++
 .../http/impl/cookie/PublicSuffixFilter.java  |  51 ++-------
 .../conn/util/TestPublicSuffixMatcher.java    |  85 +++++++++++++++
 4 files changed, 202 insertions(+), 41 deletions(-)
 create mode 100644 httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java
 create mode 100644 httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixMatcher.java

diff --git a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java
index dbba1f819..ec15c9d40 100644
--- a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java
+++ b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java
@@ -33,7 +33,11 @@ import org.apache.http.annotation.Immutable;
 import org.apache.http.util.Args;
 
 /**
- * Public suffix list from publicsuffix.org.
+ * Public suffix is a set of DNS names or wildcards concatenated with dots. It represents
+ * the part of a domain name which is not under the control of the individual registrant
+ * <p>
+ * An up-to-date list of suffixes can be obtained from
+ * <a href="http://publicsuffix.org/">publicsuffix.org</a>
  *
  * @since 4.4
  */
diff --git a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java
new file mode 100644
index 000000000..248df90f8
--- /dev/null
+++ b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java
@@ -0,0 +1,101 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+package org.apache.http.conn.util;
+
+import java.net.IDN;
+import java.util.Collection;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.http.annotation.ThreadSafe;
+import org.apache.http.util.Args;
+
+/**
+ * Utility class that can test if DNS names match the content of the Public Suffix List.
+ * <p>
+ * An up-to-date list of suffixes can be obtained from
+ * <a href="http://publicsuffix.org/">publicsuffix.org</a>
+ *
+ * @see org.apache.http.conn.util.PublicSuffixList
+ *
+ * @since 4.4
+ */
+@ThreadSafe
+public final class PublicSuffixMatcher {
+
+    private final Map<String, String> rules;
+    private final Map<String, String> exceptions;
+
+    public PublicSuffixMatcher(final Collection<String> rules, final Collection<String> exceptions) {
+        Args.notNull(rules, "Domain suffix rules");
+        this.rules = new ConcurrentHashMap<String, String>(rules.size());
+        for (String rule: rules) {
+            this.rules.put(rule, rule);
+        }
+        if (exceptions != null) {
+            this.exceptions = new ConcurrentHashMap<String, String>(exceptions.size());
+            for (String exception: exceptions) {
+                this.exceptions.put(exception, exception);
+            }
+        } else {
+            this.exceptions = null;
+        }
+    }
+
+    public boolean match(final String domain) {
+        String s = domain;
+        if (s == null) {
+            return false;
+        }
+        if (s.startsWith(".")) {
+            s = s.substring(1);
+        }
+        s = IDN.toUnicode(s);
+
+        // An exception rule takes priority over any other matching rule.
+        if (this.exceptions != null && this.exceptions.containsKey(s)) {
+            return false;
+        }
+
+        do {
+            if (this.rules.containsKey(s)) {
+                return true;
+            }
+            // patterns
+            if (s.startsWith("*.")) {
+                s = s.substring(2);
+            }
+            final int nextdot = s.indexOf('.');
+            if (nextdot == -1) {
+                break;
+            }
+            s = "*" + s.substring(nextdot);
+        } while (!s.isEmpty());
+
+        return false;
+    }
+}
diff --git a/httpclient/src/main/java/org/apache/http/impl/cookie/PublicSuffixFilter.java b/httpclient/src/main/java/org/apache/http/impl/cookie/PublicSuffixFilter.java
index 50e9cfc5d..00aa2f800 100644
--- a/httpclient/src/main/java/org/apache/http/impl/cookie/PublicSuffixFilter.java
+++ b/httpclient/src/main/java/org/apache/http/impl/cookie/PublicSuffixFilter.java
@@ -26,11 +26,9 @@
  */
 package org.apache.http.impl.cookie;
 
-import java.net.IDN;
 import java.util.Collection;
-import java.util.HashSet;
-import java.util.Set;
 
+import org.apache.http.conn.util.PublicSuffixMatcher;
 import org.apache.http.cookie.Cookie;
 import org.apache.http.cookie.CookieAttributeHandler;
 import org.apache.http.cookie.CookieOrigin;
@@ -49,8 +47,9 @@ import org.apache.http.cookie.SetCookie;
  */
 public class PublicSuffixFilter implements CookieAttributeHandler {
     private final CookieAttributeHandler wrapped;
-    private Set<String> exceptions;
-    private Set<String> suffixes;
+    private Collection<String> exceptions;
+    private Collection<String> suffixes;
+    private PublicSuffixMatcher matcher;
 
     public PublicSuffixFilter(final CookieAttributeHandler wrapped) {
         this.wrapped = wrapped;
@@ -63,7 +62,8 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
      * @param suffixes
      */
     public void setPublicSuffixes(final Collection<String> suffixes) {
-        this.suffixes = new HashSet<String>(suffixes);
+        this.suffixes = suffixes;
+        this.matcher = null;
     }
 
     /**
@@ -72,7 +72,8 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
      * @param exceptions
      */
     public void setExceptions(final Collection<String> exceptions) {
-        this.exceptions = new HashSet<String>(exceptions);
+        this.exceptions = exceptions;
+        this.matcher = null;
     }
 
     /**
@@ -97,39 +98,9 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
     }
 
     private boolean isForPublicSuffix(final Cookie cookie) {
-        String domain = cookie.getDomain();
-        if (domain.startsWith(".")) {
-            domain = domain.substring(1);
+        if (matcher == null) {
+            matcher = new PublicSuffixMatcher(this.suffixes, this.exceptions);
         }
-        domain = IDN.toUnicode(domain);
-
-        // An exception rule takes priority over any other matching rule.
-        if (this.exceptions != null) {
-            if (this.exceptions.contains(domain)) {
-                return false;
-            }
-        }
-
-
-        if (this.suffixes == null) {
-            return false;
-        }
-
-        do {
-            if (this.suffixes.contains(domain)) {
-                return true;
-            }
-            // patterns
-            if (domain.startsWith("*.")) {
-                domain = domain.substring(2);
-            }
-            final int nextdot = domain.indexOf('.');
-            if (nextdot == -1) {
-                break;
-            }
-            domain = "*" + domain.substring(nextdot);
-        } while (!domain.isEmpty());
-
-        return false;
+        return matcher.match(cookie.getDomain());
     }
 }
diff --git a/httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixMatcher.java b/httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixMatcher.java
new file mode 100644
index 000000000..894408e20
--- /dev/null
+++ b/httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixMatcher.java
@@ -0,0 +1,85 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation.  For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ *
+ */
+
+package org.apache.http.conn.util;
+
+import java.io.InputStream;
+import java.io.InputStreamReader;
+
+import org.apache.http.Consts;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+public class TestPublicSuffixMatcher {
+
+    private static final String SOURCE_FILE = "suffixlist.txt";
+
+    private PublicSuffixMatcher matcher;
+
+    @Before
+    public void setUp() throws Exception {
+        final ClassLoader classLoader = getClass().getClassLoader();
+        final InputStream in = classLoader.getResourceAsStream(SOURCE_FILE);
+        Assert.assertNotNull(in);
+        final PublicSuffixList suffixList;
+        try {
+            final PublicSuffixListParser parser = new PublicSuffixListParser();
+            suffixList = parser.parse(new InputStreamReader(in, Consts.UTF_8));
+        } finally {
+            in.close();
+        }
+        matcher = new PublicSuffixMatcher(suffixList.getRules(), suffixList.getExceptions());
+    }
+
+    @Test
+    public void testParse() throws Exception {
+        Assert.assertTrue(matcher.match(".jp"));
+        Assert.assertTrue(matcher.match(".ac.jp"));
+        Assert.assertTrue(matcher.match(".any.tokyo.jp"));
+        // exception
+        Assert.assertFalse(matcher.match(".metro.tokyo.jp"));
+    }
+
+    @Test
+    public void testUnicode() throws Exception {
+        Assert.assertTrue(matcher.match(".h\u00E5.no")); // \u00E5 is <aring>
+        Assert.assertTrue(matcher.match(".xn--h-2fa.no"));
+        Assert.assertTrue(matcher.match(".h\u00E5.no"));
+        Assert.assertTrue(matcher.match(".xn--h-2fa.no"));
+    }
+
+    @Test
+    public void testWhitespace() throws Exception {
+        Assert.assertTrue(matcher.match(".xx"));
+        // yy appears after whitespace
+        Assert.assertFalse(matcher.match(".yy"));
+        // zz is commented
+        Assert.assertFalse(matcher.match(".zz"));
+    }
+
+}