From a0b31445afb3da5aa91822535ab23f5713162a5e Mon Sep 17 00:00:00 2001 From: Oleg Kalnichevski Date: Thu, 26 Feb 2015 16:36:00 +0000 Subject: [PATCH] HTTPCLIENT-1613: support of private domains in Mozilla Public Suffix List git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1662492 13f79535-47bb-0310-9956-ffa450edef68 --- .../conn/ssl/DefaultHostnameVerifier.java | 3 +- .../org/apache/http/conn/util/DomainType.java | 38 +++++++ .../http/conn/util/PublicSuffixList.java | 20 +++- .../conn/util/PublicSuffixListParser.java | 101 ++++++++++++----- .../http/conn/util/PublicSuffixMatcher.java | 107 +++++++++++++++--- .../conn/util/PublicSuffixMatcherLoader.java | 5 +- .../conn/ssl/TestDefaultHostnameVerifier.java | 3 +- .../conn/util/TestPublicSuffixListParser.java | 35 +++++- .../cookie/TestBasicCookieAttribHandlers.java | 3 +- httpclient/src/test/resources/suffixlist.txt | 5 - httpclient/src/test/resources/suffixlist2.txt | 39 +++++++ 11 files changed, 304 insertions(+), 55 deletions(-) create mode 100644 httpclient/src/main/java/org/apache/http/conn/util/DomainType.java create mode 100644 httpclient/src/test/resources/suffixlist2.txt diff --git a/httpclient/src/main/java/org/apache/http/conn/ssl/DefaultHostnameVerifier.java b/httpclient/src/main/java/org/apache/http/conn/ssl/DefaultHostnameVerifier.java index 03a1edb91..92369fead 100644 --- a/httpclient/src/main/java/org/apache/http/conn/ssl/DefaultHostnameVerifier.java +++ b/httpclient/src/main/java/org/apache/http/conn/ssl/DefaultHostnameVerifier.java @@ -52,6 +52,7 @@ import javax.security.auth.x500.X500Principal; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.http.annotation.Immutable; +import org.apache.http.conn.util.DomainType; import org.apache.http.conn.util.InetAddressUtils; import org.apache.http.conn.util.PublicSuffixMatcher; @@ -178,7 +179,7 @@ public final class DefaultHostnameVerifier implements HostnameVerifier { final 
PublicSuffixMatcher publicSuffixMatcher, final boolean strict) { if (publicSuffixMatcher != null && host.contains(".")) { - if (!matchDomainRoot(host, publicSuffixMatcher.getDomainRoot(identity))) { + if (!matchDomainRoot(host, publicSuffixMatcher.getDomainRoot(identity, DomainType.ICANN))) { return false; } } diff --git a/httpclient/src/main/java/org/apache/http/conn/util/DomainType.java b/httpclient/src/main/java/org/apache/http/conn/util/DomainType.java new file mode 100644 index 000000000..34673e2f3 --- /dev/null +++ b/httpclient/src/main/java/org/apache/http/conn/util/DomainType.java @@ -0,0 +1,38 @@ +/* + * ==================================================================== + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * ==================================================================== + * + * This software consists of voluntary contributions made by many + * individuals on behalf of the Apache Software Foundation. For more + * information on the Apache Software Foundation, please see + * . + * + */ +package org.apache.http.conn.util; + +/** + * Domain types differentiated by Mozilla Public Suffix List. 
+ * + * @since 4.5 + */ +public enum DomainType { + + UNKNOWN, ICANN, PRIVATE + +} diff --git a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java index ec15c9d40..dfdd928ce 100644 --- a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java +++ b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixList.java @@ -44,12 +44,28 @@ import org.apache.http.util.Args; @Immutable public final class PublicSuffixList { + private final DomainType type; private final List rules; private final List exceptions; - public PublicSuffixList(final List rules, final List exceptions) { + /** + * @since 4.5 + */ + public PublicSuffixList(final DomainType type, final List rules, final List exceptions) { + this.type = Args.notNull(type, "Domain type"); this.rules = Collections.unmodifiableList(Args.notNull(rules, "Domain suffix rules")); - this.exceptions = Collections.unmodifiableList(Args.notNull(exceptions, "Domain suffix exceptions")); + this.exceptions = Collections.unmodifiableList(exceptions != null ? 
exceptions : Collections.emptyList()); + } + + public PublicSuffixList(final List rules, final List exceptions) { + this(DomainType.UNKNOWN, rules, exceptions); + } + + /** + * @since 4.5 + */ + public DomainType getType() { + return type; } public List getRules() { diff --git a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixListParser.java b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixListParser.java index 84bbd182f..0bf63e18e 100644 --- a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixListParser.java +++ b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixListParser.java @@ -43,14 +43,14 @@ import org.apache.http.annotation.Immutable; @Immutable public final class PublicSuffixListParser { - private static final int MAX_LINE_LEN = 256; - public PublicSuffixListParser() { } /** - * Parses the public suffix list format. When creating the reader from the file, make sure to - * use the correct encoding (the original list is in UTF-8). + * Parses the public suffix list format. + *

+ * When creating the reader from the file, make sure to use the correct encoding + * (the original list is in UTF-8). * * @param reader the data reader. The caller is responsible for closing the reader. * @throws java.io.IOException on error while reading from list @@ -59,11 +59,9 @@ public final class PublicSuffixListParser { final List rules = new ArrayList(); final List exceptions = new ArrayList(); final BufferedReader r = new BufferedReader(reader); - final StringBuilder sb = new StringBuilder(256); - boolean more = true; - while (more) { - more = readLine(r, sb); - String line = sb.toString(); + + String line; + while ((line = r.readLine()) != null) { if (line.isEmpty()) { continue; } @@ -85,30 +83,81 @@ public final class PublicSuffixListParser { rules.add(line); } } - return new PublicSuffixList(rules, exceptions); + return new PublicSuffixList(DomainType.UNKNOWN, rules, exceptions); } - private boolean readLine(final Reader r, final StringBuilder sb) throws IOException { - sb.setLength(0); - int b; - boolean hitWhitespace = false; - while ((b = r.read()) != -1) { - final char c = (char) b; - if (c == '\n') { - break; + /** + * Parses the public suffix list format by domain type (currently supported ICANN and PRIVATE). + *

+ * When creating the reader from the file, make sure to use the correct encoding + * (the original list is in UTF-8). + * + * @param reader the data reader. The caller is responsible for closing the reader. + * @throws java.io.IOException on error while reading from list + * + * @since 4.5 + */ + public List parseByType(final Reader reader) throws IOException { + final List result = new ArrayList(2); + + final BufferedReader r = new BufferedReader(reader); + final StringBuilder sb = new StringBuilder(256); + + DomainType domainType = null; + List rules = null; + List exceptions = null; + String line; + while ((line = r.readLine()) != null) { + if (line.isEmpty()) { + continue; } - // Each line is only read up to the first whitespace - if (Character.isWhitespace(c)) { - hitWhitespace = true; + if (line.startsWith("//")) { + + if (domainType == null) { + if (line.contains("===BEGIN ICANN DOMAINS===")) { + domainType = DomainType.ICANN; + } else if (line.contains("===BEGIN PRIVATE DOMAINS===")) { + domainType = DomainType.PRIVATE; + } + } else { + if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) { + if (rules != null) { + result.add(new PublicSuffixList(domainType, rules, exceptions)); + } + domainType = null; + rules = null; + exceptions = null; + } + } + + continue; //entire lines can also be commented using // } - if (!hitWhitespace) { - sb.append(c); + if (domainType == null) { + continue; } - if (sb.length() > MAX_LINE_LEN) { - return false; // prevent excess memory usage + + if (line.startsWith(".")) { + line = line.substring(1); // A leading dot is optional + } + // An exclamation mark (!) 
at the start of a rule marks an exception to a previous wildcard rule + final boolean isException = line.startsWith("!"); + if (isException) { + line = line.substring(1); + } + + if (isException) { + if (exceptions == null) { + exceptions = new ArrayList(); + } + exceptions.add(line); + } else { + if (rules == null) { + rules = new ArrayList(); + } + rules.add(line); } } - return (b != -1); + return result; } } diff --git a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java index 02393aca4..30c666fbb 100644 --- a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java +++ b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcher.java @@ -28,6 +28,7 @@ package org.apache.http.conn.util; import java.net.IDN; import java.util.Collection; +import java.util.List; import java.util.Locale; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; @@ -48,33 +49,96 @@ import org.apache.http.util.Args; @ThreadSafe public final class PublicSuffixMatcher { - private final Map rules; - private final Map exceptions; + private final Map rules; + private final Map exceptions; public PublicSuffixMatcher(final Collection rules, final Collection exceptions) { + this(DomainType.UNKNOWN, rules, exceptions); + } + + /** + * @since 4.5 + */ + public PublicSuffixMatcher( + final DomainType domainType, final Collection rules, final Collection exceptions) { + Args.notNull(domainType, "Domain type"); Args.notNull(rules, "Domain suffix rules"); - this.rules = new ConcurrentHashMap(rules.size()); + this.rules = new ConcurrentHashMap(rules.size()); for (String rule: rules) { - this.rules.put(rule, rule); + this.rules.put(rule, domainType); } + this.exceptions = new ConcurrentHashMap(); if (exceptions != null) { - this.exceptions = new ConcurrentHashMap(exceptions.size()); for (String exception: exceptions) { - this.exceptions.put(exception, 
exception); + this.exceptions.put(exception, domainType); } - } else { - this.exceptions = null; } } /** - * Returns registrable part of the domain for the given domain name of {@code null} + * @since 4.5 + */ + public PublicSuffixMatcher(final Collection lists) { + Args.notNull(lists, "Domain suffix lists"); + this.rules = new ConcurrentHashMap(); + this.exceptions = new ConcurrentHashMap(); + for (PublicSuffixList list: lists) { + final DomainType domainType = list.getType(); + final List rules = list.getRules(); + for (String rule: rules) { + this.rules.put(rule, domainType); + } + final List exceptions = list.getExceptions(); + if (exceptions != null) { + for (String exception: exceptions) { + this.exceptions.put(exception, domainType); + } + } + } + } + + private static boolean hasEntry(final Map map, final String rule, final DomainType expectedType) { + if (map == null) { + return false; + } + final DomainType domainType = map.get(rule); + if (domainType == null) { + return false; + } else { + return expectedType == null || domainType.equals(expectedType); + } + } + + private boolean hasRule(final String rule, final DomainType expectedType) { + return hasEntry(this.rules, rule, expectedType); + } + + private boolean hasException(final String exception, final DomainType expectedType) { + return hasEntry(this.exceptions, exception, expectedType); + } + + /** + * Returns registrable part of the domain for the given domain name or {@code null} * if given domain represents a public suffix. * * @param domain * @return domain root */ public String getDomainRoot(final String domain) { + return getDomainRoot(domain, null); + } + + /** + * Returns registrable part of the domain for the given domain name or {@code null} + * if given domain represents a public suffix. + * + * @param domain + * @param expectedType expected domain type or {@code null} if any. 
+ * @return domain root + * + * @since 4.5 + */ + public String getDomainRoot(final String domain, final DomainType expectedType) { if (domain == null) { return null; } @@ -86,11 +150,11 @@ public final class PublicSuffixMatcher { while (segment != null) { // An exception rule takes priority over any other matching rule. - if (this.exceptions != null && this.exceptions.containsKey(IDN.toUnicode(segment))) { + if (hasException(IDN.toUnicode(segment), expectedType)) { return segment; } - if (this.rules.containsKey(IDN.toUnicode(segment))) { + if (hasRule(IDN.toUnicode(segment), expectedType)) { break; } @@ -98,7 +162,7 @@ public final class PublicSuffixMatcher { final String nextSegment = nextdot != -1 ? segment.substring(nextdot + 1) : null; if (nextSegment != null) { - if (this.rules.containsKey("*." + IDN.toUnicode(nextSegment))) { + if (hasRule("*." + IDN.toUnicode(nextSegment), expectedType)) { break; } } @@ -110,11 +174,28 @@ public final class PublicSuffixMatcher { return domainName; } + /** + * Tests whether the given domain matches any entry from the public suffix list. + */ public boolean matches(final String domain) { + return matches(domain, null); + } + + /** + * Tests whether the given domain matches any entry from the public suffix list. + * + * @param domain + * @param expectedType expected domain type or {@code null} if any. + * @return {@code true} if the given domain matches any of the public suffixes. + * + * @since 4.5 + */ + public boolean matches(final String domain, final DomainType expectedType) { if (domain == null) { return false; } - final String domainRoot = getDomainRoot(domain.startsWith(".") ? domain.substring(1) : domain); + final String domainRoot = getDomainRoot( + domain.startsWith(".") ?
domain.substring(1) : domain, expectedType); return domainRoot == null; } diff --git a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcherLoader.java b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcherLoader.java index 90174064a..3c75f9d3c 100644 --- a/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcherLoader.java +++ b/httpclient/src/main/java/org/apache/http/conn/util/PublicSuffixMatcherLoader.java @@ -33,6 +33,7 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.net.URL; import java.util.Arrays; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -49,9 +50,9 @@ import org.apache.http.util.Args; public final class PublicSuffixMatcherLoader { private static PublicSuffixMatcher load(final InputStream in) throws IOException { - final PublicSuffixList list = new PublicSuffixListParser().parse( + final List lists = new PublicSuffixListParser().parseByType( new InputStreamReader(in, Consts.UTF_8)); - return new PublicSuffixMatcher(list.getRules(), list.getExceptions()); + return new PublicSuffixMatcher(lists); } public static PublicSuffixMatcher load(final URL url) throws IOException { diff --git a/httpclient/src/test/java/org/apache/http/conn/ssl/TestDefaultHostnameVerifier.java b/httpclient/src/test/java/org/apache/http/conn/ssl/TestDefaultHostnameVerifier.java index b809bcb52..2d94e7ab6 100644 --- a/httpclient/src/test/java/org/apache/http/conn/ssl/TestDefaultHostnameVerifier.java +++ b/httpclient/src/test/java/org/apache/http/conn/ssl/TestDefaultHostnameVerifier.java @@ -35,6 +35,7 @@ import java.util.Arrays; import javax.net.ssl.SSLException; +import org.apache.http.conn.util.DomainType; import org.apache.http.conn.util.PublicSuffixMatcher; import org.junit.Assert; import org.junit.Before; @@ -52,7 +53,7 @@ public class TestDefaultHostnameVerifier { @Before public void setup() { impl = new DefaultHostnameVerifier(); - 
publicSuffixMatcher = new PublicSuffixMatcher(Arrays.asList("com", "co.jp", "gov.uk"), null); + publicSuffixMatcher = new PublicSuffixMatcher(DomainType.ICANN, Arrays.asList("com", "co.jp", "gov.uk"), null); implWithPublicSuffixCheck = new DefaultHostnameVerifier(publicSuffixMatcher); } diff --git a/httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixListParser.java b/httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixListParser.java index 191e9c010..35ac9d57b 100644 --- a/httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixListParser.java +++ b/httpclient/src/test/java/org/apache/http/conn/util/TestPublicSuffixListParser.java @@ -30,6 +30,8 @@ package org.apache.http.conn.util; import java.io.InputStream; import java.io.InputStreamReader; import java.util.Arrays; +import java.util.Collections; +import java.util.List; import org.apache.http.Consts; import org.junit.Assert; @@ -37,12 +39,10 @@ import org.junit.Test; public class TestPublicSuffixListParser { - private static final String SOURCE_FILE = "suffixlist.txt"; - @Test public void testParse() throws Exception { final ClassLoader classLoader = getClass().getClassLoader(); - final InputStream in = classLoader.getResourceAsStream(SOURCE_FILE); + final InputStream in = classLoader.getResourceAsStream("suffixlist.txt"); Assert.assertNotNull(in); final PublicSuffixList suffixList; try { @@ -52,8 +52,35 @@ public class TestPublicSuffixListParser { in.close(); } Assert.assertNotNull(suffixList); - Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp", "no", "h\u00E5.no", "xx"), suffixList.getRules()); + Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp", "no", "h\u00E5.no"), suffixList.getRules()); Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), suffixList.getExceptions()); } + @Test + public void testParseByType() throws Exception { + final ClassLoader classLoader = getClass().getClassLoader(); + final InputStream in = 
classLoader.getResourceAsStream("suffixlist2.txt"); + Assert.assertNotNull(in); + final List suffixLists; + try { + final PublicSuffixListParser parser = new PublicSuffixListParser(); + suffixLists = parser.parseByType(new InputStreamReader(in, Consts.UTF_8)); + } finally { + in.close(); + } + Assert.assertNotNull(suffixLists); + Assert.assertEquals(2, suffixLists.size()); + final PublicSuffixList publicSuffixList1 = suffixLists.get(0); + Assert.assertNotNull(publicSuffixList1); + Assert.assertEquals(DomainType.ICANN, publicSuffixList1.getType()); + Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp"), publicSuffixList1.getRules()); + Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), publicSuffixList1.getExceptions()); + + final PublicSuffixList publicSuffixList2 = suffixLists.get(1); + Assert.assertNotNull(publicSuffixList2); + Assert.assertEquals(DomainType.PRIVATE, publicSuffixList2.getType()); + Assert.assertEquals(Arrays.asList("googleapis.com", "googlecode.com"), publicSuffixList2.getRules()); + Assert.assertEquals(Collections.emptyList(), publicSuffixList2.getExceptions()); + + } } diff --git a/httpclient/src/test/java/org/apache/http/impl/cookie/TestBasicCookieAttribHandlers.java b/httpclient/src/test/java/org/apache/http/impl/cookie/TestBasicCookieAttribHandlers.java index 7b236fbb2..4da73d176 100644 --- a/httpclient/src/test/java/org/apache/http/impl/cookie/TestBasicCookieAttribHandlers.java +++ b/httpclient/src/test/java/org/apache/http/impl/cookie/TestBasicCookieAttribHandlers.java @@ -34,6 +34,7 @@ import java.util.Date; import java.util.Locale; import org.apache.http.client.utils.DateUtils; +import org.apache.http.conn.util.DomainType; import org.apache.http.conn.util.PublicSuffixMatcher; import org.apache.http.cookie.ClientCookie; import org.apache.http.cookie.CookieAttributeHandler; @@ -498,7 +499,7 @@ public class TestBasicCookieAttribHandlers { public void testPublicSuffixFilter() throws Exception { final BasicClientCookie cookie = 
new BasicClientCookie("name", "value"); - final PublicSuffixMatcher matcher = new PublicSuffixMatcher(Arrays.asList("co.uk", "com"), null); + final PublicSuffixMatcher matcher = new PublicSuffixMatcher(DomainType.ICANN, Arrays.asList("co.uk", "com"), null); final PublicSuffixDomainFilter h = new PublicSuffixDomainFilter(new RFC2109DomainHandler(), matcher); cookie.setDomain(".co.uk"); diff --git a/httpclient/src/test/resources/suffixlist.txt b/httpclient/src/test/resources/suffixlist.txt index 660da32e9..f5ff28378 100644 --- a/httpclient/src/test/resources/suffixlist.txt +++ b/httpclient/src/test/resources/suffixlist.txt @@ -31,8 +31,3 @@ ac.jp // unicode no hå.no - - -// invalid -xx yy -//zz \ No newline at end of file diff --git a/httpclient/src/test/resources/suffixlist2.txt b/httpclient/src/test/resources/suffixlist2.txt new file mode 100644 index 000000000..b907f80c5 --- /dev/null +++ b/httpclient/src/test/resources/suffixlist2.txt @@ -0,0 +1,39 @@ +// ==================================================================== +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+// ==================================================================== +// +// This software consists of voluntary contributions made by many +// individuals on behalf of the Apache Software Foundation. For more +// information on the Apache Software Foundation, please see +// . +// + +// ===BEGIN ICANN DOMAINS=== + +jp +ac.jp +*.tokyo.jp +!metro.tokyo.jp + +// ===END ICANN DOMAINS=== +// ===BEGIN PRIVATE DOMAINS=== + +googleapis.com +googlecode.com + +// ===END PRIVATE DOMAINS===