HTTPCLIENT-1613: support of private domains in Mozilla Public Suffix List

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1662492 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Oleg Kalnichevski 2015-02-26 16:36:00 +00:00
parent 5927ed1966
commit a0b31445af
11 changed files with 304 additions and 55 deletions

View File

@ -52,6 +52,7 @@ import javax.security.auth.x500.X500Principal;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.http.annotation.Immutable; import org.apache.http.annotation.Immutable;
import org.apache.http.conn.util.DomainType;
import org.apache.http.conn.util.InetAddressUtils; import org.apache.http.conn.util.InetAddressUtils;
import org.apache.http.conn.util.PublicSuffixMatcher; import org.apache.http.conn.util.PublicSuffixMatcher;
@ -178,7 +179,7 @@ public final class DefaultHostnameVerifier implements HostnameVerifier {
final PublicSuffixMatcher publicSuffixMatcher, final PublicSuffixMatcher publicSuffixMatcher,
final boolean strict) { final boolean strict) {
if (publicSuffixMatcher != null && host.contains(".")) { if (publicSuffixMatcher != null && host.contains(".")) {
if (!matchDomainRoot(host, publicSuffixMatcher.getDomainRoot(identity))) { if (!matchDomainRoot(host, publicSuffixMatcher.getDomainRoot(identity, DomainType.ICANN))) {
return false; return false;
} }
} }

View File

@ -0,0 +1,38 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
package org.apache.http.conn.util;
/**
* Domain types differentiated by Mozilla Public Suffix List.
* <ul>
* <li>{@link #UNKNOWN} - no type information is available; used by the APIs
* that do not take a domain type.</li>
* <li>{@link #ICANN} - entries from the list section delimited by the
* {@code ===BEGIN ICANN DOMAINS===} marker.</li>
* <li>{@link #PRIVATE} - entries from the list section delimited by the
* {@code ===BEGIN PRIVATE DOMAINS===} marker.</li>
* </ul>
*
* @since 4.5
*/
public enum DomainType {
UNKNOWN, ICANN, PRIVATE
}

View File

@ -44,12 +44,28 @@ import org.apache.http.util.Args;
@Immutable @Immutable
public final class PublicSuffixList { public final class PublicSuffixList {
private final DomainType type;
private final List<String> rules; private final List<String> rules;
private final List<String> exceptions; private final List<String> exceptions;
public PublicSuffixList(final List<String> rules, final List<String> exceptions) { /**
* @since 4.5
*/
public PublicSuffixList(final DomainType type, final List<String> rules, final List<String> exceptions) {
this.type = Args.notNull(type, "Domain type");
this.rules = Collections.unmodifiableList(Args.notNull(rules, "Domain suffix rules")); this.rules = Collections.unmodifiableList(Args.notNull(rules, "Domain suffix rules"));
this.exceptions = Collections.unmodifiableList(Args.notNull(exceptions, "Domain suffix exceptions")); this.exceptions = Collections.unmodifiableList(exceptions != null ? exceptions : Collections.<String>emptyList());
}
/**
* Creates a public suffix list without domain type information; the list
* is tagged as {@link DomainType#UNKNOWN}.
*
* @param rules domain suffix rules.
* @param exceptions exceptions to the domain suffix rules.
*/
public PublicSuffixList(final List<String> rules, final List<String> exceptions) {
this(DomainType.UNKNOWN, rules, exceptions);
}
/**
* @since 4.5
*/
public DomainType getType() {
return type;
} }
public List<String> getRules() { public List<String> getRules() {

View File

@ -43,14 +43,14 @@ import org.apache.http.annotation.Immutable;
@Immutable @Immutable
public final class PublicSuffixListParser { public final class PublicSuffixListParser {
private static final int MAX_LINE_LEN = 256;
public PublicSuffixListParser() { public PublicSuffixListParser() {
} }
/** /**
* Parses the public suffix list format. When creating the reader from the file, make sure to * Parses the public suffix list format.
* use the correct encoding (the original list is in UTF-8). * <p>
* When creating the reader from the file, make sure to use the correct encoding
* (the original list is in UTF-8).
* *
* @param reader the data reader. The caller is responsible for closing the reader. * @param reader the data reader. The caller is responsible for closing the reader.
* @throws java.io.IOException on error while reading from list * @throws java.io.IOException on error while reading from list
@ -59,11 +59,9 @@ public final class PublicSuffixListParser {
final List<String> rules = new ArrayList<String>(); final List<String> rules = new ArrayList<String>();
final List<String> exceptions = new ArrayList<String>(); final List<String> exceptions = new ArrayList<String>();
final BufferedReader r = new BufferedReader(reader); final BufferedReader r = new BufferedReader(reader);
final StringBuilder sb = new StringBuilder(256);
boolean more = true; String line;
while (more) { while ((line = r.readLine()) != null) {
more = readLine(r, sb);
String line = sb.toString();
if (line.isEmpty()) { if (line.isEmpty()) {
continue; continue;
} }
@ -85,30 +83,81 @@ public final class PublicSuffixListParser {
rules.add(line); rules.add(line);
} }
} }
return new PublicSuffixList(rules, exceptions); return new PublicSuffixList(DomainType.UNKNOWN, rules, exceptions);
} }
private boolean readLine(final Reader r, final StringBuilder sb) throws IOException { /**
sb.setLength(0); * Parses the public suffix list format by domain type (currently supported ICANN and PRIVATE).
int b; * <p>
boolean hitWhitespace = false; * When creating the reader from the file, make sure to use the correct encoding
while ((b = r.read()) != -1) { * (the original list is in UTF-8).
final char c = (char) b; *
if (c == '\n') { * @param reader the data reader. The caller is responsible for closing the reader.
break; * @throws java.io.IOException on error while reading from list
*
* @since 4.5
*/
public List<PublicSuffixList> parseByType(final Reader reader) throws IOException {
final List<PublicSuffixList> result = new ArrayList<PublicSuffixList>(2);
final BufferedReader r = new BufferedReader(reader);
final StringBuilder sb = new StringBuilder(256);
DomainType domainType = null;
List<String> rules = null;
List<String> exceptions = null;
String line;
while ((line = r.readLine()) != null) {
if (line.isEmpty()) {
continue;
} }
// Each line is only read up to the first whitespace if (line.startsWith("//")) {
if (Character.isWhitespace(c)) {
hitWhitespace = true; if (domainType == null) {
if (line.contains("===BEGIN ICANN DOMAINS===")) {
domainType = DomainType.ICANN;
} else if (line.contains("===BEGIN PRIVATE DOMAINS===")) {
domainType = DomainType.PRIVATE;
}
} else {
if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) {
if (rules != null) {
result.add(new PublicSuffixList(domainType, rules, exceptions));
}
domainType = null;
rules = null;
exceptions = null;
}
}
continue; //entire lines can also be commented using //
} }
if (!hitWhitespace) { if (domainType == null) {
sb.append(c); continue;
} }
if (sb.length() > MAX_LINE_LEN) {
return false; // prevent excess memory usage if (line.startsWith(".")) {
line = line.substring(1); // A leading dot is optional
}
// An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
final boolean isException = line.startsWith("!");
if (isException) {
line = line.substring(1);
}
if (isException) {
if (exceptions == null) {
exceptions = new ArrayList<String>();
}
exceptions.add(line);
} else {
if (rules == null) {
rules = new ArrayList<String>();
}
rules.add(line);
} }
} }
return (b != -1); return result;
} }
} }

View File

@ -28,6 +28,7 @@ package org.apache.http.conn.util;
import java.net.IDN; import java.net.IDN;
import java.util.Collection; import java.util.Collection;
import java.util.List;
import java.util.Locale; import java.util.Locale;
import java.util.Map; import java.util.Map;
import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentHashMap;
@ -48,33 +49,96 @@ import org.apache.http.util.Args;
@ThreadSafe @ThreadSafe
public final class PublicSuffixMatcher { public final class PublicSuffixMatcher {
private final Map<String, String> rules; private final Map<String, DomainType> rules;
private final Map<String, String> exceptions; private final Map<String, DomainType> exceptions;
public PublicSuffixMatcher(final Collection<String> rules, final Collection<String> exceptions) { public PublicSuffixMatcher(final Collection<String> rules, final Collection<String> exceptions) {
this(DomainType.UNKNOWN, rules, exceptions);
}
/**
* @since 4.5
*/
public PublicSuffixMatcher(
final DomainType domainType, final Collection<String> rules, final Collection<String> exceptions) {
Args.notNull(domainType, "Domain type");
Args.notNull(rules, "Domain suffix rules"); Args.notNull(rules, "Domain suffix rules");
this.rules = new ConcurrentHashMap<String, String>(rules.size()); this.rules = new ConcurrentHashMap<String, DomainType>(rules.size());
for (String rule: rules) { for (String rule: rules) {
this.rules.put(rule, rule); this.rules.put(rule, domainType);
} }
this.exceptions = new ConcurrentHashMap<String, DomainType>();
if (exceptions != null) { if (exceptions != null) {
this.exceptions = new ConcurrentHashMap<String, String>(exceptions.size());
for (String exception: exceptions) { for (String exception: exceptions) {
this.exceptions.put(exception, exception); this.exceptions.put(exception, domainType);
} }
} else {
this.exceptions = null;
} }
} }
/** /**
* Returns registrable part of the domain for the given domain name of {@code null} * @since 4.5
*/
public PublicSuffixMatcher(final Collection<PublicSuffixList> lists) {
    Args.notNull(lists, "Domain suffix lists");
    this.rules = new ConcurrentHashMap<String, DomainType>();
    this.exceptions = new ConcurrentHashMap<String, DomainType>();
    // Merge all lists into the two lookup maps, tagging every entry with
    // the domain type of the list it came from.
    for (final PublicSuffixList suffixList : lists) {
        final DomainType listType = suffixList.getType();
        for (final String suffixRule : suffixList.getRules()) {
            this.rules.put(suffixRule, listType);
        }
        final List<String> suffixExceptions = suffixList.getExceptions();
        if (suffixExceptions == null) {
            // A list without exceptions contributes rules only.
            continue;
        }
        for (final String suffixException : suffixExceptions) {
            this.exceptions.put(suffixException, listType);
        }
    }
}
/**
* Tests whether the map holds an entry for the given rule whose domain type
* is compatible with the expected one.
*
* @param map rule-to-type map; {@code null} is treated as empty.
* @param rule the rule to look up.
* @param expectedType expected domain type or {@code null} if any type matches.
* @return {@code true} if a compatible entry exists.
*/
private static boolean hasEntry(final Map<String, DomainType> map, final String rule, final DomainType expectedType) {
    final DomainType actualType = map != null ? map.get(rule) : null;
    return actualType != null
            && (expectedType == null || actualType.equals(expectedType));
}
/**
* Tests whether the rule map of this matcher contains the given rule with
* a domain type compatible with {@code expectedType}
* ({@code null} accepts any type).
*/
private boolean hasRule(final String rule, final DomainType expectedType) {
return hasEntry(this.rules, rule, expectedType);
}
/**
* Tests whether the exception map of this matcher contains the given
* exception with a domain type compatible with {@code expectedType}
* ({@code null} accepts any type).
*/
private boolean hasException(final String exception, final DomainType expectedType) {
return hasEntry(this.exceptions, exception, expectedType);
}
/**
* Returns registrable part of the domain for the given domain name or {@code null}
* if given domain represents a public suffix. * if given domain represents a public suffix.
* *
* @param domain * @param domain
* @return domain root * @return domain root
*/ */
public String getDomainRoot(final String domain) { public String getDomainRoot(final String domain) {
return getDomainRoot(domain, null);
}
/**
* Returns registrable part of the domain for the given domain name or {@code null}
* if given domain represents a public suffix.
*
* @param domain
* @param expectedType expected domain type or {@code null} if any.
* @return domain root
*
* @since 4.5
*/
public String getDomainRoot(final String domain, final DomainType expectedType) {
if (domain == null) { if (domain == null) {
return null; return null;
} }
@ -86,11 +150,11 @@ public final class PublicSuffixMatcher {
while (segment != null) { while (segment != null) {
// An exception rule takes priority over any other matching rule. // An exception rule takes priority over any other matching rule.
if (this.exceptions != null && this.exceptions.containsKey(IDN.toUnicode(segment))) { if (hasException(IDN.toUnicode(segment), expectedType)) {
return segment; return segment;
} }
if (this.rules.containsKey(IDN.toUnicode(segment))) { if (hasRule(IDN.toUnicode(segment), expectedType)) {
break; break;
} }
@ -98,7 +162,7 @@ public final class PublicSuffixMatcher {
final String nextSegment = nextdot != -1 ? segment.substring(nextdot + 1) : null; final String nextSegment = nextdot != -1 ? segment.substring(nextdot + 1) : null;
if (nextSegment != null) { if (nextSegment != null) {
if (this.rules.containsKey("*." + IDN.toUnicode(nextSegment))) { if (hasRule("*." + IDN.toUnicode(nextSegment), expectedType)) {
break; break;
} }
} }
@ -110,11 +174,28 @@ public final class PublicSuffixMatcher {
return domainName; return domainName;
} }
/**
* Tests whether the given domain matches any entry from the public suffix list.
*/
public boolean matches(final String domain) { public boolean matches(final String domain) {
return matches(domain, null);
}
/**
* Tests whether the given domain matches any entry from the public suffix list.
*
* @param domain
* @param expectedType expected domain type or {@code null} if any.
* @return {@code true} if the given domain matches any of the public suffixes.
*
* @since 4.5
*/
public boolean matches(final String domain, final DomainType expectedType) {
if (domain == null) { if (domain == null) {
return false; return false;
} }
final String domainRoot = getDomainRoot(domain.startsWith(".") ? domain.substring(1) : domain); final String domainRoot = getDomainRoot(
domain.startsWith(".") ? domain.substring(1) : domain, expectedType);
return domainRoot == null; return domainRoot == null;
} }

View File

@ -33,6 +33,7 @@ import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.net.URL; import java.net.URL;
import java.util.Arrays; import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -49,9 +50,9 @@ import org.apache.http.util.Args;
public final class PublicSuffixMatcherLoader { public final class PublicSuffixMatcherLoader {
private static PublicSuffixMatcher load(final InputStream in) throws IOException { private static PublicSuffixMatcher load(final InputStream in) throws IOException {
final PublicSuffixList list = new PublicSuffixListParser().parse( final List<PublicSuffixList> lists = new PublicSuffixListParser().parseByType(
new InputStreamReader(in, Consts.UTF_8)); new InputStreamReader(in, Consts.UTF_8));
return new PublicSuffixMatcher(list.getRules(), list.getExceptions()); return new PublicSuffixMatcher(lists);
} }
public static PublicSuffixMatcher load(final URL url) throws IOException { public static PublicSuffixMatcher load(final URL url) throws IOException {

View File

@ -35,6 +35,7 @@ import java.util.Arrays;
import javax.net.ssl.SSLException; import javax.net.ssl.SSLException;
import org.apache.http.conn.util.DomainType;
import org.apache.http.conn.util.PublicSuffixMatcher; import org.apache.http.conn.util.PublicSuffixMatcher;
import org.junit.Assert; import org.junit.Assert;
import org.junit.Before; import org.junit.Before;
@ -52,7 +53,7 @@ public class TestDefaultHostnameVerifier {
@Before @Before
public void setup() { public void setup() {
impl = new DefaultHostnameVerifier(); impl = new DefaultHostnameVerifier();
publicSuffixMatcher = new PublicSuffixMatcher(Arrays.asList("com", "co.jp", "gov.uk"), null); publicSuffixMatcher = new PublicSuffixMatcher(DomainType.ICANN, Arrays.asList("com", "co.jp", "gov.uk"), null);
implWithPublicSuffixCheck = new DefaultHostnameVerifier(publicSuffixMatcher); implWithPublicSuffixCheck = new DefaultHostnameVerifier(publicSuffixMatcher);
} }

View File

@ -30,6 +30,8 @@ package org.apache.http.conn.util;
import java.io.InputStream; import java.io.InputStream;
import java.io.InputStreamReader; import java.io.InputStreamReader;
import java.util.Arrays; import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.http.Consts; import org.apache.http.Consts;
import org.junit.Assert; import org.junit.Assert;
@ -37,12 +39,10 @@ import org.junit.Test;
public class TestPublicSuffixListParser { public class TestPublicSuffixListParser {
private static final String SOURCE_FILE = "suffixlist.txt";
@Test @Test
public void testParse() throws Exception { public void testParse() throws Exception {
final ClassLoader classLoader = getClass().getClassLoader(); final ClassLoader classLoader = getClass().getClassLoader();
final InputStream in = classLoader.getResourceAsStream(SOURCE_FILE); final InputStream in = classLoader.getResourceAsStream("suffixlist.txt");
Assert.assertNotNull(in); Assert.assertNotNull(in);
final PublicSuffixList suffixList; final PublicSuffixList suffixList;
try { try {
@ -52,8 +52,35 @@ public class TestPublicSuffixListParser {
in.close(); in.close();
} }
Assert.assertNotNull(suffixList); Assert.assertNotNull(suffixList);
Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp", "no", "h\u00E5.no", "xx"), suffixList.getRules()); Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp", "no", "h\u00E5.no"), suffixList.getRules());
Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), suffixList.getExceptions()); Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), suffixList.getExceptions());
} }
@Test
public void testParseByType() throws Exception {
    // The fixture holds one ICANN section followed by one PRIVATE section.
    final InputStream resource = getClass().getClassLoader().getResourceAsStream("suffixlist2.txt");
    Assert.assertNotNull(resource);
    final List<PublicSuffixList> parsedLists;
    try {
        parsedLists = new PublicSuffixListParser().parseByType(
                new InputStreamReader(resource, Consts.UTF_8));
    } finally {
        resource.close();
    }
    Assert.assertNotNull(parsedLists);
    Assert.assertEquals(2, parsedLists.size());

    // First list: ICANN section, including a wildcard rule and its exception.
    final PublicSuffixList icannList = parsedLists.get(0);
    Assert.assertNotNull(icannList);
    Assert.assertEquals(DomainType.ICANN, icannList.getType());
    Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp"), icannList.getRules());
    Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), icannList.getExceptions());

    // Second list: PRIVATE section, which declares no exceptions.
    final PublicSuffixList privateList = parsedLists.get(1);
    Assert.assertNotNull(privateList);
    Assert.assertEquals(DomainType.PRIVATE, privateList.getType());
    Assert.assertEquals(Arrays.asList("googleapis.com", "googlecode.com"), privateList.getRules());
    Assert.assertEquals(Collections.<String>emptyList(), privateList.getExceptions());
}
} }

View File

@ -34,6 +34,7 @@ import java.util.Date;
import java.util.Locale; import java.util.Locale;
import org.apache.http.client.utils.DateUtils; import org.apache.http.client.utils.DateUtils;
import org.apache.http.conn.util.DomainType;
import org.apache.http.conn.util.PublicSuffixMatcher; import org.apache.http.conn.util.PublicSuffixMatcher;
import org.apache.http.cookie.ClientCookie; import org.apache.http.cookie.ClientCookie;
import org.apache.http.cookie.CookieAttributeHandler; import org.apache.http.cookie.CookieAttributeHandler;
@ -498,7 +499,7 @@ public class TestBasicCookieAttribHandlers {
public void testPublicSuffixFilter() throws Exception { public void testPublicSuffixFilter() throws Exception {
final BasicClientCookie cookie = new BasicClientCookie("name", "value"); final BasicClientCookie cookie = new BasicClientCookie("name", "value");
final PublicSuffixMatcher matcher = new PublicSuffixMatcher(Arrays.asList("co.uk", "com"), null); final PublicSuffixMatcher matcher = new PublicSuffixMatcher(DomainType.ICANN, Arrays.asList("co.uk", "com"), null);
final PublicSuffixDomainFilter h = new PublicSuffixDomainFilter(new RFC2109DomainHandler(), matcher); final PublicSuffixDomainFilter h = new PublicSuffixDomainFilter(new RFC2109DomainHandler(), matcher);
cookie.setDomain(".co.uk"); cookie.setDomain(".co.uk");

View File

@ -31,8 +31,3 @@ ac.jp
// unicode // unicode
no no
hå.no hå.no
// invalid
xx yy
//zz

View File

@ -0,0 +1,39 @@
// ====================================================================
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// ====================================================================
//
// This software consists of voluntary contributions made by many
// individuals on behalf of the Apache Software Foundation. For more
// information on the Apache Software Foundation, please see
// <http://www.apache.org/>.
//
// ===BEGIN ICANN DOMAINS===
jp
ac.jp
*.tokyo.jp
!metro.tokyo.jp
// ===END ICANN DOMAINS===
// ===BEGIN PRIVATE DOMAINS===
googleapis.com
googlecode.com
// ===END PRIVATE DOMAINS===