HTTPCLIENT-1613: support of private domains in Mozilla Public Suffix List

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1662492 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Oleg Kalnichevski 2015-02-26 16:36:00 +00:00
parent 5927ed1966
commit a0b31445af
11 changed files with 304 additions and 55 deletions

View File

@ -52,6 +52,7 @@ import javax.security.auth.x500.X500Principal;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.http.annotation.Immutable;
import org.apache.http.conn.util.DomainType;
import org.apache.http.conn.util.InetAddressUtils;
import org.apache.http.conn.util.PublicSuffixMatcher;
@ -178,7 +179,7 @@ public final class DefaultHostnameVerifier implements HostnameVerifier {
final PublicSuffixMatcher publicSuffixMatcher,
final boolean strict) {
if (publicSuffixMatcher != null && host.contains(".")) {
if (!matchDomainRoot(host, publicSuffixMatcher.getDomainRoot(identity))) {
if (!matchDomainRoot(host, publicSuffixMatcher.getDomainRoot(identity, DomainType.ICANN))) {
return false;
}
}

View File

@ -0,0 +1,38 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
package org.apache.http.conn.util;
/**
 * Domain types differentiated by Mozilla Public Suffix List.
 * <p>
 * The list groups its entries into sections; each constant identifies the
 * section an entry originates from.
 *
 * @since 4.5
 */
public enum DomainType {

    /**
     * No specific type; used where a suffix list or matcher is created
     * without an explicit domain type.
     */
    UNKNOWN,

    /**
     * Entries from the section delimited by the
     * {@code ===BEGIN ICANN DOMAINS===} marker.
     */
    ICANN,

    /**
     * Entries from the section delimited by the
     * {@code ===BEGIN PRIVATE DOMAINS===} marker.
     */
    PRIVATE

}

View File

@ -44,12 +44,28 @@ import org.apache.http.util.Args;
@Immutable
public final class PublicSuffixList {
private final DomainType type;
private final List<String> rules;
private final List<String> exceptions;
public PublicSuffixList(final List<String> rules, final List<String> exceptions) {
/**
* @since 4.5
*/
public PublicSuffixList(final DomainType type, final List<String> rules, final List<String> exceptions) {
this.type = Args.notNull(type, "Domain type");
this.rules = Collections.unmodifiableList(Args.notNull(rules, "Domain suffix rules"));
this.exceptions = Collections.unmodifiableList(Args.notNull(exceptions, "Domain suffix exceptions"));
this.exceptions = Collections.unmodifiableList(exceptions != null ? exceptions : Collections.<String>emptyList());
}
/**
 * Creates a suffix list of type {@link DomainType#UNKNOWN} by delegating to the
 * constructor that takes an explicit domain type.
 *
 * @param rules domain suffix rules.
 * @param exceptions domain suffix exceptions.
 */
public PublicSuffixList(final List<String> rules, final List<String> exceptions) {
this(DomainType.UNKNOWN, rules, exceptions);
}
/**
 * Returns the domain type held by this list.
 *
 * @return the domain type of this list.
 *
 * @since 4.5
 */
public DomainType getType() {
return type;
}
public List<String> getRules() {

View File

@ -43,14 +43,14 @@ import org.apache.http.annotation.Immutable;
@Immutable
public final class PublicSuffixListParser {
private static final int MAX_LINE_LEN = 256;
public PublicSuffixListParser() {
}
/**
* Parses the public suffix list format. When creating the reader from the file, make sure to
* use the correct encoding (the original list is in UTF-8).
* Parses the public suffix list format.
* <p>
* When creating the reader from the file, make sure to use the correct encoding
* (the original list is in UTF-8).
*
* @param reader the data reader. The caller is responsible for closing the reader.
* @throws java.io.IOException on error while reading from list
@ -59,11 +59,9 @@ public final class PublicSuffixListParser {
final List<String> rules = new ArrayList<String>();
final List<String> exceptions = new ArrayList<String>();
final BufferedReader r = new BufferedReader(reader);
final StringBuilder sb = new StringBuilder(256);
boolean more = true;
while (more) {
more = readLine(r, sb);
String line = sb.toString();
String line;
while ((line = r.readLine()) != null) {
if (line.isEmpty()) {
continue;
}
@ -85,30 +83,81 @@ public final class PublicSuffixListParser {
rules.add(line);
}
}
return new PublicSuffixList(rules, exceptions);
return new PublicSuffixList(DomainType.UNKNOWN, rules, exceptions);
}
private boolean readLine(final Reader r, final StringBuilder sb) throws IOException {
sb.setLength(0);
int b;
boolean hitWhitespace = false;
while ((b = r.read()) != -1) {
final char c = (char) b;
if (c == '\n') {
break;
/**
* Parses the public suffix list format by domain type (currently supported types are ICANN and PRIVATE).
* <p>
* When creating the reader from the file, make sure to use the correct encoding
* (the original list is in UTF-8).
*
* @param reader the data reader. The caller is responsible for closing the reader.
* @throws java.io.IOException on error while reading from list
*
* @since 4.5
*/
public List<PublicSuffixList> parseByType(final Reader reader) throws IOException {
final List<PublicSuffixList> result = new ArrayList<PublicSuffixList>(2);
final BufferedReader r = new BufferedReader(reader);
final StringBuilder sb = new StringBuilder(256);
DomainType domainType = null;
List<String> rules = null;
List<String> exceptions = null;
String line;
while ((line = r.readLine()) != null) {
if (line.isEmpty()) {
continue;
}
// Each line is only read up to the first whitespace
if (Character.isWhitespace(c)) {
hitWhitespace = true;
if (line.startsWith("//")) {
if (domainType == null) {
if (line.contains("===BEGIN ICANN DOMAINS===")) {
domainType = DomainType.ICANN;
} else if (line.contains("===BEGIN PRIVATE DOMAINS===")) {
domainType = DomainType.PRIVATE;
}
if (!hitWhitespace) {
sb.append(c);
} else {
if (line.contains("===END ICANN DOMAINS===") || line.contains("===END PRIVATE DOMAINS===")) {
if (rules != null) {
result.add(new PublicSuffixList(domainType, rules, exceptions));
}
if (sb.length() > MAX_LINE_LEN) {
return false; // prevent excess memory usage
domainType = null;
rules = null;
exceptions = null;
}
}
return (b != -1);
continue; //entire lines can also be commented using //
}
if (domainType == null) {
continue;
}
if (line.startsWith(".")) {
line = line.substring(1); // A leading dot is optional
}
// An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
final boolean isException = line.startsWith("!");
if (isException) {
line = line.substring(1);
}
if (isException) {
if (exceptions == null) {
exceptions = new ArrayList<String>();
}
exceptions.add(line);
} else {
if (rules == null) {
rules = new ArrayList<String>();
}
rules.add(line);
}
}
return result;
}
}

View File

@ -28,6 +28,7 @@ package org.apache.http.conn.util;
import java.net.IDN;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@ -48,33 +49,96 @@ import org.apache.http.util.Args;
@ThreadSafe
public final class PublicSuffixMatcher {
private final Map<String, String> rules;
private final Map<String, String> exceptions;
private final Map<String, DomainType> rules;
private final Map<String, DomainType> exceptions;
/**
 * Creates a matcher whose entries are all of type {@link DomainType#UNKNOWN}
 * by delegating to the constructor that takes an explicit domain type.
 *
 * @param rules domain suffix rules.
 * @param exceptions domain suffix exceptions.
 */
public PublicSuffixMatcher(final Collection<String> rules, final Collection<String> exceptions) {
this(DomainType.UNKNOWN, rules, exceptions);
}
/**
* @since 4.5
*/
public PublicSuffixMatcher(
final DomainType domainType, final Collection<String> rules, final Collection<String> exceptions) {
Args.notNull(domainType, "Domain type");
Args.notNull(rules, "Domain suffix rules");
this.rules = new ConcurrentHashMap<String, String>(rules.size());
this.rules = new ConcurrentHashMap<String, DomainType>(rules.size());
for (String rule: rules) {
this.rules.put(rule, rule);
this.rules.put(rule, domainType);
}
this.exceptions = new ConcurrentHashMap<String, DomainType>();
if (exceptions != null) {
this.exceptions = new ConcurrentHashMap<String, String>(exceptions.size());
for (String exception: exceptions) {
this.exceptions.put(exception, exception);
this.exceptions.put(exception, domainType);
}
} else {
this.exceptions = null;
}
}
/**
* Returns registrable part of the domain for the given domain name of {@code null}
* @since 4.5
*/
public PublicSuffixMatcher(final Collection<PublicSuffixList> lists) {
    Args.notNull(lists, "Domain suffix lists");
    this.rules = new ConcurrentHashMap<String, DomainType>();
    this.exceptions = new ConcurrentHashMap<String, DomainType>();
    // Merge every list into the two lookup maps, tagging each entry with the
    // type of the list it came from. An entry present in several lists keeps
    // the type of the list iterated last.
    for (final PublicSuffixList suffixList : lists) {
        final DomainType listType = suffixList.getType();
        for (final String rule : suffixList.getRules()) {
            this.rules.put(rule, listType);
        }
        final List<String> listExceptions = suffixList.getExceptions();
        if (listExceptions != null) {
            for (final String exception : listExceptions) {
                this.exceptions.put(exception, listType);
            }
        }
    }
}
/**
 * Tests whether the given map holds an entry for the rule whose domain type
 * is acceptable to the caller.
 *
 * @param map lookup map from rule to domain type; a {@code null} map never matches.
 * @param rule the rule (map key) to look up.
 * @param expectedType the required domain type or {@code null} to accept any type.
 * @return {@code true} if the rule is present and its type is acceptable.
 */
private static boolean hasEntry(final Map<String, DomainType> map, final String rule, final DomainType expectedType) {
    if (map == null) {
        return false;
    }
    final DomainType actualType = map.get(rule);
    // A missing entry never matches; a present one matches when the caller
    // does not care about the type or the types agree.
    return actualType != null && (expectedType == null || actualType.equals(expectedType));
}
/**
 * Looks up the given rule among the suffix rules of this matcher.
 *
 * @param rule the rule to look up.
 * @param expectedType the required domain type or {@code null} to accept any type.
 * @return {@code true} if the rule is present with an acceptable type.
 */
private boolean hasRule(final String rule, final DomainType expectedType) {
    return hasEntry(rules, rule, expectedType);
}
/**
 * Looks up the given entry among the suffix exceptions of this matcher.
 *
 * @param exception the exception entry to look up.
 * @param expectedType the required domain type or {@code null} to accept any type.
 * @return {@code true} if the exception is present with an acceptable type.
 */
private boolean hasException(final String exception, final DomainType expectedType) {
    return hasEntry(exceptions, exception, expectedType);
}
/**
 * Returns registrable part of the domain for the given domain name or {@code null}
 * if given domain represents a public suffix.
 * <p>
 * Equivalent to calling {@code getDomainRoot(domain, null)}, that is, without
 * restricting the lookup to a particular domain type.
 *
 * @param domain the domain name to examine.
 * @return domain root
 */
public String getDomainRoot(final String domain) {
return getDomainRoot(domain, null);
}
/**
* Returns registrable part of the domain for the given domain name or {@code null}
* if given domain represents a public suffix.
*
* @param domain
* @param expectedType expected domain type or {@code null} if any.
* @return domain root
*
* @since 4.5
*/
public String getDomainRoot(final String domain, final DomainType expectedType) {
if (domain == null) {
return null;
}
@ -86,11 +150,11 @@ public final class PublicSuffixMatcher {
while (segment != null) {
// An exception rule takes priority over any other matching rule.
if (this.exceptions != null && this.exceptions.containsKey(IDN.toUnicode(segment))) {
if (hasException(IDN.toUnicode(segment), expectedType)) {
return segment;
}
if (this.rules.containsKey(IDN.toUnicode(segment))) {
if (hasRule(IDN.toUnicode(segment), expectedType)) {
break;
}
@ -98,7 +162,7 @@ public final class PublicSuffixMatcher {
final String nextSegment = nextdot != -1 ? segment.substring(nextdot + 1) : null;
if (nextSegment != null) {
if (this.rules.containsKey("*." + IDN.toUnicode(nextSegment))) {
if (hasRule("*." + IDN.toUnicode(nextSegment), expectedType)) {
break;
}
}
@ -110,11 +174,28 @@ public final class PublicSuffixMatcher {
return domainName;
}
/**
 * Tests whether the given domain matches any entry from the public suffix list.
 * <p>
 * Equivalent to calling {@code matches(domain, null)}, that is, without
 * restricting the lookup to a particular domain type.
 *
 * @param domain the domain name to test.
 * @return the result of {@code matches(domain, null)}.
 */
public boolean matches(final String domain) {
return matches(domain, null);
}
/**
* Tests whether the given domain matches any entry from the public suffix list.
*
* @param domain
* @param expectedType expected domain type or {@code null} if any.
* @return {@code true} if the given domain matches any of the public suffixes.
*
* @since 4.5
*/
public boolean matches(final String domain, final DomainType expectedType) {
if (domain == null) {
return false;
}
final String domainRoot = getDomainRoot(domain.startsWith(".") ? domain.substring(1) : domain);
final String domainRoot = getDomainRoot(
domain.startsWith(".") ? domain.substring(1) : domain, expectedType);
return domainRoot == null;
}

View File

@ -33,6 +33,7 @@ import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -49,9 +50,9 @@ import org.apache.http.util.Args;
public final class PublicSuffixMatcherLoader {
private static PublicSuffixMatcher load(final InputStream in) throws IOException {
final PublicSuffixList list = new PublicSuffixListParser().parse(
final List<PublicSuffixList> lists = new PublicSuffixListParser().parseByType(
new InputStreamReader(in, Consts.UTF_8));
return new PublicSuffixMatcher(list.getRules(), list.getExceptions());
return new PublicSuffixMatcher(lists);
}
public static PublicSuffixMatcher load(final URL url) throws IOException {

View File

@ -35,6 +35,7 @@ import java.util.Arrays;
import javax.net.ssl.SSLException;
import org.apache.http.conn.util.DomainType;
import org.apache.http.conn.util.PublicSuffixMatcher;
import org.junit.Assert;
import org.junit.Before;
@ -52,7 +53,7 @@ public class TestDefaultHostnameVerifier {
@Before
public void setup() {
impl = new DefaultHostnameVerifier();
publicSuffixMatcher = new PublicSuffixMatcher(Arrays.asList("com", "co.jp", "gov.uk"), null);
publicSuffixMatcher = new PublicSuffixMatcher(DomainType.ICANN, Arrays.asList("com", "co.jp", "gov.uk"), null);
implWithPublicSuffixCheck = new DefaultHostnameVerifier(publicSuffixMatcher);
}

View File

@ -30,6 +30,8 @@ package org.apache.http.conn.util;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.http.Consts;
import org.junit.Assert;
@ -37,12 +39,10 @@ import org.junit.Test;
public class TestPublicSuffixListParser {
private static final String SOURCE_FILE = "suffixlist.txt";
@Test
public void testParse() throws Exception {
final ClassLoader classLoader = getClass().getClassLoader();
final InputStream in = classLoader.getResourceAsStream(SOURCE_FILE);
final InputStream in = classLoader.getResourceAsStream("suffixlist.txt");
Assert.assertNotNull(in);
final PublicSuffixList suffixList;
try {
@ -52,8 +52,35 @@ public class TestPublicSuffixListParser {
in.close();
}
Assert.assertNotNull(suffixList);
Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp", "no", "h\u00E5.no", "xx"), suffixList.getRules());
Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp", "no", "h\u00E5.no"), suffixList.getRules());
Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), suffixList.getExceptions());
}
@Test
public void testParseByType() throws Exception {
    // Load the fixture that contains both an ICANN and a PRIVATE section.
    final InputStream in = getClass().getClassLoader().getResourceAsStream("suffixlist2.txt");
    Assert.assertNotNull(in);
    final List<PublicSuffixList> suffixLists;
    try {
        suffixLists = new PublicSuffixListParser().parseByType(new InputStreamReader(in, Consts.UTF_8));
    } finally {
        in.close();
    }
    Assert.assertNotNull(suffixLists);
    Assert.assertEquals(2, suffixLists.size());

    // First list: ICANN entries, including a wildcard rule and its exception.
    final PublicSuffixList icannList = suffixLists.get(0);
    Assert.assertNotNull(icannList);
    Assert.assertEquals(DomainType.ICANN, icannList.getType());
    Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp"), icannList.getRules());
    Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), icannList.getExceptions());

    // Second list: PRIVATE entries; no exceptions are expected.
    final PublicSuffixList privateList = suffixLists.get(1);
    Assert.assertNotNull(privateList);
    Assert.assertEquals(DomainType.PRIVATE, privateList.getType());
    Assert.assertEquals(Arrays.asList("googleapis.com", "googlecode.com"), privateList.getRules());
    Assert.assertEquals(Collections.<String>emptyList(), privateList.getExceptions());
}
}

View File

@ -34,6 +34,7 @@ import java.util.Date;
import java.util.Locale;
import org.apache.http.client.utils.DateUtils;
import org.apache.http.conn.util.DomainType;
import org.apache.http.conn.util.PublicSuffixMatcher;
import org.apache.http.cookie.ClientCookie;
import org.apache.http.cookie.CookieAttributeHandler;
@ -498,7 +499,7 @@ public class TestBasicCookieAttribHandlers {
public void testPublicSuffixFilter() throws Exception {
final BasicClientCookie cookie = new BasicClientCookie("name", "value");
final PublicSuffixMatcher matcher = new PublicSuffixMatcher(Arrays.asList("co.uk", "com"), null);
final PublicSuffixMatcher matcher = new PublicSuffixMatcher(DomainType.ICANN, Arrays.asList("co.uk", "com"), null);
final PublicSuffixDomainFilter h = new PublicSuffixDomainFilter(new RFC2109DomainHandler(), matcher);
cookie.setDomain(".co.uk");

View File

@ -31,8 +31,3 @@ ac.jp
// unicode
no
hå.no
// invalid
xx yy
//zz

View File

@ -0,0 +1,39 @@
// ====================================================================
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// ====================================================================
//
// This software consists of voluntary contributions made by many
// individuals on behalf of the Apache Software Foundation. For more
// information on the Apache Software Foundation, please see
// <http://www.apache.org/>.
//
// ===BEGIN ICANN DOMAINS===
jp
ac.jp
*.tokyo.jp
!metro.tokyo.jp
// ===END ICANN DOMAINS===
// ===BEGIN PRIVATE DOMAINS===
googleapis.com
googlecode.com
// ===END PRIVATE DOMAINS===