Factored Public Suffix matching code into a separate utility class
git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1621182 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1e2d2f38a0
commit
3cd94a86f3
|
@ -33,7 +33,11 @@ import org.apache.http.annotation.Immutable;
|
||||||
import org.apache.http.util.Args;
|
import org.apache.http.util.Args;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Public suffix list from <a href="http://publicsuffix.org/">publicsuffix.org</a>.
|
* Public suffix is a set of DNS names or wildcards concatenated with dots. It represents
|
||||||
|
* the part of a domain name which is not under the control of the individual registrant.
|
||||||
|
* <p>
|
||||||
|
* An up-to-date list of suffixes can be obtained from
|
||||||
|
* <a href="http://publicsuffix.org/">publicsuffix.org</a>
|
||||||
*
|
*
|
||||||
* @since 4.4
|
* @since 4.4
|
||||||
*/
|
*/
|
||||||
|
|
|
@ -0,0 +1,101 @@
|
||||||
|
/*
|
||||||
|
* ====================================================================
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
package org.apache.http.conn.util;
|
||||||
|
|
||||||
|
import java.net.IDN;
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
|
||||||
|
import org.apache.http.annotation.ThreadSafe;
|
||||||
|
import org.apache.http.util.Args;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility class that can test if DNS names match the content of the Public Suffix List.
|
||||||
|
* <p>
|
||||||
|
* An up-to-date list of suffixes can be obtained from
|
||||||
|
* <a href="http://publicsuffix.org/">publicsuffix.org</a>
|
||||||
|
*
|
||||||
|
* @see org.apache.http.conn.util.PublicSuffixList
|
||||||
|
*
|
||||||
|
* @since 4.4
|
||||||
|
*/
|
||||||
|
@ThreadSafe
|
||||||
|
public final class PublicSuffixMatcher {
|
||||||
|
|
||||||
|
private final Map<String, String> rules;
|
||||||
|
private final Map<String, String> exceptions;
|
||||||
|
|
||||||
|
public PublicSuffixMatcher(final Collection<String> rules, final Collection<String> exceptions) {
|
||||||
|
Args.notNull(rules, "Domain suffix rules");
|
||||||
|
this.rules = new ConcurrentHashMap<String, String>(rules.size());
|
||||||
|
for (String rule: rules) {
|
||||||
|
this.rules.put(rule, rule);
|
||||||
|
}
|
||||||
|
if (exceptions != null) {
|
||||||
|
this.exceptions = new ConcurrentHashMap<String, String>(exceptions.size());
|
||||||
|
for (String exception: exceptions) {
|
||||||
|
this.exceptions.put(exception, exception);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
this.exceptions = null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean match(final String domain) {
|
||||||
|
String s = domain;
|
||||||
|
if (s == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (s.startsWith(".")) {
|
||||||
|
s = s.substring(1);
|
||||||
|
}
|
||||||
|
s = IDN.toUnicode(s);
|
||||||
|
|
||||||
|
// An exception rule takes priority over any other matching rule.
|
||||||
|
if (this.exceptions != null && this.exceptions.containsKey(s)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
do {
|
||||||
|
if (this.rules.containsKey(s)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// patterns
|
||||||
|
if (s.startsWith("*.")) {
|
||||||
|
s = s.substring(2);
|
||||||
|
}
|
||||||
|
final int nextdot = s.indexOf('.');
|
||||||
|
if (nextdot == -1) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
s = "*" + s.substring(nextdot);
|
||||||
|
} while (!s.isEmpty());
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
|
@ -26,11 +26,9 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.http.impl.cookie;
|
package org.apache.http.impl.cookie;
|
||||||
|
|
||||||
import java.net.IDN;
|
|
||||||
import java.util.Collection;
|
import java.util.Collection;
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.Set;
|
|
||||||
|
|
||||||
|
import org.apache.http.conn.util.PublicSuffixMatcher;
|
||||||
import org.apache.http.cookie.Cookie;
|
import org.apache.http.cookie.Cookie;
|
||||||
import org.apache.http.cookie.CookieAttributeHandler;
|
import org.apache.http.cookie.CookieAttributeHandler;
|
||||||
import org.apache.http.cookie.CookieOrigin;
|
import org.apache.http.cookie.CookieOrigin;
|
||||||
|
@ -49,8 +47,9 @@ import org.apache.http.cookie.SetCookie;
|
||||||
*/
|
*/
|
||||||
public class PublicSuffixFilter implements CookieAttributeHandler {
|
public class PublicSuffixFilter implements CookieAttributeHandler {
|
||||||
private final CookieAttributeHandler wrapped;
|
private final CookieAttributeHandler wrapped;
|
||||||
private Set<String> exceptions;
|
private Collection<String> exceptions;
|
||||||
private Set<String> suffixes;
|
private Collection<String> suffixes;
|
||||||
|
private PublicSuffixMatcher matcher;
|
||||||
|
|
||||||
public PublicSuffixFilter(final CookieAttributeHandler wrapped) {
|
public PublicSuffixFilter(final CookieAttributeHandler wrapped) {
|
||||||
this.wrapped = wrapped;
|
this.wrapped = wrapped;
|
||||||
|
@ -63,7 +62,8 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
|
||||||
* @param suffixes
|
* @param suffixes
|
||||||
*/
|
*/
|
||||||
public void setPublicSuffixes(final Collection<String> suffixes) {
|
public void setPublicSuffixes(final Collection<String> suffixes) {
|
||||||
this.suffixes = new HashSet<String>(suffixes);
|
this.suffixes = suffixes;
|
||||||
|
this.matcher = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -72,7 +72,8 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
|
||||||
* @param exceptions
|
* @param exceptions
|
||||||
*/
|
*/
|
||||||
public void setExceptions(final Collection<String> exceptions) {
|
public void setExceptions(final Collection<String> exceptions) {
|
||||||
this.exceptions = new HashSet<String>(exceptions);
|
this.exceptions = exceptions;
|
||||||
|
this.matcher = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -97,39 +98,9 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean isForPublicSuffix(final Cookie cookie) {
|
private boolean isForPublicSuffix(final Cookie cookie) {
|
||||||
String domain = cookie.getDomain();
|
if (matcher == null) {
|
||||||
if (domain.startsWith(".")) {
|
matcher = new PublicSuffixMatcher(this.suffixes, this.exceptions);
|
||||||
domain = domain.substring(1);
|
|
||||||
}
|
}
|
||||||
domain = IDN.toUnicode(domain);
|
return matcher.match(cookie.getDomain());
|
||||||
|
|
||||||
// An exception rule takes priority over any other matching rule.
|
|
||||||
if (this.exceptions != null) {
|
|
||||||
if (this.exceptions.contains(domain)) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (this.suffixes == null) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
do {
|
|
||||||
if (this.suffixes.contains(domain)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
// patterns
|
|
||||||
if (domain.startsWith("*.")) {
|
|
||||||
domain = domain.substring(2);
|
|
||||||
}
|
|
||||||
final int nextdot = domain.indexOf('.');
|
|
||||||
if (nextdot == -1) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
domain = "*" + domain.substring(nextdot);
|
|
||||||
} while (!domain.isEmpty());
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,85 @@
|
||||||
|
/*
|
||||||
|
* ====================================================================
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.http.conn.util;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
|
||||||
|
import org.apache.http.Consts;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Before;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestPublicSuffixMatcher {
|
||||||
|
|
||||||
|
private static final String SOURCE_FILE = "suffixlist.txt";
|
||||||
|
|
||||||
|
private PublicSuffixMatcher matcher;
|
||||||
|
|
||||||
|
@Before
|
||||||
|
public void setUp() throws Exception {
|
||||||
|
final ClassLoader classLoader = getClass().getClassLoader();
|
||||||
|
final InputStream in = classLoader.getResourceAsStream(SOURCE_FILE);
|
||||||
|
Assert.assertNotNull(in);
|
||||||
|
final PublicSuffixList suffixList;
|
||||||
|
try {
|
||||||
|
final PublicSuffixListParser parser = new PublicSuffixListParser();
|
||||||
|
suffixList = parser.parse(new InputStreamReader(in, Consts.UTF_8));
|
||||||
|
} finally {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
matcher = new PublicSuffixMatcher(suffixList.getRules(), suffixList.getExceptions());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParse() throws Exception {
|
||||||
|
Assert.assertTrue(matcher.match(".jp"));
|
||||||
|
Assert.assertTrue(matcher.match(".ac.jp"));
|
||||||
|
Assert.assertTrue(matcher.match(".any.tokyo.jp"));
|
||||||
|
// exception
|
||||||
|
Assert.assertFalse(matcher.match(".metro.tokyo.jp"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testUnicode() throws Exception {
|
||||||
|
Assert.assertTrue(matcher.match(".h\u00E5.no")); // \u00E5 is <aring>
|
||||||
|
Assert.assertTrue(matcher.match(".xn--h-2fa.no"));
|
||||||
|
Assert.assertTrue(matcher.match(".h\u00E5.no"));
|
||||||
|
Assert.assertTrue(matcher.match(".xn--h-2fa.no"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testWhitespace() throws Exception {
|
||||||
|
Assert.assertTrue(matcher.match(".xx"));
|
||||||
|
// yy appears after whitespace
|
||||||
|
Assert.assertFalse(matcher.match(".yy"));
|
||||||
|
// zz is commented
|
||||||
|
Assert.assertFalse(matcher.match(".zz"));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue