Factored Public Suffix matching code into a separate utility class

git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1621182 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Oleg Kalnichevski 2014-08-28 19:08:09 +00:00
parent 1e2d2f38a0
commit 3cd94a86f3
4 changed files with 202 additions and 41 deletions

View File

@ -33,7 +33,11 @@ import org.apache.http.annotation.Immutable;
import org.apache.http.util.Args; import org.apache.http.util.Args;
/** /**
* Public suffix list from <a href="http://publicsuffix.org/">publicsuffix.org</a>. * Public suffix is a set of DNS names or wildcards concatenated with dots. It represents
* the part of a domain name which is not under the control of the individual registrant.
* <p>
* An up-to-date list of suffixes can be obtained from
* <a href="http://publicsuffix.org/">publicsuffix.org</a>
* *
* @since 4.4 * @since 4.4
*/ */

View File

@ -0,0 +1,101 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
package org.apache.http.conn.util;
import java.net.IDN;
import java.util.Collection;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.http.annotation.ThreadSafe;
import org.apache.http.util.Args;
/**
 * Utility class that can test if DNS names match the content of the Public Suffix List.
 * <p>
 * DNS names are case-insensitive, therefore the rules and exceptions supplied at
 * construction time as well as the names passed to {@link #match(String)} are converted
 * to lower case (using the root locale) before they are compared.
 * <p>
 * An up-to-date list of suffixes can be obtained from
 * <a href="http://publicsuffix.org/">publicsuffix.org</a>
 *
 * @see org.apache.http.conn.util.PublicSuffixList
 *
 * @since 4.4
 */
@ThreadSafe
public final class PublicSuffixMatcher {

    // Both maps are used as sets: every key is mapped to itself and only
    // key membership is ever queried.
    private final Map<String, String> rules;
    private final Map<String, String> exceptions;

    /**
     * Creates a matcher from the given rules and exceptions. The content of both
     * collections is copied, so later changes to them do not affect this matcher.
     *
     * @param rules public suffix rules (plain names or {@code *.} wildcard patterns);
     *   checked with {@code Args.notNull}.
     * @param exceptions names that must never be reported as public suffixes even if
     *   a rule matches them; may be {@code null} if there are none.
     */
    public PublicSuffixMatcher(final Collection<String> rules, final Collection<String> exceptions) {
        Args.notNull(rules, "Domain suffix rules");
        this.rules = new ConcurrentHashMap<String, String>(rules.size());
        for (final String rule: rules) {
            // Normalize the case so that lookups are case-insensitive.
            final String key = rule.toLowerCase(java.util.Locale.ROOT);
            this.rules.put(key, key);
        }
        if (exceptions != null) {
            this.exceptions = new ConcurrentHashMap<String, String>(exceptions.size());
            for (final String exception: exceptions) {
                final String key = exception.toLowerCase(java.util.Locale.ROOT);
                this.exceptions.put(key, key);
            }
        } else {
            this.exceptions = null;
        }
    }

    /**
     * Tests whether the given domain name matches a public suffix rule.
     * <p>
     * A single leading dot is ignored, the name is converted to lower case and
     * then to its Unicode form with {@link IDN#toUnicode(String)}. If the resulting
     * name is listed as an exception the result is {@code false}. Otherwise the name
     * itself is looked up first, followed by the wildcard patterns obtained by
     * successively replacing its left-most label with {@code *}.
     *
     * @param domain the domain name to test; may be {@code null}.
     * @return {@code true} if the name or one of its wildcard patterns is a known
     *   rule, {@code false} if the name is {@code null}, is an exception or no rule
     *   matches.
     */
    public boolean match(final String domain) {
        String s = domain;
        if (s == null) {
            return false;
        }
        if (s.startsWith(".")) {
            s = s.substring(1);
        }
        // DNS names are case-insensitive: bring the name to the same canonical
        // lower-case form as the stored rules before decoding any ACE labels.
        s = IDN.toUnicode(s.toLowerCase(java.util.Locale.ROOT));

        // An exception rule takes priority over any other matching rule.
        if (this.exceptions != null && this.exceptions.containsKey(s)) {
            return false;
        }

        do {
            if (this.rules.containsKey(s)) {
                return true;
            }
            // patterns
            // Drop the wildcard added by the previous iteration so that the
            // next label can be replaced.
            if (s.startsWith("*.")) {
                s = s.substring(2);
            }
            final int nextdot = s.indexOf('.');
            if (nextdot == -1) {
                // Only one label is left, there is nothing more to generalize.
                break;
            }
            // Replace the left-most label with a wildcard: "a.b.c" -> "*.b.c".
            s = "*" + s.substring(nextdot);
        } while (!s.isEmpty());

        return false;
    }

}

View File

@ -26,11 +26,9 @@
*/ */
package org.apache.http.impl.cookie; package org.apache.http.impl.cookie;
import java.net.IDN;
import java.util.Collection; import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import org.apache.http.conn.util.PublicSuffixMatcher;
import org.apache.http.cookie.Cookie; import org.apache.http.cookie.Cookie;
import org.apache.http.cookie.CookieAttributeHandler; import org.apache.http.cookie.CookieAttributeHandler;
import org.apache.http.cookie.CookieOrigin; import org.apache.http.cookie.CookieOrigin;
@ -49,8 +47,9 @@ import org.apache.http.cookie.SetCookie;
*/ */
public class PublicSuffixFilter implements CookieAttributeHandler { public class PublicSuffixFilter implements CookieAttributeHandler {
private final CookieAttributeHandler wrapped; private final CookieAttributeHandler wrapped;
private Set<String> exceptions; private Collection<String> exceptions;
private Set<String> suffixes; private Collection<String> suffixes;
private PublicSuffixMatcher matcher;
public PublicSuffixFilter(final CookieAttributeHandler wrapped) { public PublicSuffixFilter(final CookieAttributeHandler wrapped) {
this.wrapped = wrapped; this.wrapped = wrapped;
@ -63,7 +62,8 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
* @param suffixes * @param suffixes
*/ */
public void setPublicSuffixes(final Collection<String> suffixes) { public void setPublicSuffixes(final Collection<String> suffixes) {
this.suffixes = new HashSet<String>(suffixes); this.suffixes = suffixes;
this.matcher = null;
} }
/** /**
@ -72,7 +72,8 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
* @param exceptions * @param exceptions
*/ */
public void setExceptions(final Collection<String> exceptions) { public void setExceptions(final Collection<String> exceptions) {
this.exceptions = new HashSet<String>(exceptions); this.exceptions = exceptions;
this.matcher = null;
} }
/** /**
@ -97,39 +98,9 @@ public class PublicSuffixFilter implements CookieAttributeHandler {
} }
private boolean isForPublicSuffix(final Cookie cookie) { private boolean isForPublicSuffix(final Cookie cookie) {
String domain = cookie.getDomain(); if (matcher == null) {
if (domain.startsWith(".")) { matcher = new PublicSuffixMatcher(this.suffixes, this.exceptions);
domain = domain.substring(1);
} }
domain = IDN.toUnicode(domain); return matcher.match(cookie.getDomain());
// An exception rule takes priority over any other matching rule.
if (this.exceptions != null) {
if (this.exceptions.contains(domain)) {
return false;
}
}
if (this.suffixes == null) {
return false;
}
do {
if (this.suffixes.contains(domain)) {
return true;
}
// patterns
if (domain.startsWith("*.")) {
domain = domain.substring(2);
}
final int nextdot = domain.indexOf('.');
if (nextdot == -1) {
break;
}
domain = "*" + domain.substring(nextdot);
} while (!domain.isEmpty());
return false;
} }
} }

View File

@ -0,0 +1,85 @@
/*
* ====================================================================
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
* ====================================================================
*
* This software consists of voluntary contributions made by many
* individuals on behalf of the Apache Software Foundation. For more
* information on the Apache Software Foundation, please see
* <http://www.apache.org/>.
*
*/
package org.apache.http.conn.util;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.apache.http.Consts;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
/**
 * Unit tests for {@link PublicSuffixMatcher} driven by the rules parsed from the
 * {@code suffixlist.txt} class path resource.
 */
public class TestPublicSuffixMatcher {

    private static final String SOURCE_FILE = "suffixlist.txt";

    private PublicSuffixMatcher matcher;

    /**
     * Parses the suffix list resource and builds a fresh matcher for every test.
     */
    @Before
    public void setUp() throws Exception {
        final InputStream resource = getClass().getClassLoader().getResourceAsStream(SOURCE_FILE);
        Assert.assertNotNull(resource);
        final PublicSuffixList parsed = readSuffixList(resource);
        matcher = new PublicSuffixMatcher(parsed.getRules(), parsed.getExceptions());
    }

    /**
     * Parses the given stream as a UTF-8 encoded suffix list; the stream is closed
     * whether or not parsing succeeds.
     */
    private static PublicSuffixList readSuffixList(final InputStream resource) throws Exception {
        try {
            return new PublicSuffixListParser().parse(new InputStreamReader(resource, Consts.UTF_8));
        } finally {
            resource.close();
        }
    }

    /** Asserts that the matcher reports the given name as a public suffix. */
    private void assertMatch(final String name) {
        Assert.assertTrue(matcher.match(name));
    }

    /** Asserts that the matcher does not report the given name as a public suffix. */
    private void assertNoMatch(final String name) {
        Assert.assertFalse(matcher.match(name));
    }

    @Test
    public void testParse() throws Exception {
        assertMatch(".jp");
        assertMatch(".ac.jp");
        assertMatch(".any.tokyo.jp");

        // exception
        assertNoMatch(".metro.tokyo.jp");
    }

    @Test
    public void testUnicode() throws Exception {
        // The Unicode / ACE pair is asserted twice in a row.
        for (int pass = 0; pass < 2; pass++) {
            assertMatch(".h\u00E5.no"); // \u00E5 is <aring>
            assertMatch(".xn--h-2fa.no");
        }
    }

    @Test
    public void testWhitespace() throws Exception {
        assertMatch(".xx");
        // yy appears after whitespace
        assertNoMatch(".yy");
        // zz is commented
        assertNoMatch(".zz");
    }

}