Refactored public suffix list parser; deprecated old implementation
git-svn-id: https://svn.apache.org/repos/asf/httpcomponents/httpclient/trunk@1620941 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
45100c7d6d
commit
221e7ba59e
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* ====================================================================
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
package org.apache.http.conn.util;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.http.annotation.Immutable;
|
||||||
|
import org.apache.http.util.Args;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Public suffix list from <a href="http://publicsuffix.org/">publicsuffix.org</a>.
|
||||||
|
*
|
||||||
|
* @since 4.4
|
||||||
|
*/
|
||||||
|
@Immutable
|
||||||
|
public final class PublicSuffixList {
|
||||||
|
|
||||||
|
private final List<String> rules;
|
||||||
|
private final List<String> exceptions;
|
||||||
|
|
||||||
|
public PublicSuffixList(final List<String> rules, final List<String> exceptions) {
|
||||||
|
this.rules = Collections.unmodifiableList(Args.notNull(rules, "Domain suffix rules"));
|
||||||
|
this.exceptions = Collections.unmodifiableList(Args.notNull(exceptions, "Domain suffix exceptions"));
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getRules() {
|
||||||
|
return rules;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getExceptions() {
|
||||||
|
return exceptions;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,114 @@
|
||||||
|
/*
|
||||||
|
* ====================================================================
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
package org.apache.http.conn.util;
|
||||||
|
|
||||||
|
import java.io.BufferedReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.Reader;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.http.annotation.Immutable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
|
||||||
|
* and configures a PublicSuffixFilter.
|
||||||
|
*
|
||||||
|
* @since 4.4
|
||||||
|
*/
|
||||||
|
@Immutable
|
||||||
|
public final class PublicSuffixListParser {
|
||||||
|
|
||||||
|
private static final int MAX_LINE_LEN = 256;
|
||||||
|
|
||||||
|
public PublicSuffixListParser() {
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Parses the public suffix list format. When creating the reader from the file, make sure to
|
||||||
|
* use the correct encoding (the original list is in UTF-8).
|
||||||
|
*
|
||||||
|
* @param reader the data reader. The caller is responsible for closing the reader.
|
||||||
|
* @throws java.io.IOException on error while reading from list
|
||||||
|
*/
|
||||||
|
public PublicSuffixList parse(final Reader reader) throws IOException {
|
||||||
|
final List<String> rules = new ArrayList<String>();
|
||||||
|
final List<String> exceptions = new ArrayList<String>();
|
||||||
|
final BufferedReader r = new BufferedReader(reader);
|
||||||
|
final StringBuilder sb = new StringBuilder(256);
|
||||||
|
boolean more = true;
|
||||||
|
while (more) {
|
||||||
|
more = readLine(r, sb);
|
||||||
|
String line = sb.toString();
|
||||||
|
if (line.isEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (line.startsWith("//")) {
|
||||||
|
continue; //entire lines can also be commented using //
|
||||||
|
}
|
||||||
|
if (line.startsWith(".")) {
|
||||||
|
line = line.substring(1); // A leading dot is optional
|
||||||
|
}
|
||||||
|
// An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
|
||||||
|
final boolean isException = line.startsWith("!");
|
||||||
|
if (isException) {
|
||||||
|
line = line.substring(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isException) {
|
||||||
|
exceptions.add(line);
|
||||||
|
} else {
|
||||||
|
rules.add(line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new PublicSuffixList(rules, exceptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean readLine(final Reader r, final StringBuilder sb) throws IOException {
|
||||||
|
sb.setLength(0);
|
||||||
|
int b;
|
||||||
|
boolean hitWhitespace = false;
|
||||||
|
while ((b = r.read()) != -1) {
|
||||||
|
final char c = (char) b;
|
||||||
|
if (c == '\n') {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
// Each line is only read up to the first whitespace
|
||||||
|
if (Character.isWhitespace(c)) {
|
||||||
|
hitWhitespace = true;
|
||||||
|
}
|
||||||
|
if (!hitWhitespace) {
|
||||||
|
sb.append(c);
|
||||||
|
}
|
||||||
|
if (sb.length() > MAX_LINE_LEN) {
|
||||||
|
throw new IOException("Line too long"); // prevent excess memory usage
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return (b != -1);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -26,27 +26,29 @@
|
||||||
*/
|
*/
|
||||||
package org.apache.http.impl.cookie;
|
package org.apache.http.impl.cookie;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.Reader;
|
import java.io.Reader;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Collection;
|
|
||||||
|
|
||||||
import org.apache.http.annotation.Immutable;
|
import org.apache.http.annotation.Immutable;
|
||||||
|
import org.apache.http.conn.util.PublicSuffixList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
|
* Parses the list from <a href="http://publicsuffix.org/">publicsuffix.org</a>
|
||||||
* and configures a PublicSuffixFilter.
|
* and configures a PublicSuffixFilter.
|
||||||
*
|
*
|
||||||
|
* @deprecated (4.4) use {@link org.apache.http.conn.util.PublicSuffixListParser}.
|
||||||
|
*
|
||||||
* @since 4.0
|
* @since 4.0
|
||||||
*/
|
*/
|
||||||
@Immutable
|
@Immutable
|
||||||
public class PublicSuffixListParser {
|
public class PublicSuffixListParser {
|
||||||
private static final int MAX_LINE_LEN = 256;
|
|
||||||
private final PublicSuffixFilter filter;
|
private final PublicSuffixFilter filter;
|
||||||
|
private final org.apache.http.conn.util.PublicSuffixListParser parser;
|
||||||
|
|
||||||
PublicSuffixListParser(final PublicSuffixFilter filter) {
|
PublicSuffixListParser(final PublicSuffixFilter filter) {
|
||||||
this.filter = filter;
|
this.filter = filter;
|
||||||
|
this.parser = new org.apache.http.conn.util.PublicSuffixListParser();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -54,74 +56,13 @@ public class PublicSuffixListParser {
|
||||||
* When creating the reader from the file, make sure to
|
* When creating the reader from the file, make sure to
|
||||||
* use the correct encoding (the original list is in UTF-8).
|
* use the correct encoding (the original list is in UTF-8).
|
||||||
*
|
*
|
||||||
* @param list the suffix list. The caller is responsible for closing the reader.
|
* @param reader the suffix list. The caller is responsible for closing the reader.
|
||||||
* @throws IOException on error while reading from list
|
* @throws IOException on error while reading from list
|
||||||
*/
|
*/
|
||||||
public void parse(final Reader list) throws IOException {
|
public void parse(final Reader reader) throws IOException {
|
||||||
final Collection<String> rules = new ArrayList<String>();
|
final PublicSuffixList suffixList = parser.parse(reader);
|
||||||
final Collection<String> exceptions = new ArrayList<String>();
|
filter.setPublicSuffixes(suffixList.getRules());
|
||||||
final BufferedReader r = new BufferedReader(list);
|
filter.setExceptions(suffixList.getExceptions());
|
||||||
final StringBuilder sb = new StringBuilder(256);
|
|
||||||
boolean more = true;
|
|
||||||
while (more) {
|
|
||||||
more = readLine(r, sb);
|
|
||||||
String line = sb.toString();
|
|
||||||
if (line.isEmpty()) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
if (line.startsWith("//"))
|
|
||||||
{
|
|
||||||
continue; //entire lines can also be commented using //
|
|
||||||
}
|
|
||||||
if (line.startsWith("."))
|
|
||||||
{
|
|
||||||
line = line.substring(1); // A leading dot is optional
|
|
||||||
}
|
|
||||||
// An exclamation mark (!) at the start of a rule marks an exception to a previous wildcard rule
|
|
||||||
final boolean isException = line.startsWith("!");
|
|
||||||
if (isException) {
|
|
||||||
line = line.substring(1);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (isException) {
|
|
||||||
exceptions.add(line);
|
|
||||||
} else {
|
|
||||||
rules.add(line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
filter.setPublicSuffixes(rules);
|
|
||||||
filter.setExceptions(exceptions);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
*
|
|
||||||
* @param r
|
|
||||||
* @param sb
|
|
||||||
* @return false when the end of the stream is reached
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private boolean readLine(final Reader r, final StringBuilder sb) throws IOException {
|
|
||||||
sb.setLength(0);
|
|
||||||
int b;
|
|
||||||
boolean hitWhitespace = false;
|
|
||||||
while ((b = r.read()) != -1) {
|
|
||||||
final char c = (char) b;
|
|
||||||
if (c == '\n') {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Each line is only read up to the first whitespace
|
|
||||||
if (Character.isWhitespace(c)) {
|
|
||||||
hitWhitespace = true;
|
|
||||||
}
|
|
||||||
if (!hitWhitespace) {
|
|
||||||
sb.append(c);
|
|
||||||
}
|
|
||||||
if (sb.length() > MAX_LINE_LEN)
|
|
||||||
{
|
|
||||||
throw new IOException("Line too long"); // prevent excess memory usage
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return (b != -1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
/*
|
||||||
|
* ====================================================================
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one
|
||||||
|
* or more contributor license agreements. See the NOTICE file
|
||||||
|
* distributed with this work for additional information
|
||||||
|
* regarding copyright ownership. The ASF licenses this file
|
||||||
|
* to you under the Apache License, Version 2.0 (the
|
||||||
|
* "License"); you may not use this file except in compliance
|
||||||
|
* with the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing,
|
||||||
|
* software distributed under the License is distributed on an
|
||||||
|
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||||
|
* KIND, either express or implied. See the License for the
|
||||||
|
* specific language governing permissions and limitations
|
||||||
|
* under the License.
|
||||||
|
* ====================================================================
|
||||||
|
*
|
||||||
|
* This software consists of voluntary contributions made by many
|
||||||
|
* individuals on behalf of the Apache Software Foundation. For more
|
||||||
|
* information on the Apache Software Foundation, please see
|
||||||
|
* <http://www.apache.org/>.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.http.conn.util;
|
||||||
|
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import org.apache.http.Consts;
|
||||||
|
import org.junit.Assert;
|
||||||
|
import org.junit.Test;
|
||||||
|
|
||||||
|
public class TestPublicSuffixListParser {
|
||||||
|
|
||||||
|
private static final String SOURCE_FILE = "suffixlist.txt";
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testParse() throws Exception {
|
||||||
|
final ClassLoader classLoader = getClass().getClassLoader();
|
||||||
|
final InputStream in = classLoader.getResourceAsStream(SOURCE_FILE);
|
||||||
|
Assert.assertNotNull(in);
|
||||||
|
final PublicSuffixList suffixList;
|
||||||
|
try {
|
||||||
|
final PublicSuffixListParser parser = new PublicSuffixListParser();
|
||||||
|
suffixList = parser.parse(new InputStreamReader(in, Consts.UTF_8));
|
||||||
|
} finally {
|
||||||
|
in.close();
|
||||||
|
}
|
||||||
|
Assert.assertNotNull(suffixList);
|
||||||
|
Assert.assertEquals(Arrays.asList("jp", "ac.jp", "*.tokyo.jp", "no", "h\u00E5.no", "xx"), suffixList.getRules());
|
||||||
|
Assert.assertEquals(Arrays.asList("metro.tokyo.jp"), suffixList.getExceptions());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue