From c28220522d71edfde3424d0413cd19a5bdff6e60 Mon Sep 17 00:00:00 2001 From: stack Date: Fri, 9 Oct 2020 21:00:30 -0700 Subject: [PATCH] HBASE-25168 Unify WAL name timestamp parsers Signed-off-by: Duo Zhang Signed-off-by: Peter Somogyi --- .../hbase/mapreduce/WALInputFormat.java | 2 +- .../hbase/wal/AbstractFSWALProvider.java | 54 +++++++++++++------ .../java/org/apache/hadoop/hbase/wal/WAL.java | 29 ---------- 3 files changed, 39 insertions(+), 46 deletions(-) diff --git a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java index b410fc22d89..14bfec72efe 100644 --- a/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java +++ b/hbase-mapreduce/src/main/java/org/apache/hadoop/hbase/mapreduce/WALInputFormat.java @@ -336,7 +336,7 @@ public class WALInputFormat extends InputFormat { static void addFile(List result, LocatedFileStatus lfs, long startTime, long endTime) { - long timestamp = WAL.getTimestamp(lfs.getPath().getName()); + long timestamp = AbstractFSWALProvider.getTimestamp(lfs.getPath().getName()); if (timestamp > 0) { // Looks like a valid timestamp. if (timestamp <= endTime && timestamp >= startTime) { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java index 84c94e60816..109e1107669 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/AbstractFSWALProvider.java @@ -258,32 +258,37 @@ public abstract class AbstractFSWALProvider> implemen public static final String SPLITTING_EXT = "-splitting"; /** - * It returns the file create timestamp from the file name. For name format see + * Pattern used to validate a WAL file name see {@link #validateWALFilename(String)} for + * description. 
+ */ + private static final Pattern WAL_FILE_NAME_PATTERN = + Pattern.compile("(.+)\\.(\\d+)(\\.[0-9A-Za-z]+)?"); + + /** + * Value returned when no timestamp is found in a WAL file name. + */ + private static final long NO_TIMESTAMP = -1L; + + /** + * It returns the file create timestamp (the 'FileNum') from the file name. For name format see * {@link #validateWALFilename(String)} public until remaining tests move to o.a.h.h.wal * @param wal must not be null * @return the file number that is part of the WAL file name */ @VisibleForTesting public static long extractFileNumFromWAL(final WAL wal) { - final Path walName = ((AbstractFSWAL) wal).getCurrentFileName(); - if (walName == null) { + final Path walPath = ((AbstractFSWAL) wal).getCurrentFileName(); + if (walPath == null) { throw new IllegalArgumentException("The WAL path couldn't be null"); } - Matcher matcher = WAL_FILE_NAME_PATTERN.matcher(walName.getName()); - if (matcher.matches()) { - return Long.parseLong(matcher.group(2)); - } else { - throw new IllegalArgumentException(walName.getName() + " is not a valid wal file name"); + String name = walPath.getName(); + long timestamp = getTimestamp(name); + if (timestamp == NO_TIMESTAMP) { + throw new IllegalArgumentException(name + " is not a valid wal file name"); } + return timestamp; } - /** - * Pattern used to validate a WAL file name see {@link #validateWALFilename(String)} for - * description. - */ - private static final Pattern WAL_FILE_NAME_PATTERN = - Pattern.compile("(.+)\\.(\\d+)(\\.[0-9A-Za-z]+)?"); - /** * A WAL file name is of the format: <wal-name>{@link #WAL_FILE_NAME_DELIMITER} * <file-creation-timestamp>[.<suffix>]. provider-name is usually made up of a @@ -295,6 +300,23 @@ public abstract class AbstractFSWALProvider> implemen return WAL_FILE_NAME_PATTERN.matcher(filename).matches(); } + /** + * Split a WAL filename to get a start time. WALs usually have the time we start writing to them + * as part of their name, usually the suffix. 
Sometimes there will be an extra suffix as when + * it is a WAL for the meta table. For example, WALs might look like this + * 10.20.20.171%3A60020.1277499063250 where 1277499063250 is the + * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a + * synchronous replication WAL which adds a '.syncrep' suffix. Check for these. File also may have + * no timestamp on it. For example the recovered.edits files are WALs but are named in ascending + * order. Here is an example: 0000000000000016310. Allow for this. + * @param name Name of the WAL file. + * @return Timestamp or {@link #NO_TIMESTAMP}. + */ + public static long getTimestamp(String name) { + Matcher matcher = WAL_FILE_NAME_PATTERN.matcher(name); + return matcher.matches() ? Long.parseLong(matcher.group(2)): NO_TIMESTAMP; + } + /** * Construct the directory name for all WALs on a given server. Dir names currently look like this * for WALs: hbase//WALs/kalashnikov.att.net,61634,1486865297088. @@ -444,7 +466,7 @@ public abstract class AbstractFSWALProvider> implemen * @return start time */ private static long getTS(Path p) { - return WAL.getTimestamp(p.getName()); + return getTimestamp(p.getName()); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java index 20379fd7fe9..747b2770d45 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/wal/WAL.java @@ -32,7 +32,6 @@ import org.apache.yetus.audience.InterfaceAudience; import org.apache.yetus.audience.InterfaceStability; import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; -import static org.apache.commons.lang3.StringUtils.isNumeric; /** * A Write Ahead Log (WAL) provides service for reading, writing waledits. 
This interface provides @@ -300,32 +299,4 @@ public interface WAL extends Closeable, WALFileLengthProvider { return this.key + "=" + this.edit; } } - - /** - * Split a WAL filename to get a start time. WALs usually have the time we start writing to them - * as part of their name, usually the suffix. Sometimes there will be an extra suffix as when it - * is a WAL for the meta table. For example, WALs might look like this - * 10.20.20.171%3A60020.1277499063250 where 1277499063250 is the - * timestamp. Could also be a meta WAL which adds a '.meta' suffix or a - * synchronous replication WAL which adds a '.syncrep' suffix. Check for these. File also may have - * no timestamp on it. For example the recovered.edits files are WALs but are named in ascending - * order. Here is an example: 0000000000000016310. Allow for this. - * @param name Name of the WAL file. - * @return Timestamp or -1. - */ - public static long getTimestamp(String name) { - String [] splits = name.split("\\."); - if (splits.length <= 1) { - return -1; - } - String timestamp = splits[splits.length - 1]; - if (!isNumeric(timestamp)) { - // Its a '.meta' or a '.syncrep' suffix. - timestamp = splits[splits.length - 2]; - if (!isNumeric(timestamp)) { - return -1; - } - } - return Long.parseLong(timestamp); - } }