HBASE-25168 Unify WAL name timestamp parsers

Signed-off-by: Duo Zhang <zhangduo@apache.org>
Signed-off-by: Peter Somogyi <psomogyi@apache.org>
This commit is contained in:
stack 2020-10-09 21:00:30 -07:00
parent 8eea052359
commit c28220522d
3 changed files with 39 additions and 46 deletions

View File

@ -336,7 +336,7 @@ public class WALInputFormat extends InputFormat<WALKey, WALEdit> {
static void addFile(List<FileStatus> result, LocatedFileStatus lfs, long startTime, static void addFile(List<FileStatus> result, LocatedFileStatus lfs, long startTime,
long endTime) { long endTime) {
long timestamp = WAL.getTimestamp(lfs.getPath().getName()); long timestamp = AbstractFSWALProvider.getTimestamp(lfs.getPath().getName());
if (timestamp > 0) { if (timestamp > 0) {
// Looks like a valid timestamp. // Looks like a valid timestamp.
if (timestamp <= endTime && timestamp >= startTime) { if (timestamp <= endTime && timestamp >= startTime) {

View File

@ -258,32 +258,37 @@ public abstract class AbstractFSWALProvider<T extends AbstractFSWAL<?>> implemen
public static final String SPLITTING_EXT = "-splitting"; public static final String SPLITTING_EXT = "-splitting";
/** /**
* It returns the file create timestamp from the file name. For name format see * Pattern used to validate a WAL file name see {@link #validateWALFilename(String)} for
* description.
*/
private static final Pattern WAL_FILE_NAME_PATTERN =
Pattern.compile("(.+)\\.(\\d+)(\\.[0-9A-Za-z]+)?");
/**
* Define for when no timestamp found.
*/
private static final long NO_TIMESTAMP = -1L;
/**
* It returns the file create timestamp (the 'FileNum') from the file name. For name format see
* {@link #validateWALFilename(String)} public until remaining tests move to o.a.h.h.wal * {@link #validateWALFilename(String)} public until remaining tests move to o.a.h.h.wal
* @param wal must not be null * @param wal must not be null
* @return the file number that is part of the WAL file name * @return the file number that is part of the WAL file name
*/ */
@VisibleForTesting @VisibleForTesting
public static long extractFileNumFromWAL(final WAL wal) { public static long extractFileNumFromWAL(final WAL wal) {
final Path walName = ((AbstractFSWAL<?>) wal).getCurrentFileName(); final Path walPath = ((AbstractFSWAL<?>) wal).getCurrentFileName();
if (walName == null) { if (walPath == null) {
throw new IllegalArgumentException("The WAL path couldn't be null"); throw new IllegalArgumentException("The WAL path couldn't be null");
} }
Matcher matcher = WAL_FILE_NAME_PATTERN.matcher(walName.getName()); String name = walPath.getName();
if (matcher.matches()) { long timestamp = getTimestamp(name);
return Long.parseLong(matcher.group(2)); if (timestamp == NO_TIMESTAMP) {
} else { throw new IllegalArgumentException(name + " is not a valid wal file name");
throw new IllegalArgumentException(walName.getName() + " is not a valid wal file name");
} }
return timestamp;
} }
/**
* Pattern used to validate a WAL file name see {@link #validateWALFilename(String)} for
* description.
*/
private static final Pattern WAL_FILE_NAME_PATTERN =
Pattern.compile("(.+)\\.(\\d+)(\\.[0-9A-Za-z]+)?");
/** /**
* A WAL file name is of the format: &lt;wal-name&gt;{@link #WAL_FILE_NAME_DELIMITER} * A WAL file name is of the format: &lt;wal-name&gt;{@link #WAL_FILE_NAME_DELIMITER}
* &lt;file-creation-timestamp&gt;[.&lt;suffix&gt;]. provider-name is usually made up of a * &lt;file-creation-timestamp&gt;[.&lt;suffix&gt;]. provider-name is usually made up of a
@ -295,6 +300,23 @@ public abstract class AbstractFSWALProvider<T extends AbstractFSWAL<?>> implemen
return WAL_FILE_NAME_PATTERN.matcher(filename).matches(); return WAL_FILE_NAME_PATTERN.matcher(filename).matches();
} }
/**
 * Parse the start timestamp out of a WAL file name. WALs usually carry the time we started
 * writing to them as part of their name, normally as the last dot-separated component. Sometimes
 * an extra suffix follows the timestamp, as when it is a WAL for the meta table. For example,
 * WALs might look like this <code>10.20.20.171%3A60020.1277499063250</code> where
 * <code>1277499063250</code> is the timestamp. Could also be a meta WAL which adds a '.meta'
 * suffix or a synchronous replication WAL which adds a '.syncrep' suffix; both are accepted. A
 * file may also have no timestamp on it. For example the recovered.edits files are WALs but are
 * named in ascending order, e.g. <code>0000000000000016310</code>; such a name contains no '.'
 * delimiter, does not match the WAL file name pattern, and yields {@link #NO_TIMESTAMP}. The same
 * value is returned when the digits found are too large to be represented as a long.
 * @param name Name of the WAL file; a null name is not handled here.
 * @return Timestamp or {@link #NO_TIMESTAMP}.
 */
public static long getTimestamp(String name) {
  Matcher matcher = WAL_FILE_NAME_PATTERN.matcher(name);
  if (!matcher.matches()) {
    return NO_TIMESTAMP;
  }
  try {
    // Group 2 is the run of digits between the wal-name and the optional suffix.
    return Long.parseLong(matcher.group(2));
  } catch (NumberFormatException e) {
    // The pattern accepts digit runs of any length; one that overflows a long cannot be a
    // timestamp, so report it the same way as a name with no timestamp at all.
    return NO_TIMESTAMP;
  }
}
/** /**
* Construct the directory name for all WALs on a given server. Dir names currently look like this * Construct the directory name for all WALs on a given server. Dir names currently look like this
* for WALs: <code>hbase//WALs/kalashnikov.att.net,61634,1486865297088</code>. * for WALs: <code>hbase//WALs/kalashnikov.att.net,61634,1486865297088</code>.
@ -444,7 +466,7 @@ public abstract class AbstractFSWALProvider<T extends AbstractFSWAL<?>> implemen
* @return start time * @return start time
*/ */
private static long getTS(Path p) { private static long getTS(Path p) {
return WAL.getTimestamp(p.getName()); return getTimestamp(p.getName());
} }
} }

View File

@ -32,7 +32,6 @@ import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability; import org.apache.yetus.audience.InterfaceStability;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting; import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
import static org.apache.commons.lang3.StringUtils.isNumeric;
/** /**
* A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides * A Write Ahead Log (WAL) provides service for reading, writing waledits. This interface provides
@ -300,32 +299,4 @@ public interface WAL extends Closeable, WALFileLengthProvider {
return this.key + "=" + this.edit; return this.key + "=" + this.edit;
} }
} }
/**
 * Pull the start time out of a WAL file name. The name is split on '.' and the timestamp is
 * expected to be the last component, as in <code>10.20.20.171%3A60020.1277499063250</code> where
 * <code>1277499063250</code> is the timestamp. When the last component is not numeric it is taken
 * to be a trailing suffix (a meta WAL adds '.meta', a synchronous replication WAL adds
 * '.syncrep') and the component before it is tried instead. Names without any '.' carry no
 * timestamp; recovered.edits files, for instance, are WALs named in ascending order such as
 * 0000000000000016310.
 * @param name Name of the WAL file.
 * @return Timestamp or -1.
 */
public static long getTimestamp(String name) {
  String[] parts = name.split("\\.");
  int count = parts.length;
  if (count < 2) {
    // No delimiter at all, so there is nothing that could be a timestamp.
    return -1;
  }
  String candidate = parts[count - 1];
  if (!isNumeric(candidate)) {
    // Last component is a suffix such as 'meta' or 'syncrep'; step back one component.
    candidate = parts[count - 2];
  }
  return isNumeric(candidate) ? Long.parseLong(candidate) : -1;
}
} }