[QTL] Allow S3 version finder to search entire s3 object key (#3139)

* Allow S3 version finder to search entire s3 object key
* Previously it was only able to search the immediate "directory"

* Update method javadoc

* Expand docs a bit better
This commit is contained in:
Charles Allen 2016-06-13 21:02:28 -07:00 committed by Fangjin Yang
parent e7ac2e8ba2
commit f7fa1d8c62
2 changed files with 13 additions and 11 deletions

View File

@ -44,13 +44,15 @@ public class S3TimestampVersionedDataFinder extends S3DataSegmentPuller implemen
}
/**
* Gets the latest version using the "parent" of uri as a prefix. The "parent" of uri is evaluated assuming '/'
* delimited paths. If the uri path ends with '/', the path is assumed to be the parent.
* Gets the key with the most recently modified timestamp.
* `pattern` is evaluated against the entire key AFTER the path given in `uri`.
* The substring that `pattern` is matched against will have a leading `/` removed.
* For example `s3://some_bucket/some_prefix/some_key` with a URI of `s3://some_bucket/some_prefix` will match against `some_key`.
* `s3://some_bucket/some_prefixsome_key` with a URI of `s3://some_bucket/some_prefix` will match against `some_key`
* `s3://some_bucket/some_prefix//some_key` with a URI of `s3://some_bucket/some_prefix` will match against `/some_key`
*
* @param uri The URI of interest whose "parent" will be searched as a key prefix for the latest version
* @param pattern The pattern matcher to determine if a *key* is of interest. This will match against the portion of the key that is beyond the URI path,
* not just the equivalent "filename" like some other implementations. A null value matches everything.
* If there is a "/" delimiter between the uri path and the file match, it is ignored. Patterns should **not** account for a leading "/" unless there's a double "/" for some reason
* @param uri The URI in the form of `s3://some_bucket/some_key`
* @param pattern The pattern matcher to determine if a *key* is of interest, or `null` to match everything.
*
* @return A URI to the most recently modified object which matched the pattern.
*/
@ -67,7 +69,7 @@ public class S3TimestampVersionedDataFinder extends S3DataSegmentPuller implemen
final S3Coords coords = new S3Coords(checkURI(uri));
long mostRecent = Long.MIN_VALUE;
URI latest = null;
S3Object[] objects = s3Client.listObjects(coords.bucket, coords.path, "/");
S3Object[] objects = s3Client.listObjects(coords.bucket, coords.path, null);
if (objects == null) {
return null;
}

View File

@ -50,7 +50,7 @@ public class S3TimestampVersionedDataFinderTest
object1.setKey(keyPrefix + "/renames-1.gz");
object1.setLastModifiedDate(new Date(1));
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.eq("/"))).andReturn(
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.<String>isNull())).andReturn(
new S3Object[]{object0, object1}
).once();
S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client);
@ -86,7 +86,7 @@ public class S3TimestampVersionedDataFinderTest
object1.setKey(keyPrefix + "/renames-1.gz");
object1.setLastModifiedDate(new Date(1));
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.eq("/"))).andReturn(
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.<String>isNull())).andReturn(
null
).once();
S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client);
@ -116,7 +116,7 @@ public class S3TimestampVersionedDataFinderTest
object0.setKey(keyPrefix + "/renames-0.gz");
object0.setLastModifiedDate(new Date(0));
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.eq("/"))).andReturn(
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.<String>isNull())).andReturn(
new S3Object[]{object0}
).once();
S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client);
@ -148,7 +148,7 @@ public class S3TimestampVersionedDataFinderTest
object0.setKey(keyPrefix + "/renames-0.gz");
object0.setLastModifiedDate(new Date(0));
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.eq("/"))).andReturn(
EasyMock.expect(s3Client.listObjects(EasyMock.eq(bucket), EasyMock.anyString(), EasyMock.<String>isNull())).andReturn(
new S3Object[]{object0}
).once();
S3TimestampVersionedDataFinder finder = new S3TimestampVersionedDataFinder(s3Client);