Fix GCS-based ingestion if bucket name contains underscores (#12445)

GCP allows bucket names to contain underscores. When a location in such a bucket
is mapped to `java.net.URI`, the underscore is not a legal hostname character, so the
authority is not parsed as a server-based host and `URI.getHost()` returns null.
`CloudObjectLocation` uses `URI.getHost()` as the bucket name, which leads to an NPE.

This commit uses `URI.getAuthority()` as the bucket name if `URI.getHost()` is null.
This commit is contained in:
Tejaswini Bandlamudi 2022-04-21 09:22:35 +05:30 committed by GitHub
parent 5099f5aa70
commit 177e1856cd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 22 additions and 8 deletions

View File

@@ -71,7 +71,7 @@ public class CloudObjectLocation
public CloudObjectLocation(URI uri)
{
this(uri.getHost(), uri.getPath());
this(uri.getHost() != null ? uri.getHost() : uri.getAuthority(), uri.getPath());
}
/**

View File

@@ -115,13 +115,27 @@ public class CloudObjectLocationTest
}
@Test
public void testInvalidBucketName()
public void testBucketNameWithoutUnderscores()
{
expectedException.expect(NullPointerException.class);
expectedException.expectMessage("bucket name cannot be null. Please verify if bucket name adheres to naming rules");
// Underscore(_) character is not valid for bucket names
CloudObjectLocation invalidBucket1 = new CloudObjectLocation("test_bucket", "path/to/path");
CloudObjectLocation invalidBucket2 = new CloudObjectLocation(invalidBucket1.toUri(SCHEME));
Assert.assertEquals("test_bucket", new CloudObjectLocation(invalidBucket2.toUri(SCHEME)));
CloudObjectLocation gsValidBucket = new CloudObjectLocation(URI.create("gs://1test.bucket-value/path/to/path"));
Assert.assertEquals("1test.bucket-value", gsValidBucket.getBucket());
Assert.assertEquals("path/to/path", gsValidBucket.getPath());
CloudObjectLocation s3ValidBucket = new CloudObjectLocation(URI.create("s3://2test.bucket-value/path/to/path"));
Assert.assertEquals("2test.bucket-value", s3ValidBucket.getBucket());
Assert.assertEquals("path/to/path", s3ValidBucket.getPath());
}
@Test
public void testBucketNameWithUnderscores()
{
// Underscore(_) character is allowed for bucket names by GCP
CloudObjectLocation gsValidBucket = new CloudObjectLocation(URI.create("gs://test_bucket/path/to/path"));
Assert.assertEquals("test_bucket", gsValidBucket.getBucket());
Assert.assertEquals("path/to/path", gsValidBucket.getPath());
CloudObjectLocation s3ValidBucket = new CloudObjectLocation(URI.create("s3://test_bucket/path/to/path"));
Assert.assertEquals("test_bucket", s3ValidBucket.getBucket());
Assert.assertEquals("path/to/path", s3ValidBucket.getPath());
}
}