From 9e33dcb8b2907fc7dd4fef447985b34e562b7d45 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:39:33 -0400 Subject: [PATCH 01/20] feat(S3AccessIT): scrub minio from S3AccessIT test --- .../harvard/iq/dataverse/api/S3AccessIT.java | 43 +++---------------- 1 file changed, 7 insertions(+), 36 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 48a64490796..1d15f87b131 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -46,7 +46,7 @@ import org.junit.jupiter.api.Test; /** - * This test requires LocalStack and Minio to be running. Developers can use our + * This test requires LocalStack to be running. Developers can use our * docker-compose file, which has all the necessary configuration. */ public class S3AccessIT { @@ -55,7 +55,6 @@ public class S3AccessIT { static final String BUCKET_NAME = "mybucket"; static S3Client s3localstack = null; - static S3Client s3minio = null; @BeforeAll public static void setUp() { @@ -71,45 +70,21 @@ public static void setUp() { .region(Region.US_EAST_2) .build(); - String accessKeyMinio = "4cc355_k3y"; - String secretKeyMinio = "s3cr3t_4cc355_k3y"; - s3minio = S3Client.builder() - .credentialsProvider(StaticCredentialsProvider.create(AwsBasicCredentials.create(accessKeyMinio, secretKeyMinio))) - .endpointOverride(URI.create("http://localhost:9000")) - .region(Region.US_EAST_1) - .forcePathStyle(true) - .build(); - // create bucket if it doesn't exist try { s3localstack.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); } catch (NoSuchBucketException ex) { s3localstack.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); } - - try { - s3minio.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); - } catch (NoSuchBucketException ex) { - try { - CreateBucketResponse createBucketResponse = s3minio.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); - if (createBucketResponse.sdkHttpResponse().isSuccessful()) { - System.out.println("Bucket created successfully"); - } else { - System.err.println("Failed to create bucket: " + createBucketResponse.sdkHttpResponse().statusCode()); - } - } catch (S3Exception e) { - System.err.println("Error creating bucket: " + e.getMessage()); - } - } } /** - * We're using MinIO for testing non-direct upload. + * We're using Localstack for testing non-direct upload. */ @Test public void testNonDirectUpload() { - String driverId = "minio1"; - String driverLabel = "MinIO"; + String driverId = "localstack1"; + String driverLabel = "LocalStack"; Response createSuperuser = UtilIT.createRandomUser(); createSuperuser.then().assertThat().statusCode(200); @@ -124,7 +99,6 @@ public void testNonDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -191,7 +165,7 @@ public void testNonDirectUpload() { String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse; String s3Object = null; try { - ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -220,7 +194,7 @@ public void testNonDirectUpload() { S3Exception expectedException = null; try { - ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -258,7 +232,6 @@ public void testDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -441,7 +414,7 @@ public void testDirectUpload() { S3Exception expectedException = null; try { - ResponseInputStream s3ObjectResponse = s3minio.getObject(GetObjectRequest.builder() + ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() .bucket(BUCKET_NAME) .key(keyInS3) .build()); @@ -476,7 +449,6 @@ public void testDirectUploadDetectStataFile() { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } @@ -663,7 +635,6 @@ public void testDirectUploadWithFileCountLimit() throws JsonParseException { "status": "OK", "data": { "LocalStack": "localstack1", - "MinIO": "minio1", "Local": "local", "Filesystem": "file1" } From 87ed705cfd743b263a19877878f2f9aaeada9459 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:42:41 -0400 Subject: [PATCH 02/20] feat(docker-compose-dev): scrub minio from docker-compose-dev.yml --- docker-compose-dev.yml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index b24bf0ed6f6..809c0954e89 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -44,16 +44,6 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default - -Ddataverse.files.minio1.type=s3 - -Ddataverse.files.minio1.label=MinIO - -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 - -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 - -Ddataverse.files.minio1.bucket-name=mybucket - -Ddataverse.files.minio1.path-style-access=true - -Ddataverse.files.minio1.upload-redirect=false - -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE @@ -252,23 +242,6 @@ services: tmpfs: - /localstack:mode=770,size=128M,uid=1000,gid=1000 - dev_minio: - container_name: "dev_minio" - hostname: "minio" - image: minio/minio - restart: on-failure - ports: - - "9000:9000" - - "9001:9001" - networks: - - dataverse - volumes: - - ./docker-dev-volumes/minio_storage:/data - environment: - MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y - command: server /data - previewers-provider: container_name: previewers-provider hostname: previewers-provider From 2d2af5f033b8e0dd8793ee08c269512c3065f3de Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 11:43:20 -0400 Subject: [PATCH 03/20] feat(conf/keycloak/docker-compose-dev): scrub minio from conf/keycloak/docker-compose-dev.yml --- conf/keycloak/docker-compose-dev.yml | 27 --------------------------- 1 file changed, 27 deletions(-) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index 7356161ec47..b12aa6adbb6 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -53,16 +53,6 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default - -Ddataverse.files.minio1.type=s3 - -Ddataverse.files.minio1.label=MinIO - -Ddataverse.files.minio1.custom-endpoint-url=http://minio:9000 - -Ddataverse.files.minio1.custom-endpoint-region=us-east-1 - -Ddataverse.files.minio1.bucket-name=mybucket - -Ddataverse.files.minio1.path-style-access=true - -Ddataverse.files.minio1.upload-redirect=false - -Ddataverse.files.minio1.download-redirect=false - -Ddataverse.files.minio1.access-key=4cc355_k3y - -Ddataverse.files.minio1.secret-key=s3cr3t_4cc355_k3y -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE @@ -260,23 +250,6 @@ services: tmpfs: - /localstack:mode=770,size=128M,uid=1000,gid=1000 - dev_minio: - container_name: "dev_minio" - hostname: "minio" - image: minio/minio - restart: on-failure - ports: - - "9000:9000" - - "9001:9001" - networks: - - dataverse - volumes: - - ./docker-dev-volumes/minio_storage:/data - environment: - MINIO_ROOT_USER: 4cc355_k3y - MINIO_ROOT_PASSWORD: s3cr3t_4cc355_k3y - command: server /data - previewers-provider: container_name: previewers-provider hostname: previewers-provider From fdc1a0a8962026600bdb6f303ff11fc64bee1e7f Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:04:54 -0400 Subject: [PATCH 04/20] feat(docker-compose-dev): add localstack_noredirect storage driver config --- docker-compose-dev.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 809c0954e89..a7dc55e9ec4 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -44,6 +44,16 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.localstack_noredirect.type=s3 + -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect + -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.path-style-access=true + -Ddataverse.files.localstack_noredirect.upload-redirect=false + -Ddataverse.files.localstack_noredirect.download-redirect=false + -Ddataverse.files.localstack_noredirect.access-key=default + -Ddataverse.files.localstack_noredirect.secret-key=default -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE From 0f5f2d35a2f1fe10d6a4b37b3feeea677b6a942c Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:05:13 -0400 Subject: [PATCH 05/20] feat(keycloak/docker-compose-dev): add localstack_noredirect storage driver config --- conf/keycloak/docker-compose-dev.yml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index b12aa6adbb6..81e3878c7b1 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -53,6 +53,16 @@ services: -Ddataverse.files.localstack1.download-redirect=true -Ddataverse.files.localstack1.access-key=default -Ddataverse.files.localstack1.secret-key=default + -Ddataverse.files.localstack_noredirect.type=s3 + -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect + -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 + -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.path-style-access=true + -Ddataverse.files.localstack_noredirect.upload-redirect=false + -Ddataverse.files.localstack_noredirect.download-redirect=false + -Ddataverse.files.localstack_noredirect.access-key=default + -Ddataverse.files.localstack_noredirect.secret-key=default -Ddataverse.pid.providers=fake -Ddataverse.pid.default-provider=fake -Ddataverse.pid.fake.type=FAKE From 6a3cfd44a976a8a99b2d9c2a41152edad8451fe9 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:07:13 -0400 Subject: [PATCH 06/20] test(S3AccessIT): use localstack_noredirect driver in testNonDirectUpload Switch testNonDirectUpload from localstack1 (upload-redirect=true, download-redirect=true) to the new localstack_noredirect driver (both redirects disabled), so the test genuinely exercises the non-redirect proxy-through-Dataverse code path. Also replace the plain downloadFile call with downloadFileNoRedirect and assert statusCode(200). This makes the assertion self-documenting: a 303 response would now cause an explicit test failure instead of being silently followed by RestAssured. --- .../edu/harvard/iq/dataverse/api/S3AccessIT.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 1d15f87b131..f0834ceebbf 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -79,12 +79,16 @@ public static void setUp() { } /** - * We're using Localstack for testing non-direct upload. + * We're using LocalStack (with redirects disabled) for testing non-direct + * upload. Using localstack_noredirect ensures the non-redirect + * (proxy-through-Dataverse) code path is actually exercised. If localstack1 + * (redirect-enabled) were used, RestAssured would silently follow the 303 + * redirect and the proxy path would never be tested. */ @Test public void testNonDirectUpload() { - String driverId = "localstack1"; - String driverLabel = "LocalStack"; + String driverId = "localstack_noredirect"; + String driverLabel = "LocalStackNoRedirect"; Response createSuperuser = UtilIT.createRandomUser(); createSuperuser.then().assertThat().statusCode(200); @@ -99,6 +103,7 @@ public void testNonDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -181,8 +186,11 @@ public void testNonDirectUpload() { fail("Failed to read S3 object content: " + ex.getMessage()); } + // Use downloadFileNoRedirect to verify Dataverse serves the content directly + // (status 200). If the driver were misconfigured with download-redirect=true, + // this would return 303 instead, causing the test to fail explicitly. System.out.println("non-direct download..."); - Response downloadFile = UtilIT.downloadFile(Integer.valueOf(fileId), apiToken); + Response downloadFile = UtilIT.downloadFileNoRedirect(Integer.valueOf(fileId), apiToken); downloadFile.then().assertThat().statusCode(200); String contentsOfDownloadedFile = downloadFile.getBody().asString(); From f991ca90ff7528bf0a2070181b1fd3fd8d6f4d10 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 12:16:29 -0400 Subject: [PATCH 07/20] test(S3AccessIT): update drivers doc strings to include localstack_noredirect --- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index f0834ceebbf..5beebc2f177 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -240,6 +240,7 @@ public void testDirectUpload() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -457,6 +458,7 @@ public void testDirectUploadDetectStataFile() { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } @@ -643,6 +645,7 @@ public void testDirectUploadWithFileCountLimit() throws JsonParseException { "status": "OK", "data": { "LocalStack": "localstack1", + "LocalStackNoRedirect": "localstack_noredirect", "Local": "local", "Filesystem": "file1" } From 1e39eee73bc1ecef8af08e2a5cf8968582c9e79c Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Wed, 13 May 2026 14:10:48 -0400 Subject: [PATCH 08/20] fix(S3AccessIT): use distinct bucket name mybucket-noredirect for localstack_noredirect Using the same bucket name as localstack1 would cause a collision in the test environment when tasks/localstack_create_bucket.yml runs aws s3 mb on each bucket entry. Use mybucket-noredirect to avoid this. Update driver configs in both docker-compose files and switch S3AccessIT.testNonDirectUpload to use the new BUCKET_NAME_NOREDIRECT constant. --- conf/keycloak/docker-compose-dev.yml | 2 +- docker-compose-dev.yml | 2 +- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 7 ++++--- 3 files changed, 6 insertions(+), 5 deletions(-) diff --git a/conf/keycloak/docker-compose-dev.yml b/conf/keycloak/docker-compose-dev.yml index 81e3878c7b1..7e57cd7d83c 100644 --- a/conf/keycloak/docker-compose-dev.yml +++ b/conf/keycloak/docker-compose-dev.yml @@ -57,7 +57,7 @@ services: -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 - -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket-noredirect -Ddataverse.files.localstack_noredirect.path-style-access=true -Ddataverse.files.localstack_noredirect.upload-redirect=false -Ddataverse.files.localstack_noredirect.download-redirect=false diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index a7dc55e9ec4..c176597c990 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -48,7 +48,7 @@ services: -Ddataverse.files.localstack_noredirect.label=LocalStackNoRedirect -Ddataverse.files.localstack_noredirect.custom-endpoint-url=http://localstack:4566 -Ddataverse.files.localstack_noredirect.custom-endpoint-region=us-east-2 - -Ddataverse.files.localstack_noredirect.bucket-name=mybucket + -Ddataverse.files.localstack_noredirect.bucket-name=mybucket-noredirect -Ddataverse.files.localstack_noredirect.path-style-access=true -Ddataverse.files.localstack_noredirect.upload-redirect=false -Ddataverse.files.localstack_noredirect.download-redirect=false diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 5beebc2f177..ed800ede727 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -54,6 +54,7 @@ public class S3AccessIT { private static final Logger logger = Logger.getLogger(S3AccessIT.class.getCanonicalName()); static final String BUCKET_NAME = "mybucket"; + static final String BUCKET_NAME_NOREDIRECT = "mybucket-noredirect"; static S3Client s3localstack = null; @BeforeAll @@ -165,13 +166,13 @@ public void testNonDirectUpload() { String storageIdentifier = JsonPath.from(addFileResponse.body().asString()).getString("data.files[0].dataFile.storageIdentifier"); String keyInDataverse = storageIdentifier.split(":")[2]; - Assertions.assertEquals(driverId + "://" + BUCKET_NAME + ":" + keyInDataverse, storageIdentifier); + Assertions.assertEquals(driverId + "://" + BUCKET_NAME_NOREDIRECT + ":" + keyInDataverse, storageIdentifier); String keyInS3 = datasetStorageIdentifier + "/" + keyInDataverse; String s3Object = null; try { ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() - .bucket(BUCKET_NAME) + .bucket(BUCKET_NAME_NOREDIRECT) .key(keyInS3) .build()); // Read the content of the object into a string @@ -203,7 +204,7 @@ public void testNonDirectUpload() { S3Exception expectedException = null; try { ResponseInputStream s3ObjectResponse = s3localstack.getObject(GetObjectRequest.builder() - .bucket(BUCKET_NAME) + .bucket(BUCKET_NAME_NOREDIRECT) .key(keyInS3) .build()); // Read the content of the object into a string From 8eccaf47c6702f5c0de8eb5a706d65aeadf8abfd Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:30:15 -0400 Subject: [PATCH 09/20] docs(big-data-support): remove MinIO references --- .../source/installation/big-data-support.rst | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/doc/sphinx-guides/source/installation/big-data-support.rst b/doc/sphinx-guides/source/installation/big-data-support.rst index 45b94f71a9f..411bd6b54d8 100644 --- a/doc/sphinx-guides/source/installation/big-data-support.rst +++ b/doc/sphinx-guides/source/installation/big-data-support.rst @@ -68,7 +68,6 @@ If the bucket allows the wildcard ``*`` but the Dataverse application only allow Detailed information for the most common S3 admin tools around CORS: - `AWS `_ -- `Minio mc `_ - `s3cmd `_ Get Current CORS Policy on Bucket @@ -80,9 +79,6 @@ If you'd like to check the CORS configuration on your bucket before making chang .. group-tab:: AWS CLI :code:`aws s3api get-bucket-cors --bucket ` - .. group-tab:: Minio Client (mc) - :code:`mc cors get /` - Set CORS Policy on Bucket +++++++++++++++++++++++++ @@ -107,9 +103,6 @@ Both JSON and XML format are explained in detail in `AWS Docs ` as follows: - .. literalinclude:: /_static/installation/cors/cors.xml :name: xml-cors :language: xml @@ -124,7 +117,7 @@ Both JSON and XML format are explained in detail in `AWS Docs Date: Fri, 15 May 2026 12:32:43 -0400 Subject: [PATCH 10/20] docs(config): remove MinIO references --- doc/sphinx-guides/source/installation/config.rst | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/doc/sphinx-guides/source/installation/config.rst b/doc/sphinx-guides/source/installation/config.rst index f2a6fdfa324..2517e635006 100644 --- a/doc/sphinx-guides/source/installation/config.rst +++ b/doc/sphinx-guides/source/installation/config.rst @@ -1218,7 +1218,7 @@ You can configure this redirect properly in your cloud environment to generate a Amazon S3 Storage (or Compatible) +++++++++++++++++++++++++++++++++ -The Dataverse Software supports Amazon S3 storage as well as other S3-compatible stores (like Minio, Ceph RADOS S3 Gateway and many more) for files uploaded to your Dataverse installation. +The Dataverse Software supports Amazon S3 storage as well as other S3-compatible stores (like Ceph RADOS S3 Gateway and many more) for files uploaded to your Dataverse installation. The Dataverse Software S3 driver supports multi-part upload for large files (over 1 GB by default - see the min-part-size option in the table below to change this). @@ -1264,7 +1264,7 @@ Please make note of the following details: - **Endpoint URL** - consult the documentation of your service on how to find it. - * Example: https://play.minio.io:9000 + * Example: http://localhost.localstack.cloud:4566 - **Region:** Optional, but some services might use it. Consult your service documentation. @@ -1461,11 +1461,6 @@ You may provide the values for these via any `supported MicroProfile Config API Reported Working S3-Compatible Storage ###################################### -`Minio v2018-09-12 `_ - Set ``dataverse.files..path-style-access=true``, as Minio works path-based. Works pretty smooth, easy to setup. - **Can be used for quick testing, too:** just use the example values above. Uses the public (read: unsecure and - possibly slow) https://play.minio.io:9000 service. - `StorJ Object Store `_ StorJ is a distributed object store that can be configured with an S3 gateway. Per the S3 Storage instructions above, you'll first set up the StorJ S3 store by defining the id, type, and label. After following the general installation, set the following configuration to use a StorJ object store: ``dataverse.files..chunked-encoding=false``. For step-by-step instructions see https://docs.storj.io/dcs/how-tos/dataverse-integration-guide/ From 483e7f2c07fedb23399a2fa2c342f68d322d939a Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:33:41 -0400 Subject: [PATCH 11/20] docs(S3AccessIO): remove MinIO references --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..f8eb9f91bdf 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - // .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,21 +610,22 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This implementation - * is problematic, because S3 cannot save an object of an unknown length. This - * effectively nullifies any benefits of streaming; as we cannot start saving - * until we have read the entire stream. One way of solving this would be to - * buffer the entire stream as byte[], in memory, then save it... Which of - * course would be limited by the amount of memory available, and thus would not - * work for streams larger than that. So we have eventually decided to save save - * the stream to a temp file, then save to S3. This is slower, but guaranteed to - * work on any size stream. An alternative we may want to consider is to not - * implement this method in the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle attempts - * to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This + * implementation is problematic, because S3 cannot save an object of an + * unknown length. This effectively nullifies any benefits of streaming; as + * we cannot start saving until we have read the entire stream. One way of + * solving this would be to buffer the entire stream as byte[], in memory, + * then save it... Which of course would be limited by the amount of memory + * available, and thus would not work for streams larger than that. So we + * have eventually decided to save save the stream to a temp file, then save + * to S3. This is slower, but guaranteed to work on any size stream. An + * alternative we may want to consider is to not implement this method in + * the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle + * attempts to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. */ @Override @@ -759,7 +760,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -910,13 +911,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key and - * bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key + * and bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname and - * protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname + * and protocol" + * * @return Main File Key * @throws IOException */ @@ -979,12 +980,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. - * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the main - * file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the + * main file label. * @return redirect url * @throws IOException. */ @@ -1003,9 +1004,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1270,7 +1271,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1331,8 +1332,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all (e.g. by - // Minio) so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all, + // so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1521,12 +1522,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if(responseInputStream!= null && responseInputStream.available()>0) { + if (responseInputStream != null && responseInputStream.available() > 0) { responseInputStream.abort(); } } catch (IOException e) { From 73dbf13d608fb62b7b679765c21bbeaaebf7f807 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Fri, 15 May 2026 12:34:47 -0400 Subject: [PATCH 12/20] docs(S3AccessIT): remove MinIO references --- .../harvard/iq/dataverse/api/S3AccessIT.java | 24 ------------------- 1 file changed, 24 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index ed800ede727..2597e77474b 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -319,18 +319,6 @@ public void testDirectUpload() { InputStream inputStream = new ByteArrayInputStream(contentsOfFile.getBytes(StandardCharsets.UTF_8)); Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); - /* - Direct upload to MinIO is failing with errors like this: - - SignatureDoesNotMatch - The request signature we calculated does not match the signature you provided. Check your key and signing method. - 10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - mybucket - /mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - 1793915CCC5BC95C - dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8 - - */ uploadFileDirect.then().assertThat().statusCode(200); // TODO: Use MD5 or whatever Dataverse is configured for and @@ -533,18 +521,6 @@ public void testDirectUploadDetectStataFile() { } Response uploadFileDirect = UtilIT.uploadFileDirect(localhostUrl, inputStream); uploadFileDirect.prettyPrint(); - /* - Direct upload to MinIO is failing with errors like this: - - SignatureDoesNotMatch - The request signature we calculated does not match the signature you provided. Check your key and signing method. - 10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - mybucket - /mybucket/10.5072/FK2/KGFCEJ/18b8c06688c-21b8320a3ee5 - 1793915CCC5BC95C - dd9025bab4ad464b049177c95eb6ebf374d3b3fd1af9251148b658df7ac2e3e8 - - */ uploadFileDirect.then().assertThat().statusCode(200); // TODO: Use MD5 or whatever Dataverse is configured for and From 3d4d86c2b2719c1cdb7ed91f2654428dc91b58a6 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:03:53 -0400 Subject: [PATCH 13/20] fix: remove unnecessary whitespace changes --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index f8eb9f91bdf..6d3fe205639 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - // .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,22 +610,21 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This - * implementation is problematic, because S3 cannot save an object of an - * unknown length. This effectively nullifies any benefits of streaming; as - * we cannot start saving until we have read the entire stream. One way of - * solving this would be to buffer the entire stream as byte[], in memory, - * then save it... Which of course would be limited by the amount of memory - * available, and thus would not work for streams larger than that. So we - * have eventually decided to save save the stream to a temp file, then save - * to S3. This is slower, but guaranteed to work on any size stream. An - * alternative we may want to consider is to not implement this method in - * the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle - * attempts to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This implementation + * is problematic, because S3 cannot save an object of an unknown length. This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that. So we have eventually decided to save save + * the stream to a temp file, then save to S3. This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. */ @Override @@ -760,7 +759,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -911,13 +910,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key - * and bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key and + * bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname - * and protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname and + * protocol" + * * @return Main File Key * @throws IOException */ @@ -980,12 +979,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. - * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the - * main file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the main + * file label. * @return redirect url * @throws IOException. */ @@ -1004,9 +1003,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1271,7 +1270,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1332,8 +1331,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all, - // so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all (e.g. by + // Minio) so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1522,12 +1521,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if (responseInputStream != null && responseInputStream.available() > 0) { + if(responseInputStream!= null && responseInputStream.available()>0) { responseInputStream.abort(); } } catch (IOException e) { From c132840132f97b2f225747ef02b2085679cbcd39 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:05:29 -0400 Subject: [PATCH 14/20] docs(S3AccessIO): remove MinIO references --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..2594d762000 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - // .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,21 +610,22 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This implementation - * is problematic, because S3 cannot save an object of an unknown length. This - * effectively nullifies any benefits of streaming; as we cannot start saving - * until we have read the entire stream. One way of solving this would be to - * buffer the entire stream as byte[], in memory, then save it... Which of - * course would be limited by the amount of memory available, and thus would not - * work for streams larger than that. So we have eventually decided to save save - * the stream to a temp file, then save to S3. This is slower, but guaranteed to - * work on any size stream. An alternative we may want to consider is to not - * implement this method in the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle attempts - * to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This + * implementation is problematic, because S3 cannot save an object of an + * unknown length. This effectively nullifies any benefits of streaming; as + * we cannot start saving until we have read the entire stream. One way of + * solving this would be to buffer the entire stream as byte[], in memory, + * then save it... Which of course would be limited by the amount of memory + * available, and thus would not work for streams larger than that. So we + * have eventually decided to save save the stream to a temp file, then save + * to S3. This is slower, but guaranteed to work on any size stream. An + * alternative we may want to consider is to not implement this method in + * the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle + * attempts to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. */ @Override @@ -759,7 +760,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -910,13 +911,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key and - * bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key + * and bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname and - * protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname + * and protocol" + * * @return Main File Key * @throws IOException */ @@ -979,12 +980,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. - * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the main - * file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the + * main file label. * @return redirect url * @throws IOException. */ @@ -1003,9 +1004,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1270,7 +1271,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1331,8 +1332,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all (e.g. by - // Minio) so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all + // so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1521,12 +1522,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if(responseInputStream!= null && responseInputStream.available()>0) { + if (responseInputStream != null && responseInputStream.available() > 0) { responseInputStream.abort(); } } catch (IOException e) { From 06bd792a760d81ad281b478c6a756bc1fc49d5bd Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:06:02 -0400 Subject: [PATCH 15/20] feat(dev-start-frd.sh): remove MinIO initialization --- scripts/dev/dev-start-frd.sh | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/dev/dev-start-frd.sh b/scripts/dev/dev-start-frd.sh index d113f677bad..d76bed85770 100755 --- a/scripts/dev/dev-start-frd.sh +++ b/scripts/dev/dev-start-frd.sh @@ -28,7 +28,6 @@ mkdir -p docker-dev-volumes/app/secrets mkdir -p docker-dev-volumes/postgresql/data mkdir -p docker-dev-volumes/solr/data mkdir -p docker-dev-volumes/solr/conf -mkdir -p docker-dev-volumes/minio_storage # Only disable DDL generation if database is already initialized # (on first run, we need create-tables to bootstrap the schema) From 4102e852eeac0060bcec283e342aa26dfb9ecd75 Mon Sep 17 00:00:00 2001 From: Snehashish Reddy Manda Date: Tue, 26 May 2026 16:10:12 -0400 Subject: [PATCH 16/20] feat: remove whitespaces --- .../iq/dataverse/dataaccess/S3AccessIO.java | 115 +++++++++--------- 1 file changed, 57 insertions(+), 58 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 2594d762000..6d3fe205639 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -210,7 +210,7 @@ public void open(DataAccessOption... options) throws IOException { + ") is not associated with a bucket."); } } // else we're OK (assumes bucket name in storageidentifier matches the driver's - // bucketname) + // bucketname) } else { if (!storageIdentifier.contains(":")) { // No driver id or bucket @@ -307,8 +307,8 @@ public InputStream getInputStream() throws IOException { try { responseInputStream = s3.getObject(GetObjectRequest.builder().bucket(bucketName).key(key).build(), AsyncResponseTransformer.toBlockingInputStream()).get(); // Since s3 is an S3AsyncClient, we - // need to call .get() to wait for the - // result + // need to call .get() to wait for the + // result setInputStream(responseInputStream); } catch (InterruptedException | ExecutionException e) { // TODO Auto-generated catch block @@ -443,7 +443,7 @@ public void delete() throws IOException { try { DeleteObjectRequest deleteObjRequest = DeleteObjectRequest.builder().bucket(bucketName).key(key).build(); s3.deleteObject(deleteObjRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.delete(): " + e.getMessage()); throw new IOException("Failed to delete storage location " + getStorageLocation(), e); @@ -480,7 +480,7 @@ public boolean isAuxObjectCached(String auxItemTag) throws IOException { .build(); s3.headObject(headObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result return true; } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -499,20 +499,20 @@ public long getAuxObjectSize(String auxItemTag) throws IOException { try { HeadObjectResponse headObjectResponse = s3 .headObject(HeadObjectRequest.builder().bucket(bucketName).key(destinationKey).build()).get(); // Since - // s3 - // is - // an - // S3AsyncClient, - // we - // need - // to - // call - // .get() - // to - // wait - // for - // the - // result + // s3 + // is + // an + // S3AsyncClient, + // we + // need + // to + // call + // .get() + // to + // wait + // for + // the + // result return headObjectResponse.contentLength(); } catch (InterruptedException | ExecutionException e) { if (e.getCause() instanceof NoSuchKeyException) { @@ -539,7 +539,7 @@ public void backupAsAux(String auxItemTag) throws IOException { .destinationBucket(bucketName).destinationKey(destinationKey).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.backupAsAux: " + e.getMessage()); throw new IOException("S3AccessIO: Unable to backup original auxiliary object", e); @@ -554,7 +554,7 @@ public void revertBackupAsAux(String auxItemTag) throws IOException { .sourceKey(destinationKey).destinationBucket(bucketName).destinationKey(key).build(); s3.copyObject(copyObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait for - // the result + // the result deleteAuxObject(auxItemTag); } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.revertBackupAsAux: " + e.getMessage()); @@ -573,7 +573,7 @@ public void savePathAsAux(Path fileSystemPath, String auxItemTag) throws IOExcep .build(); AsyncRequestBody asyncRequestBody = AsyncRequestBody.fromFile(fileSystemPath); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { logger.warning("Caught an exception in S3AccessIO.savePathAsAux(): " + e.getMessage()); throw new IOException("S3AccessIO: Failed to save path as an auxiliary object.", e); @@ -597,7 +597,7 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon executorService); s3.putObject(putObjectRequest, asyncRequestBody).get(); // Since s3 is an S3AsyncClient, we need to call - // .get() to wait for the result + // .get() to wait for the result } catch (InterruptedException | ExecutionException e) { String failureMsg = e.getMessage(); @@ -610,22 +610,21 @@ public void saveInputStreamAsAux(InputStream inputStream, String auxItemTag, Lon } /** - * Implements the StorageIO saveInputStreamAsAux() method. This - * implementation is problematic, because S3 cannot save an object of an - * unknown length. This effectively nullifies any benefits of streaming; as - * we cannot start saving until we have read the entire stream. One way of - * solving this would be to buffer the entire stream as byte[], in memory, - * then save it... Which of course would be limited by the amount of memory - * available, and thus would not work for streams larger than that. So we - * have eventually decided to save save the stream to a temp file, then save - * to S3. This is slower, but guaranteed to work on any size stream. An - * alternative we may want to consider is to not implement this method in - * the S3 driver, and make it throw the - * UnsupportedDataAccessOperationException, similarly to how we handle - * attempts to open OutputStreams, in this and the Swift driver. - * + * Implements the StorageIO saveInputStreamAsAux() method. This implementation + * is problematic, because S3 cannot save an object of an unknown length. This + * effectively nullifies any benefits of streaming; as we cannot start saving + * until we have read the entire stream. One way of solving this would be to + * buffer the entire stream as byte[], in memory, then save it... Which of + * course would be limited by the amount of memory available, and thus would not + * work for streams larger than that. So we have eventually decided to save save + * the stream to a temp file, then save to S3. This is slower, but guaranteed to + * work on any size stream. An alternative we may want to consider is to not + * implement this method in the S3 driver, and make it throw the + * UnsupportedDataAccessOperationException, similarly to how we handle attempts + * to open OutputStreams, in this and the Swift driver. + * * @param inputStream InputStream we want to save - * @param auxItemTag String representing this Auxiliary type ("extension") + * @param auxItemTag String representing this Auxiliary type ("extension") * @throws IOException if anything goes wrong. */ @Override @@ -760,7 +759,7 @@ public void deleteAuxObject(String auxItemTag) throws IOException { .key(destinationKey).build(); s3.deleteObject(deleteObjectRequest).get(); // Since s3 is an S3AsyncClient, we need to call .get() to wait - // for the result + // for the result } catch (InterruptedException | ExecutionException e) { logger.warning("S3AccessIO: Unable to delete object: " + e.getMessage()); throw new IOException("Failed to delete auxiliary object", e); @@ -911,13 +910,13 @@ String getDestinationKey(String auxItemTag) throws IOException { } /** - * TODO: this function is not side effect free (sets instance variables key - * and bucketName). Is this good or bad? Need to ask @landreev + * TODO: this function is not side effect free (sets instance variables key and + * bucketName). Is this good or bad? Need to ask @landreev * * Extract the file key from a file stored on S3. Follows template: "owner - * authority name"/"owner identifier"/"storage identifier without bucketname - * and protocol" - * + * authority name"/"owner identifier"/"storage identifier without bucketname and + * protocol" + * * @return Main File Key * @throws IOException */ @@ -980,12 +979,12 @@ public boolean downloadRedirectEnabled(String auxObjectTag) { /** * Generates a temporary URL for a direct S3 download; either for the main * physical file, or (optionally) for an auxiliary. - * - * @param auxiliaryTag (optional) - * @param auxiliaryType (optional) - aux. mime type, if different from the - * main type - * @param auxiliaryFileName (optional) - file name, if different from the - * main file label. + * + * @param auxiliaryTag (optional) + * @param auxiliaryType (optional) - aux. mime type, if different from the + * main type + * @param auxiliaryFileName (optional) - file name, if different from the main + * file label. * @return redirect url * @throws IOException. */ @@ -1004,9 +1003,9 @@ public String generateTemporaryDownloadUrl(String auxiliaryTag, String auxiliary GetObjectPresignRequest presignRequest = GetObjectPresignRequest.builder() .signatureDuration(expirationDuration) .getObjectRequest(req -> req.bucket(bucketName).key(key) - .responseContentDisposition("attachment; filename*=UTF-8''" - + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) - .responseContentType(contentType)) + .responseContentDisposition("attachment; filename*=UTF-8''" + + URLEncoder.encode(fileName, StandardCharsets.UTF_8).replaceAll("\\+", "%20")) + .responseContentType(contentType)) .build(); PresignedGetObjectRequest presignedRequest; @@ -1271,7 +1270,7 @@ private static S3Presigner getPresigner(String driverId) { } } - + private static AwsCredentialsProvider getCredentialsProvider(String driverId) { if (driverCredentialsProviderMap.containsKey(driverId)) { return driverCredentialsProviderMap.get(driverId); @@ -1332,8 +1331,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all - // so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all (e.g. by + // Minio) so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing @@ -1522,12 +1521,12 @@ private void deleteFile(String fileName) throws IOException { throw new IOException("Failed to delete file", e); } } - + @Override public void closeInputStream() { try { ResponseInputStream responseInputStream = (ResponseInputStream) getInputStream(); - if (responseInputStream != null && responseInputStream.available() > 0) { + if(responseInputStream!= null && responseInputStream.available()>0) { responseInputStream.abort(); } } catch (IOException e) { From c5a3d41f96cda9b535390747deb843ecdd4f2e09 Mon Sep 17 00:00:00 2001 From: Ash Manda Date: Tue, 26 May 2026 16:11:45 -0400 Subject: [PATCH 17/20] Clarify comment on S3 tags support Updated comment for clarity regarding S3 tags implementation. --- .../java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java index 6d3fe205639..74c22f4ce3e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java +++ b/src/main/java/edu/harvard/iq/dataverse/dataaccess/S3AccessIO.java @@ -1331,8 +1331,8 @@ public void removeTempTag() throws IOException { if (e.getCause() instanceof S3Exception) { S3Exception s3e = (S3Exception) e.getCause(); if (s3e.statusCode() == 501) { - // In this case, it's likely that tags are not implemented at all (e.g. by - // Minio) so no tag was set either and it's just something to be aware of + // In this case, it's likely that tags are not implemented at all + // so no tag was set either and it's just something to be aware of logger.warning("Temp tag not deleted: Object tags not supported by storage: " + driverId); } else { // In this case, the assumption is that adding tags has worked, so not removing From c9dc95fad4ccf53883ea6caa395928320008c19c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 22:43:33 +0000 Subject: [PATCH 18/20] Ensure LocalStack no-redirect bucket exists for S3 tests --- conf/localstack/buckets.sh | 1 + .../edu/harvard/iq/dataverse/api/S3AccessIT.java | 16 +++++++++++++--- 2 files changed, 14 insertions(+), 3 deletions(-) diff --git a/conf/localstack/buckets.sh b/conf/localstack/buckets.sh index fe940d9890d..bd901c19634 100755 --- a/conf/localstack/buckets.sh +++ b/conf/localstack/buckets.sh @@ -1,3 +1,4 @@ #!/usr/bin/env bash # https://stackoverflow.com/questions/53619901/auto-create-s3-buckets-on-localstack awslocal s3 mb s3://mybucket +awslocal s3 mb s3://mybucket-noredirect diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 2597e77474b..7ecc3c7d480 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -71,11 +71,21 @@ public static void setUp() { .region(Region.US_EAST_2) .build(); - // create bucket if it doesn't exist + ensureBucketExists(BUCKET_NAME); + ensureBucketExists(BUCKET_NAME_NOREDIRECT); + } + + private static void ensureBucketExists(String bucketName) { try { - s3localstack.headBucket(HeadBucketRequest.builder().bucket(BUCKET_NAME).build()); + s3localstack.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); } catch (NoSuchBucketException ex) { - s3localstack.createBucket(CreateBucketRequest.builder().bucket(BUCKET_NAME).build()); + s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); + } catch (S3Exception ex) { + if (ex.statusCode() == 404) { + s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); + } else { + throw ex; + } } } From 647dd5190039908c8a18f5e6343f106e8873f7d4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 22:44:22 +0000 Subject: [PATCH 19/20] Handle generic S3 404 in S3AccessIT bucket setup --- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 7ecc3c7d480..7363837b8d5 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -78,10 +78,8 @@ public static void setUp() { private static void ensureBucketExists(String bucketName) { try { s3localstack.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); - } catch (NoSuchBucketException ex) { - s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); } catch (S3Exception ex) { - if (ex.statusCode() == 404) { + if (ex.statusCode() == 404 || "NoSuchBucket".equals(ex.awsErrorDetails().errorCode())) { s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); } else { throw ex; From 84b0d188cb5b7f70f8d318f82a2298319b1d0ea9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 26 May 2026 22:44:55 +0000 Subject: [PATCH 20/20] Null-check S3 error details in bucket existence helper --- src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java index 7363837b8d5..bdc9049e519 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/S3AccessIT.java @@ -79,7 +79,8 @@ private static void ensureBucketExists(String bucketName) { try { s3localstack.headBucket(HeadBucketRequest.builder().bucket(bucketName).build()); } catch (S3Exception ex) { - if (ex.statusCode() == 404 || "NoSuchBucket".equals(ex.awsErrorDetails().errorCode())) { + String errorCode = ex.awsErrorDetails() == null ? null : ex.awsErrorDetails().errorCode(); + if (ex.statusCode() == 404 || "NoSuchBucket".equals(errorCode)) { s3localstack.createBucket(CreateBucketRequest.builder().bucket(bucketName).build()); } else { throw ex;