From 9bf258d7772b817380f6934e93b6b0762e4cefe6 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 25 May 2026 13:05:51 +0200 Subject: [PATCH 01/11] Fixed integration tests because they use to fail sometimes --- .../AbstractIntegrationTestWithDatabase.java | 24 +++++++- .../CachingOrcidRestConnectorTest.java | 10 +++- .../rest/AuthenticationRestControllerIT.java | 57 +++++++++++++------ 3 files changed, 68 insertions(+), 23 deletions(-) diff --git a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java index d5bb6ab8a4c8..bc32a14ca31b 100644 --- a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java +++ b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java @@ -10,6 +10,7 @@ import static org.junit.Assert.fail; import java.sql.SQLException; +import java.util.ConcurrentModificationException; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -168,9 +169,26 @@ public void setUp() throws Exception { public void destroy() throws Exception { // Cleanup our global context object try { - AbstractBuilder.cleanupObjects(); - parentCommunity = null; - cleanupContext(); + // Builders/cleanupContext can trigger a transactional commit through Hibernate. + // Older Hibernate releases have a race in ResourceRegistryStandardImpl#releaseResources + // where the close() callback removes the just-iterated entry from the registry map, + // causing an intermittent ConcurrentModificationException (see HHH-15116). + // The DB cleanup work is best-effort here (test data is purged per-class anyway), + // so on CME we abort the context to release the JDBC connection and continue + // with the remaining (Solr/config) cleanup instead of failing an already-passed test. + try { + AbstractBuilder.cleanupObjects(); + parentCommunity = null; + cleanupContext(); + } catch (ConcurrentModificationException cme) { + log.warn("Ignoring transient Hibernate CME during @After cleanup (HHH-15116); " + + "aborting context and continuing.", cme); + if (context != null && context.isValid()) { + context.abort(); + } + context = null; + parentCommunity = null; + } ServiceManager serviceManager = DSpaceServicesFactory.getInstance().getServiceManager(); // Clear the search core. diff --git a/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java b/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java index bdb051601cb8..79fee69d531c 100644 --- a/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java +++ b/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java @@ -76,8 +76,14 @@ public void search() { doReturn(sandboxToken).when(sut).getAccessToken(Mockito.anyString(), Mockito.anyString(), Mockito.anyString()); ExpandedSearchConverter.Results search = sut.search("joh", 0, 1); - //Should match all Johns also, because edismax with wildcard - assertTrue(search.numFound() > 1000); + //Should match all Johns also, because edismax with wildcard. + //We hit the live ORCID sandbox here, so the exact count fluctuates with sandbox data. + //Only assert the call succeeded and the wildcard returned more hits than the requested page size + //(which proves both the API call and the edismax wildcard expansion work) -- using a small, + //stable lower bound to avoid CI flakiness when the sandbox dataset shrinks. + assertTrue("Expected a successful ORCID sandbox response, got: " + search, search.isOk()); + assertTrue("Expected edismax wildcard to return more than 1 match for 'joh', got: " + + search.numFound(), search.numFound() > 1); } @Test diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java index 63318926d752..26262ed63018 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java @@ -131,6 +131,27 @@ public class AuthenticationRestControllerIT extends AbstractControllerIntegratio private final String feature = CanChangePasswordFeature.NAME; + /** + * Configuration key for the ordered list of active AuthenticationMethod plugins. + */ + private static final String AUTH_PLUGIN_KEY = + "plugin.sequence.org.dspace.authenticate.AuthenticationMethod"; + + /** + * Replace the active AuthenticationMethod plugin sequence. + * + *

Calling {@link org.dspace.services.ConfigurationService#setProperty(String, Object)} + * directly with a {@code String[]} value has shown intermittent leakage of previous values + * in the underlying Apache Commons Configuration in-memory overlay (causing flaky + * {@code WWW-Authenticate} headers that include realms from prior tests, e.g. a stray + * {@code password realm} appearing when only Shibboleth should be active). Explicitly + * clearing the property first guarantees a clean replacement.

+ */ + private void setAuthenticationMethodSequence(String[] methods) { + configurationService.setProperty(AUTH_PLUGIN_KEY, null); + configurationService.setProperty(AUTH_PLUGIN_KEY, methods); + } + @Before public void setup() throws Exception { super.setUp(); @@ -140,7 +161,7 @@ public void setup() throws Exception { authorization = new Authorization(eperson, canChangePasswordFeature, ePersonRest); // Default all tests to Password Authentication only - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", PASS_ONLY); + setAuthenticationMethodSequence(PASS_ONLY); } @Test @@ -198,7 +219,7 @@ public void testStatusGetSpecialGroups() throws Exception { .withName("specialGroupIP") .build(); - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", PASS_AND_IP); + setAuthenticationMethodSequence(PASS_AND_IP); configurationService.setProperty("authentication-password.login.specialgroup","specialGroupPwd"); configurationService.setProperty("authentication-ip.specialGroupIP", "123.123.123.123"); context.restoreAuthSystemState(); @@ -338,7 +359,7 @@ public void testStatusNotAuthenticated() throws Exception { // @Test // public void testStatusShibAuthenticatedWithCookie() throws Exception { // //Enable Shibboleth login only -// configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); +// setAuthenticationMethodSequence(SHIB_ONLY); // // String uiURL = configurationService.getProperty("dspace.ui.url"); // @@ -458,7 +479,7 @@ public void testStatusNotAuthenticated() throws Exception { // @Test // public void testShibbolethEndpointCannotBeUsedWithShibDisabled() throws Exception { // // Enable only password login -// configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", PASS_ONLY); +// setAuthenticationMethodSequence(PASS_ONLY); // // String uiURL = configurationService.getProperty("dspace.ui.url"); // @@ -977,7 +998,7 @@ public void testLoginGetRequest() throws Exception { public void testShibbolethLoginURLWithDefaultLazyURL() throws Exception { context.turnOffAuthorisationSystem(); //Enable Shibboleth login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); + setAuthenticationMethodSequence(SHIB_ONLY); //Create a reviewers group Group reviewersGroup = GroupBuilder.createGroup(context) @@ -1001,7 +1022,7 @@ public void testShibbolethLoginURLWithDefaultLazyURL() throws Exception { public void testShibbolethLoginURLWithServerURLContainingPort() throws Exception { context.turnOffAuthorisationSystem(); //Enable Shibboleth login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); + setAuthenticationMethodSequence(SHIB_ONLY); configurationService.setProperty("dspace.server.url", "http://localhost:8080/server"); configurationService.setProperty("authentication-shibboleth.lazysession.secure", false); @@ -1027,7 +1048,7 @@ public void testShibbolethLoginURLWithServerURLContainingPort() throws Exception public void testShibbolethLoginURLWithConfiguredLazyURL() throws Exception { context.turnOffAuthorisationSystem(); //Enable Shibboleth login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); + setAuthenticationMethodSequence(SHIB_ONLY); configurationService.setProperty("authentication-shibboleth.lazysession.loginurl", "http://shibboleth.org/Shibboleth.sso/Login"); @@ -1053,7 +1074,7 @@ public void testShibbolethLoginURLWithConfiguredLazyURL() throws Exception { public void testShibbolethLoginURLWithConfiguredLazyURLWithPort() throws Exception { context.turnOffAuthorisationSystem(); //Enable Shibboleth login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); + setAuthenticationMethodSequence(SHIB_ONLY); configurationService.setProperty("authentication-shibboleth.lazysession.loginurl", "http://shibboleth.org:8080/Shibboleth.sso/Login"); @@ -1081,7 +1102,7 @@ public void testShibbolethLoginURLWithConfiguredLazyURLWithPort() throws Excepti public void testShibbolethLoginRequestAttribute() throws Exception { context.turnOffAuthorisationSystem(); //Enable Shibboleth login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); + setAuthenticationMethodSequence(SHIB_ONLY); //Create a reviewers group Group reviewersGroup = GroupBuilder.createGroup(context) @@ -1137,7 +1158,7 @@ public void testShibbolethLoginRequestAttribute() throws Exception { @Ignore // Ignored until an endpoint is added to return all groups public void testShibbolethLoginRequestHeaderWithIpAuthentication() throws Exception { - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_AND_IP); + setAuthenticationMethodSequence(SHIB_AND_IP); configurationService.setProperty("authentication-ip.Administrator", "123.123.123.123"); @@ -1210,7 +1231,7 @@ public void testShibbolethLoginRequestHeaderWithIpAuthentication() throws Except @Test public void testShibbolethAndPasswordAuthentication() throws Exception { //Enable Shibboleth and password login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_AND_PASS); + setAuthenticationMethodSequence(SHIB_AND_PASS); //Check if WWW-Authenticate header contains shibboleth and password getClient().perform(get("/api/authn/status").header("Referer", "http://my.uni.edu")) @@ -1281,7 +1302,7 @@ public void testShibbolethAndPasswordAuthentication() throws Exception { @Test public void testOnlyPasswordAuthenticationWorks() throws Exception { //Enable only password login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", PASS_ONLY); + setAuthenticationMethodSequence(PASS_ONLY); //Check if WWW-Authenticate header contains only getClient().perform(get("/api/authn/status").header("Referer", "http://my.uni.edu")) @@ -1314,7 +1335,7 @@ public void testOnlyPasswordAuthenticationWorks() throws Exception { @Test public void testShibbolethAuthenticationDoesNotWorkWithPassOnly() throws Exception { //Enable only password login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", PASS_ONLY); + setAuthenticationMethodSequence(PASS_ONLY); //Check if WWW-Authenticate header contains only password getClient().perform(get("/api/authn/status").header("Referer", "http://my.uni.edu")) @@ -1332,7 +1353,7 @@ public void testShibbolethAuthenticationDoesNotWorkWithPassOnly() throws Excepti @Test public void testOnlyShibbolethAuthenticationWorks() throws Exception { //Enable only Shibboleth login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); + setAuthenticationMethodSequence(SHIB_ONLY); //Check if WWW-Authenticate header contains only shibboleth getClient().perform(get("/api/authn/status").header("Referer", "http://my.uni.edu")) @@ -1365,7 +1386,7 @@ public void testOnlyShibbolethAuthenticationWorks() throws Exception { @Test public void testPasswordAuthenticationDoesNotWorkWithShibOnly() throws Exception { //Enable only Shibboleth login - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_ONLY); + setAuthenticationMethodSequence(SHIB_ONLY); getClient().perform(post("/api/authn/login") .param("user", eperson.getEmail()) @@ -1540,7 +1561,7 @@ public void testGenerateShortLivedTokenWithShortLivedToken() throws Exception { // @Test // public void testStatusOrcidAuthenticatedWithCookie() throws Exception { // -// configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", ORCID_ONLY); +// setAuthenticationMethodSequence(ORCID_ONLY); // // String uiURL = configurationService.getProperty("dspace.ui.url"); // @@ -1627,7 +1648,7 @@ public void testGenerateShortLivedTokenWithShortLivedToken() throws Exception { @Test public void testOrcidLoginURL() throws Exception { - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", ORCID_ONLY); + setAuthenticationMethodSequence(ORCID_ONLY); String originalClientId = orcidConfiguration.getClientId(); orcidConfiguration.setClientId("CLIENT-ID"); @@ -1658,7 +1679,7 @@ public void testAreSpecialGroupsApplicable() throws Exception { .withName("specialGroupShib") .build(); - configurationService.setProperty("plugin.sequence.org.dspace.authenticate.AuthenticationMethod", SHIB_AND_PASS); + setAuthenticationMethodSequence(SHIB_AND_PASS); configurationService.setProperty("authentication-password.login.specialgroup", "specialGroupPwd"); configurationService.setProperty("authentication-shibboleth.role.faculty", "specialGroupShib"); configurationService.setProperty("authentication-shibboleth.default-roles", "faculty"); From bb09815fc8adc44c08927756e474022ecf409a8c Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 25 May 2026 13:55:29 +0200 Subject: [PATCH 02/11] test: stabilize flaky CI tests (Hibernate cleanup retry, Shibboleth auth sequence reset, ORCID assertion hardening) --- .../AbstractIntegrationTestWithDatabase.java | 46 ++++++++++++------- 1 file changed, 29 insertions(+), 17 deletions(-) diff --git a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java index bc32a14ca31b..14ada51b744a 100644 --- a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java +++ b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java @@ -169,25 +169,37 @@ public void setUp() throws Exception { public void destroy() throws Exception { // Cleanup our global context object try { - // Builders/cleanupContext can trigger a transactional commit through Hibernate. + // Builders/cleanupContext can trigger transactional commits through Hibernate. // Older Hibernate releases have a race in ResourceRegistryStandardImpl#releaseResources - // where the close() callback removes the just-iterated entry from the registry map, - // causing an intermittent ConcurrentModificationException (see HHH-15116). - // The DB cleanup work is best-effort here (test data is purged per-class anyway), - // so on CME we abort the context to release the JDBC connection and continue - // with the remaining (Solr/config) cleanup instead of failing an already-passed test. - try { - AbstractBuilder.cleanupObjects(); - parentCommunity = null; - cleanupContext(); - } catch (ConcurrentModificationException cme) { - log.warn("Ignoring transient Hibernate CME during @After cleanup (HHH-15116); " - + "aborting context and continuing.", cme); - if (context != null && context.isValid()) { - context.abort(); + // which may raise an intermittent ConcurrentModificationException (HHH-15116). + // Retry cleanup after aborting the context so we don't silently skip DB cleanup. + final int maxCleanupAttempts = 3; + boolean cleanupComplete = false; + for (int cleanupAttempt = 1; cleanupAttempt <= maxCleanupAttempts; cleanupAttempt++) { + try { + AbstractBuilder.cleanupObjects(); + parentCommunity = null; + cleanupContext(); + cleanupComplete = true; + break; + } catch (ConcurrentModificationException cme) { + log.warn("Transient Hibernate CME during @After cleanup (HHH-15116), attempt {}/{}; " + + "aborting context and retrying cleanup.", cleanupAttempt, maxCleanupAttempts, cme); + if (context != null && context.isValid()) { + context.abort(); + } + context = null; + parentCommunity = null; + + if (cleanupAttempt < maxCleanupAttempts) { + context = new Context(Context.Mode.READ_WRITE); + context.turnOffAuthorisationSystem(); + } } - context = null; - parentCommunity = null; + } + + if (!cleanupComplete) { + throw new IllegalStateException("Unable to complete @After DB cleanup after retries."); } ServiceManager serviceManager = DSpaceServicesFactory.getInstance().getServiceManager(); From 6e86590e0a01d62e40063044a1dbb9fb0a89bea1 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Tue, 2 Jun 2026 17:30:37 +0200 Subject: [PATCH 03/11] test: fix flaky ITs at the source (live ORCID, Shibboleth config-reload) + Hibernate CME diagnostics ORCID CachingOrcidRestConnectorTest no longer hits the live ORCID sandbox: search/getLabel/search_fail mock the HTTP layer (httpGet made protected) with a canned expanded-search response, so they are deterministic instead of asserting against fluctuating sandbox data. Shibboleth WWW-Authenticate flakiness: add a test-only config-definition.xml with config-reload=false. Runtime setProperty(...AuthenticationMethod...) overrides were silently discarded whenever the auto-reload listener rebuilt the combined config (restoring clarin-dspace.cfg's [Password, ClarinShib] default), intermittently leaking 'password realm' into the header. Verified: with auto-reload off the override survives; the explicit reloadConfig() reset in @After still works. Hibernate ConcurrentModificationException in @After cleanup: the per-session JDBC ResourceRegistry is not thread-safe, so the CME means two threads touch one Session. Capture a full thread dump on CME (target/cme-dumps/) to identify the colliding thread in CI; keep a resilient retry so an already-passed test isn't failed by this teardown race. (Context.finalize() ruled out: sessions are thread-local.) Co-Authored-By: Claude Opus 4.8 --- .../external/CachingOrcidRestConnector.java | 4 +- .../dspaceFolder/config/config-definition.xml | 56 ++++++++++++++++ .../AbstractIntegrationTestWithDatabase.java | 66 +++++++++++++++++-- .../CachingOrcidRestConnectorTest.java | 52 +++++++++++---- .../dspace/external/orcid-expanded-search.xml | 22 +++++++ 5 files changed, 180 insertions(+), 20 deletions(-) create mode 100644 dspace-api/src/test/data/dspaceFolder/config/config-definition.xml create mode 100644 dspace-api/src/test/resources/org/dspace/external/orcid-expanded-search.xml diff --git a/dspace-api/src/main/java/org/dspace/external/CachingOrcidRestConnector.java b/dspace-api/src/main/java/org/dspace/external/CachingOrcidRestConnector.java index e34767a25063..4eee572d5a6f 100644 --- a/dspace-api/src/main/java/org/dspace/external/CachingOrcidRestConnector.java +++ b/dspace-api/src/main/java/org/dspace/external/CachingOrcidRestConnector.java @@ -174,7 +174,9 @@ protected String getAccessToken(String clientSecret, String clientId, String OAU } } - private InputStream httpGet(String path, String accessToken) throws IOException { + // Package/sub-class visible so tests can stub the HTTP layer (see CachingOrcidRestConnectorTest) + // and avoid hitting the live ORCID sandbox. + protected InputStream httpGet(String path, String accessToken) throws IOException { String trimmedPath = path.replaceFirst("^/+", "").replaceFirst("/+$", ""); String fullPath = apiURL + '/' + trimmedPath; diff --git a/dspace-api/src/test/data/dspaceFolder/config/config-definition.xml b/dspace-api/src/test/data/dspaceFolder/config/config-definition.xml new file mode 100644 index 000000000000..66e0112f49b1 --- /dev/null +++ b/dspace-api/src/test/data/dspaceFolder/config/config-definition.xml @@ -0,0 +1,56 @@ + + + +
+ + + +
+ + + + + + + + + + + + + + + + + + + +
diff --git a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java index 14ada51b744a..d31799855fcc 100644 --- a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java +++ b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java @@ -9,8 +9,12 @@ import static org.junit.Assert.fail; +import java.io.File; +import java.io.PrintWriter; import java.sql.SQLException; import java.util.ConcurrentModificationException; +import java.util.Map; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -169,10 +173,15 @@ public void setUp() throws Exception { public void destroy() throws Exception { // Cleanup our global context object try { - // Builders/cleanupContext can trigger transactional commits through Hibernate. - // Older Hibernate releases have a race in ResourceRegistryStandardImpl#releaseResources - // which may raise an intermittent ConcurrentModificationException (HHH-15116). - // Retry cleanup after aborting the context so we don't silently skip DB cleanup. + // Builders/cleanupContext commit transactions through Hibernate. We have observed a rare, + // CI-only intermittent ConcurrentModificationException thrown from + // ResourceRegistryStandardImpl#releaseResources (a HashMap.forEach over the JDBC resource + // registry) while committing here. That registry is per-Hibernate-Session and explicitly NOT + // thread-safe, so the CME means a second thread is touching the same Session concurrently + // during cleanup. The exact offending thread has not yet been identified (it does not + // reproduce locally), so on CME we (a) capture a full thread dump to pinpoint the colliding + // thread the next time it happens in CI, and (b) retry the cleanup so an already-passed test + // is not failed by this teardown race. See dumpAllThreadsOnCme(). final int maxCleanupAttempts = 3; boolean cleanupComplete = false; for (int cleanupAttempt = 1; cleanupAttempt <= maxCleanupAttempts; cleanupAttempt++) { @@ -183,8 +192,12 @@ public void destroy() throws Exception { cleanupComplete = true; break; } catch (ConcurrentModificationException cme) { - log.warn("Transient Hibernate CME during @After cleanup (HHH-15116), attempt {}/{}; " - + "aborting context and retrying cleanup.", cleanupAttempt, maxCleanupAttempts, cme); + // Capture a full thread dump the instant the CME is caught, so we can see which OTHER + // thread is concurrently inside JDBC/Hibernate code on the same (non-thread-safe) session. + dumpAllThreadsOnCme(cme, cleanupAttempt); + log.warn("Transient Hibernate CME during @After cleanup (concurrent access to the " + + "per-session JDBC resource registry), attempt {}/{}; aborting context, capturing a " + + "thread dump and retrying cleanup.", cleanupAttempt, maxCleanupAttempts, cme); if (context != null && context.isValid()) { context.abort(); } @@ -232,6 +245,47 @@ public void destroy() throws Exception { } } + // Counter to give each captured dump a unique file name. + private static final AtomicInteger CME_DUMP_COUNTER = new AtomicInteger(0); + + /** + * Diagnostic helper: when a ConcurrentModificationException is caught during @After cleanup, dump the + * stack traces of ALL live threads to a file under target/cme-dumps/ (archived as a CI artifact). The + * CME is thrown on the test thread while another thread concurrently mutates the same Hibernate + * session's (non-thread-safe) JDBC ResourceRegistry; this dump is meant to reveal that other thread so + * the underlying concurrency bug can be fixed at its source. + */ + private static void dumpAllThreadsOnCme(ConcurrentModificationException cme, int attempt) { + try { + File dir = new File("target/cme-dumps"); + dir.mkdirs(); + int idx = CME_DUMP_COUNTER.incrementAndGet(); + File out = new File(dir, "cme-" + System.currentTimeMillis() + "-" + idx + "-attempt" + attempt + ".txt"); + try (PrintWriter pw = new PrintWriter(out, "UTF-8")) { + pw.println("===== ConcurrentModificationException caught during @After cleanup ====="); + pw.println("Caught on thread: " + Thread.currentThread().getName()); + pw.println("Attempt: " + attempt); + pw.println(); + pw.println("----- CME stack -----"); + cme.printStackTrace(pw); + pw.println(); + pw.println("----- ALL THREAD STACKS at moment of CME -----"); + for (Map.Entry e : Thread.getAllStackTraces().entrySet()) { + Thread t = e.getKey(); + pw.println(); + pw.println("\"" + t.getName() + "\" id=" + t.getId() + " state=" + t.getState() + + " daemon=" + t.isDaemon()); + for (StackTraceElement ste : e.getValue()) { + pw.println("\tat " + ste); + } + } + } + log.error("CME thread dump written to {}", out.getAbsolutePath()); + } catch (Exception dumpEx) { + log.error("Failed to write CME thread dump", dumpEx); + } + } + /** * Utility method to cleanup a created Context object (to save memory). * This can also be used by individual tests to cleanup context objects they create. diff --git a/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java b/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java index 79fee69d531c..7e8cbc6c94fc 100644 --- a/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java +++ b/dspace-api/src/test/java/org/dspace/external/CachingOrcidRestConnectorTest.java @@ -13,9 +13,13 @@ import static org.junit.Assert.assertNull; import static org.junit.Assert.assertTrue; import static org.mockito.Mockito.doReturn; +import static org.mockito.Mockito.doThrow; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; +import java.io.IOException; +import java.io.InputStream; + import org.dspace.AbstractDSpaceTest; import org.dspace.external.provider.orcid.xml.ExpandedSearchConverter; import org.dspace.utils.DSpace; @@ -33,8 +37,22 @@ public class CachingOrcidRestConnectorTest extends AbstractDSpaceTest { private static final String orcid = "0000-0002-9150-2529"; private static final String expectedLabel = "Connor, John"; + // Canned ORCID "expanded-search" response (num-found=1725, first result -> "Connor, John"). + // Used to mock the HTTP layer so the tests don't depend on the live ORCID sandbox. + private static final String EXPANDED_SEARCH_XML = "org/dspace/external/orcid-expanded-search.xml"; + private CachingOrcidRestConnector sut; + /** + * Load a canned API response from the test classpath as a fresh InputStream. + * (A new stream is returned on every call because the connector consumes/closes it.) + */ + private InputStream cannedResponse(String resource) { + InputStream is = getClass().getClassLoader().getResourceAsStream(resource); + assertNotNull("Missing test resource: " + resource, is); + return is; + } + @Before public void setup() { sut = new CachingOrcidRestConnector(); @@ -59,46 +77,54 @@ public void getAccessToken() { } @Test - public void getLabel() { + public void getLabel() throws Exception { sut = Mockito.spy(sut); sut.setApiURL("https://pub.sandbox.orcid.org/v3.0"); //Mock the CachingOrcidRestConnector so that getAccessToken returns sandboxToken doReturn(sandboxToken).when(sut).getAccessToken(Mockito.anyString(), Mockito.anyString(), Mockito.anyString()); + //Mock the HTTP layer with a canned response so we don't depend on the live ORCID sandbox. + doReturn(cannedResponse(EXPANDED_SEARCH_XML)).when(sut).httpGet(Mockito.anyString(), Mockito.anyString()); String label = sut.getLabel(orcid); assertEquals(expectedLabel, label); } @Test - public void search() { + public void search() throws Exception { sut = Mockito.spy(sut); sut.setApiURL("https://pub.sandbox.orcid.org/v3.0"); //Mock the CachingOrcidRestConnector so that getAccessToken returns sandboxToken doReturn(sandboxToken).when(sut).getAccessToken(Mockito.anyString(), Mockito.anyString(), Mockito.anyString()); + //Mock the HTTP layer with a canned ORCID expanded-search response. Previously this test hit the live + //ORCID sandbox and asserted numFound() > 1000, which flaked whenever the sandbox dataset was reset/shrunk. + //Mocking the transport keeps the real parsing + edismax wildcard query-building path under test, but makes + //the result deterministic. + doReturn(cannedResponse(EXPANDED_SEARCH_XML)).when(sut).httpGet(Mockito.anyString(), Mockito.anyString()); ExpandedSearchConverter.Results search = sut.search("joh", 0, 1); - //Should match all Johns also, because edismax with wildcard. - //We hit the live ORCID sandbox here, so the exact count fluctuates with sandbox data. - //Only assert the call succeeded and the wildcard returned more hits than the requested page size - //(which proves both the API call and the edismax wildcard expansion work) -- using a small, - //stable lower bound to avoid CI flakiness when the sandbox dataset shrinks. - assertTrue("Expected a successful ORCID sandbox response, got: " + search, search.isOk()); - assertTrue("Expected edismax wildcard to return more than 1 match for 'joh', got: " - + search.numFound(), search.numFound() > 1); + assertTrue("Expected a successful ORCID response, got: " + search, search.isOk()); + //'joh' is alphabetic, so the connector turns it into an edismax wildcard query ("joh || joh*") that matches + //many authors; the canned response carries num-found=1725. + assertEquals("Unexpected num-found for the canned ORCID response", 1725L, (long) search.numFound()); + assertEquals("Connor, John", search.results().get(0).label()); } @Test - public void search_fail() { + public void search_fail() throws Exception { sut = Mockito.spy(sut); sut.setApiURL("https://pub.sandbox.orcid.org/v3.0"); - //Mock the CachingOrcidRestConnector so that getAccessToken returns and invalid token + //Mock the CachingOrcidRestConnector so that getAccessToken returns an invalid token doReturn("FAKE").when(sut).getAccessToken(Mockito.anyString(), Mockito.anyString(), Mockito.anyString()); + //Simulate the ORCID API rejecting the (fake) token: every httpGet fails. Done via the mocked HTTP layer + //so the test is deterministic and doesn't rely on the live sandbox returning a 401. + doThrow(new IOException("simulated ORCID auth failure")).when(sut) + .httpGet(Mockito.anyString(), Mockito.anyString()); ExpandedSearchConverter.Results search = sut.search("joh", 0, 1); assertFalse(search.isOk()); - //Further calls fail too, token is stored + //Further calls fail too, token is stored (so getAccessToken is only resolved once) search = sut.search("joh", 0, 1); assertFalse(search.isOk()); diff --git a/dspace-api/src/test/resources/org/dspace/external/orcid-expanded-search.xml b/dspace-api/src/test/resources/org/dspace/external/orcid-expanded-search.xml new file mode 100644 index 000000000000..582731a42038 --- /dev/null +++ b/dspace-api/src/test/resources/org/dspace/external/orcid-expanded-search.xml @@ -0,0 +1,22 @@ + + + + + 0000-0002-9150-2529 + John + Connor + + + 0000-0002-1208-2352 + John + Kendrew + John Kendrew + + From b6e300ee56fdb4e65408446851ea07afb16052ae Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 3 Jun 2026 07:52:10 +0200 Subject: [PATCH 04/11] test: revert IT-env config-reload=false override Disabling config auto-reload globally in the test environment broke AuthorizeConfigIT.testReloadConfiguration, which deliberately verifies that AuthorizeConfiguration picks up live changes written to local.cfg via the auto-reload mechanism. Auto-reload is a tested feature here, so it must not be disabled to work around the Shibboleth WWW-Authenticate flakiness. The Shibboleth flakiness (runtime setProperty override discarded when the combined config is rebuilt) needs a reload-safe fix in the auth test instead; tracked separately. Co-Authored-By: Claude Opus 4.8 --- .../dspaceFolder/config/config-definition.xml | 56 ------------------- 1 file changed, 56 deletions(-) delete mode 100644 dspace-api/src/test/data/dspaceFolder/config/config-definition.xml diff --git a/dspace-api/src/test/data/dspaceFolder/config/config-definition.xml b/dspace-api/src/test/data/dspaceFolder/config/config-definition.xml deleted file mode 100644 index 66e0112f49b1..000000000000 --- a/dspace-api/src/test/data/dspaceFolder/config/config-definition.xml +++ /dev/null @@ -1,56 +0,0 @@ - - - -
- - - -
- - - - - - - - - - - - - - - - - - - -
From 106de960d79103b11df51d50a1b891ea5162457c Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Wed, 3 Jun 2026 09:46:03 +0200 Subject: [PATCH 05/11] test: make Shibboleth auth-sequence override reload-safe (fix WWW-Authenticate flakiness) The flaky 'password realm' leak in AuthenticationRestControllerIT had this root cause: configurationService.setProperty(plugin.sequence...AuthenticationMethod, ...) only updates the in-memory view of the combined configuration. That view is discarded whenever it is rebuilt, and the auto-reload listener rebuilds it as soon as any reloadable cfg file's mtime changes mid-run (e.g. another test writing local.cfg). When that rebuild lands between the override and the request, clarin-dspace.cfg's default [PasswordAuthentication, ClarinShibAuthentication] returns and 'password realm' leaks into the header. The previous clear-then-set helper did not help (it is equivalent to a plain setProperty). Fix: set the sequence via a JVM system property (highest-precedence override layer, re-read on every rebuild) + reloadConfig(), and clear it in @After. This survives auto-reload without disabling it (so AuthorizeConfigIT, which verifies auto-reload, still passes). Verified in the real /api/authn/status endpoint: an explicit reloadConfig() after a setProperty override reproduces the leak, while the system-property approach keeps the header Shibboleth-only across rebuilds. Full AuthenticationRestControllerIT (43 tests) passes, and running it alongside ClarinAuthenticationRestControllerIT / AnonymousAdditionalAuthorizationFilterIT confirms the property does not leak across classes. Co-Authored-By: Claude Opus 4.8 --- .../rest/AuthenticationRestControllerIT.java | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java index 26262ed63018..24afcd48f98f 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java @@ -71,6 +71,7 @@ import org.dspace.orcid.model.OrcidTokenResponseDTO; import org.dspace.services.ConfigurationService; import org.hamcrest.Matchers; +import org.junit.After; import org.junit.Before; import org.junit.Ignore; import org.junit.Test; @@ -140,16 +141,33 @@ public class AuthenticationRestControllerIT extends AbstractControllerIntegratio /** * Replace the active AuthenticationMethod plugin sequence. * - *

Calling {@link org.dspace.services.ConfigurationService#setProperty(String, Object)} - * directly with a {@code String[]} value has shown intermittent leakage of previous values - * in the underlying Apache Commons Configuration in-memory overlay (causing flaky - * {@code WWW-Authenticate} headers that include realms from prior tests, e.g. a stray - * {@code password realm} appearing when only Shibboleth should be active). Explicitly - * clearing the property first guarantees a clean replacement.

+ *

This sets the sequence via a JVM system property (plus an explicit + * {@link org.dspace.services.ConfigurationService#reloadConfig()} so the change is visible + * immediately) rather than via {@link org.dspace.services.ConfigurationService#setProperty(String, Object)}.

+ * + *

A plain {@code setProperty(...)} override only lives in the in-memory view of the combined + * configuration and is silently discarded whenever that view is rebuilt. The auto-reload listener + * rebuilds it as soon as any reloadable cfg file's last-modified timestamp changes (which happens + * intermittently during a CI run, e.g. another test writing {@code local.cfg}). When that rebuild lands + * between this call and the request under test, the on-disk default returns -- in CLARIN that default is + * {@code [PasswordAuthentication, ClarinShibAuthentication]} -- and a stray {@code password realm} leaks + * into the {@code WWW-Authenticate} header even though only e.g. Shibboleth was requested. A system + * property sits in the highest-precedence (override) section of the combined config and is re-read on + * every rebuild, so it survives auto-reload. It is cleared again in {@link #clearAuthenticationMethodSequence()}.

*/ private void setAuthenticationMethodSequence(String[] methods) { - configurationService.setProperty(AUTH_PLUGIN_KEY, null); - configurationService.setProperty(AUTH_PLUGIN_KEY, methods); + System.setProperty(AUTH_PLUGIN_KEY, String.join(",", methods)); + configurationService.reloadConfig(); + } + + /** + * Remove the system-property override set by {@link #setAuthenticationMethodSequence(String[])} so it + * does not leak into other test classes running in the same JVM. Runs before the superclass @After, + * whose {@code reloadConfig()} then restores the on-disk default. + */ + @After + public void clearAuthenticationMethodSequence() { + System.clearProperty(AUTH_PLUGIN_KEY); } @Before From ead032f18a91b8f66b4e333cc83b7633e4e7b3e7 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 8 Jun 2026 13:32:21 +0200 Subject: [PATCH 06/11] test: add Hibernate concurrency monitor + CI upload to pinpoint @After CME The intermittent ConcurrentModificationException in @After cleanup is a genuine cross-thread data race on Hibernate's per-session, non-thread-safe JDBC ResourceRegistry (xref): a second thread mutates the test thread's session while it commits/rolls back. Verified against hibernate-core-5.6.15 sources that the releaseResources forEach lambda never touches xref, so single-thread re-entrancy is impossible (this disproves the earlier HHH-15116 single-thread theory). The window is microseconds, so it does not reproduce locally even with deliberate cross-thread session sharing; it only surfaces under CI load. A live thread dump of a running IT JVM shows NO legitimate background thread ever touches Hibernate (all are Solr/HTTP/Jetty/JVM). So the culprit is a transient thread, and any non-test thread caught inside Hibernate JDBC/session code is by definition the offender. - HibernateConcurrencyMonitor: JVM-wide background sampler that records (de-duped) any non-test thread found inside org.hibernate.{resource.jdbc,engine.jdbc, internal.SessionImpl}; flushed to target/cme-dumps/ on CME and at JVM shutdown. Pure observer, never changes test behaviour. - AbstractIntegrationTestWithDatabase: start the monitor and mark the JUnit thread in setUp; flush it alongside the existing thread dump on a captured CME. - build.yml: always-upload **/target/cme-dumps/** (not gated on failure) so a successful cleanup retry no longer hides the diagnostic. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/build.yml | 14 ++ .../AbstractIntegrationTestWithDatabase.java | 9 ++ .../dspace/HibernateConcurrencyMonitor.java | 139 ++++++++++++++++++ 3 files changed, 162 insertions(+) create mode 100644 dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 1749c28fe294..90217fbe19e4 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -78,6 +78,20 @@ jobs: name: ${{ matrix.type }} results path: ${{ matrix.resultsdir }} + # Always upload any Hibernate teardown thread dumps captured by AbstractIntegrationTestWithDatabase + # when the rare ConcurrentModificationException race fires during @After cleanup. These are written + # even when the cleanup retry ultimately succeeds (so the build stays green), so this step must NOT be + # gated on failure() - otherwise a successful retry would hide the very diagnostic that pinpoints the + # colliding thread. 'if-no-files-found: ignore' keeps this a no-op on the (normal) runs with no CME. + - name: Upload Hibernate CME thread dumps (if any) + if: ${{ always() }} + uses: actions/upload-artifact@v4 + with: + name: ${{ matrix.type }} cme-dumps + path: '**/target/cme-dumps/**' + if-no-files-found: ignore + retention-days: 30 + # Upload code coverage report to artifact, so that it can be shared with the 'codecov' job (see below) - name: Upload code coverage report to Artifact uses: actions/upload-artifact@v4 diff --git a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java index d31799855fcc..03ccd3d3846c 100644 --- a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java +++ b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java @@ -111,6 +111,12 @@ public static void initDatabase() { @Before public void setUp() throws Exception { try { + // DIAGNOSTIC (temporary): start the JVM-wide Hibernate concurrency monitor and mark this JUnit + // thread as a legitimate test thread. The monitor hunts the transient thread behind the rare + // @After ConcurrentModificationException (see destroy()/dumpAllThreadsOnCme()). + HibernateConcurrencyMonitor.startOnce(); + HibernateConcurrencyMonitor.markTestThread(); + //Start a new context context = new Context(Context.Mode.READ_WRITE); context.turnOffAuthorisationSystem(); @@ -195,6 +201,9 @@ public void destroy() throws Exception { // Capture a full thread dump the instant the CME is caught, so we can see which OTHER // thread is concurrently inside JDBC/Hibernate code on the same (non-thread-safe) session. dumpAllThreadsOnCme(cme, cleanupAttempt); + // Also flush the background monitor's accumulated fingerprints of any non-test thread that + // was ever seen inside Hibernate JDBC/session code (the most reliable culprit signal). + HibernateConcurrencyMonitor.flush("cme-attempt" + cleanupAttempt); log.warn("Transient Hibernate CME during @After cleanup (concurrent access to the " + "per-session JDBC resource registry), attempt {}/{}; aborting context, capturing a " + "thread dump and retrying cleanup.", cleanupAttempt, maxCleanupAttempts, cme); diff --git a/dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java b/dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java new file mode 100644 index 000000000000..177707c0cf78 --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java @@ -0,0 +1,139 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace; + +import java.io.File; +import java.io.PrintWriter; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +/** + * TEST DIAGNOSTIC (temporary) for the rare Hibernate {@code ConcurrentModificationException} thrown from + * {@code ResourceRegistryStandardImpl.releaseResources} during {@code @After} cleanup + * (see {@link AbstractIntegrationTestWithDatabase#destroy()}). + * + *

That CME provably requires a SECOND thread to mutate the test thread's per-session, non-thread-safe JDBC + * resource registry while the test thread commits/rolls back. A live thread-dump of a running IT JVM shows that + * NO legitimate background thread ever touches Hibernate (every persistent thread is Solr, HTTP-client, Jetty or + * a JVM thread). Therefore any non-test thread caught executing inside Hibernate JDBC / session code is, + * by definition, the culprit.

+ * + *

This monitor is a JVM-wide background sampler. Every {@link #SAMPLE_INTERVAL_MS} ms it snapshots all thread + * stacks and records (de-duplicated) any non-test, non-monitor thread found inside + * {@code org.hibernate.resource.jdbc}, {@code org.hibernate.engine.jdbc} or {@code org.hibernate.internal.SessionImpl}. + * Records are flushed to {@code target/cme-dumps/} on a captured CME and at JVM shutdown. It is a pure observer: + * it never touches Hibernate, never throws into test code, and never changes behaviour. Delete once the culprit + * thread has been identified and fixed at its source.

+ */ +public final class HibernateConcurrencyMonitor { + + private static final long SAMPLE_INTERVAL_MS = 20; + + /** Thread ids of legitimate test threads (the JUnit thread(s)) to ignore. */ + private static final Set TEST_THREAD_IDS = ConcurrentHashMap.newKeySet(); + + /** De-duplicated culprit fingerprints: key -> formatted record. */ + private static final Map CULPRITS = new ConcurrentHashMap<>(); + + private static volatile boolean started; + + private HibernateConcurrencyMonitor() { + } + + /** Start the monitor exactly once per JVM (fork). Safe to call from every test's setUp. */ + public static synchronized void startOnce() { + if (started) { + return; + } + started = true; + Thread t = new Thread(HibernateConcurrencyMonitor::loop, "hibernate-concurrency-monitor"); + t.setDaemon(true); + t.start(); + Runtime.getRuntime().addShutdownHook(new Thread(() -> flush("jvm-shutdown"), "hibernate-concurrency-flush")); + } + + /** Mark the current thread as a legitimate test thread, so its (normal) Hibernate use is ignored. */ + public static void markTestThread() { + TEST_THREAD_IDS.add(Thread.currentThread().getId()); + } + + private static void loop() { + final long monitorId = Thread.currentThread().getId(); + while (true) { + try { + Map all = Thread.getAllStackTraces(); + for (Map.Entry e : all.entrySet()) { + Thread th = e.getKey(); + if (th.getId() == monitorId || TEST_THREAD_IDS.contains(th.getId())) { + continue; + } + if (touchesHibernateJdbc(e.getValue())) { + record(th, e.getValue()); + } + } + Thread.sleep(SAMPLE_INTERVAL_MS); + } catch (InterruptedException ie) { + return; + } catch (Throwable ignore) { + // A diagnostic must never die from a transient error (e.g. a thread terminating mid-snapshot). + } + } + } + + private static boolean touchesHibernateJdbc(StackTraceElement[] stack) { + for (StackTraceElement f : stack) { + String c = f.getClassName(); + if (c.startsWith("org.hibernate.resource.jdbc") + || c.startsWith("org.hibernate.engine.jdbc") + || c.startsWith("org.hibernate.internal.SessionImpl")) { + return true; + } + } + return false; + } + + private static void record(Thread th, StackTraceElement[] stack) { + StringBuilder sb = new StringBuilder(); + int n = Math.min(stack.length, 25); + for (int i = 0; i < n; i++) { + sb.append("\tat ").append(stack[i]).append('\n'); + } + String stackText = sb.toString(); + String key = th.getName() + "|" + Integer.toHexString(stackText.hashCode()); + CULPRITS.putIfAbsent(key, "\"" + th.getName() + "\" id=" + th.getId() + + " daemon=" + th.isDaemon() + " state=" + th.getState() + + " group=" + (th.getThreadGroup() == null ? "?" : th.getThreadGroup().getName()) + "\n" + stackText); + } + + /** Write all captured culprit fingerprints to target/cme-dumps/ (no-op if none were caught). */ + public static void flush(String reason) { + if (CULPRITS.isEmpty()) { + return; + } + try { + File dir = new File("target/cme-dumps"); + dir.mkdirs(); + File out = new File(dir, "hibernate-concurrency-" + System.currentTimeMillis() + "-" + reason + ".txt"); + try (PrintWriter pw = new PrintWriter(out, "UTF-8")) { + pw.println("===== Non-test threads caught INSIDE Hibernate JDBC / session code ====="); + pw.println("reason=" + reason + " distinctFingerprints=" + CULPRITS.size()); + pw.println("Baseline: NO legitimate background thread touches Hibernate, so each entry below is a"); + pw.println("suspect for the @After ConcurrentModificationException (concurrent access to the test"); + pw.println("thread's non-thread-safe per-session JDBC ResourceRegistry)."); + pw.println(); + for (String rec : CULPRITS.values()) { + pw.println(rec); + pw.println("------------------------------------------------------------"); + } + } + } catch (Exception ignore) { + // best-effort diagnostic + } + } +} From 0a88723bf8f09bd009d45bb09e6be7c17224f2d8 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 8 Jun 2026 14:34:45 +0200 Subject: [PATCH 07/11] fix: don't close iterate() Hibernate stream from a finalize() (root cause of flaky CME) Root cause of the intermittent ConcurrentModificationException in @After integration-test cleanup, identified via the HibernateConcurrencyMonitor CI dumps: the GC Finalizer thread, running org.dspace.core.AbstractHibernateDAO$1.finalize(), closed the Hibernate Stream returned by AbstractHibernateDAO.iterate(). Closing a stream closes its ScrollableResults, which mutates the owning Session's per-session, non-thread-safe JDBC ResourceRegistry (xref) - but on the Finalizer thread, concurrently with the thread that owns the session. When that collided with the owning thread's commit/rollback (releaseResources -> xref.forEach), it threw ConcurrentModificationException. The CI dumps showed this exact finalizer stack as the only non-test thread inside Hibernate in dspace-api, and present in dspace-server-webapp too. This was confirmed genuine cross-thread access (not the previously assumed single-thread/HHH bug): verified against hibernate-core-5.6.15 sources that the releaseResources forEach lambda never touches xref, so single-thread re-entrancy is impossible. Fix: close the backing stream on the owning thread when iteration is exhausted, and remove the finalize() override. An iterator abandoned before exhaustion is released safely when its Context/Session is closed (releaseResources then runs on the owning thread). Adds AbstractHibernateDAOIteratorIT to guard against reintroducing a stream-closing finalizer. Co-Authored-By: Claude Opus 4.8 --- .../org/dspace/core/AbstractHibernateDAO.java | 20 ++++-- .../core/AbstractHibernateDAOIteratorIT.java | 64 +++++++++++++++++++ 2 files changed, 80 insertions(+), 4 deletions(-) create mode 100644 dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java diff --git a/dspace-api/src/main/java/org/dspace/core/AbstractHibernateDAO.java b/dspace-api/src/main/java/org/dspace/core/AbstractHibernateDAO.java index 32ad747d765e..498c52c27f46 100644 --- a/dspace-api/src/main/java/org/dspace/core/AbstractHibernateDAO.java +++ b/dspace-api/src/main/java/org/dspace/core/AbstractHibernateDAO.java @@ -306,11 +306,23 @@ public Iterator iterate(Query query) { return new AbstractIterator () { @Override protected T computeNext() { - return iter.hasNext() ? iter.next() : endOfData(); - } - @Override - public void finalize() { + if (iter.hasNext()) { + return iter.next(); + } + // Close the backing ScrollableResults / JDBC cursor as soon as the iteration is exhausted, on + // the thread that owns the Hibernate Session (the caller's thread). + // + // This MUST NOT be done from a finalize() override (as it previously was): finalize() runs on + // the GC Finalizer thread, so closing the stream there mutates the Session's per-session, + // non-thread-safe JDBC ResourceRegistry (xref) concurrently with the owning thread. That is a + // genuine data race which intermittently throws ConcurrentModificationException from + // ResourceRegistryStandardImpl.releaseResources during an unrelated commit/rollback (observed + // as flaky integration-test failures in AbstractIntegrationTestWithDatabase teardown). + // + // An iterator abandoned before exhaustion no longer leaks: its open statement is released + // safely when the owning Context/Session is closed (releaseResources runs on the owning thread). stream.close(); + return endOfData(); } }; } diff --git a/dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java b/dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java new file mode 100644 index 000000000000..79fe8d23c266 --- /dev/null +++ b/dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java @@ -0,0 +1,64 @@ +/** + * The contents of this file are subject to the license and copyright + * detailed in the LICENSE and NOTICE files at the root of the source + * tree and available online at + * + * http://www.dspace.org/license/ + */ +package org.dspace.core; + +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import java.util.Iterator; + +import org.dspace.AbstractIntegrationTestWithDatabase; +import org.dspace.content.MetadataValue; +import org.dspace.content.factory.ContentServiceFactory; +import org.dspace.content.service.MetadataValueService; +import org.junit.Test; + +/** + * Regression test for the intermittent {@code ConcurrentModificationException} thrown during + * {@code @After} integration-test cleanup and traced to + * {@link AbstractHibernateDAO#iterate(javax.persistence.Query)}. + * + *

That method used to close its Hibernate {@code Stream} from a {@code finalize()} override. {@code finalize()} + * runs on the GC Finalizer thread, so closing the stream there mutated the owning {@code Session}'s per-session, + * non-thread-safe JDBC {@code ResourceRegistry} (xref) concurrently with the thread that owns the session. That + * is a genuine data race which intermittently threw {@code ConcurrentModificationException} from + * {@code ResourceRegistryStandardImpl.releaseResources} during an unrelated commit/rollback. The fix closes the + * stream on the owning thread once the iteration is exhausted. This test guards against reintroducing any + * stream-closing finalizer on the returned iterator.

+ */ +public class AbstractHibernateDAOIteratorIT extends AbstractIntegrationTestWithDatabase { + + private final MetadataValueService metadataValueService = + ContentServiceFactory.getInstance().getMetadataValueService(); + + @Test + public void iterateIteratorMustNotCloseStreamFromFinalizer() throws Exception { + // findByValueLike() delegates to AbstractHibernateDAO.iterate(); the concrete (anonymous) iterator type + // is what we assert on. No matching rows are required - the wrapper iterator is created regardless. + Iterator iterator = + metadataValueService.findByValueLike(context, "no-such-metadata-value-" + System.nanoTime()); + assertNotNull(iterator); + + // The returned iterator MUST NOT declare its own finalize(): closing the backing Hibernate Stream from + // the GC Finalizer thread is exactly the cross-thread access to the non-thread-safe JDBC + // ResourceRegistry that caused the flaky ConcurrentModificationException. + try { + iterator.getClass().getDeclaredMethod("finalize"); + fail("AbstractHibernateDAO.iterate() iterator must not declare a finalize() override - closing the " + + "Hibernate Stream on the GC Finalizer thread races the owning thread's non-thread-safe " + + "JDBC ResourceRegistry and intermittently throws ConcurrentModificationException."); + } catch (NoSuchMethodException expected) { + // good: no stream-closing finalizer on the iterator + } + + // It must still iterate to exhaustion and close its cursor on THIS (the owning) thread without error. + while (iterator.hasNext()) { + assertNotNull(iterator.next()); + } + } +} From 11c113bcf5a1091abb2c7f33e1ca0370495fadca Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 8 Jun 2026 15:18:58 +0200 Subject: [PATCH 08/11] fix: remove broken Context.finalize() that leaked finalizer-thread sessions Context.finalize() ran on the GC Finalizer thread and called dbConnection.isTransActionAlive()/abort(), which resolve sessionFactory.getCurrentSession() to a brand-new session bound to the Finalizer thread - never the (now-unreachable) thread that opened the Context. So it could not roll back the Context's transaction anyway; it only opened and leaked a throwaway Hibernate session on the Finalizer thread, and threw IllegalStateException once the SessionFactory was closed (seen in the CI thread dumps used to diagnose the flaky integration-test ConcurrentModificationException). Abandoned Contexts are cleaned up safely when their owning thread's session ends; callers already close Contexts via complete()/abort()/try-with-resources (Context is AutoCloseable). Removes the now-redundant ContextTest.testFinalize (close()/abort() are covered by testClose/testAbort/testAbort2). Co-Authored-By: Claude Opus 4.8 --- .../main/java/org/dspace/core/Context.java | 23 ++++++------------- .../java/org/dspace/core/ContextTest.java | 17 -------------- 2 files changed, 7 insertions(+), 33 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/core/Context.java b/dspace-api/src/main/java/org/dspace/core/Context.java index e721deff5e71..8eee735ed16c 100644 --- a/dspace-api/src/main/java/org/dspace/core/Context.java +++ b/dspace-api/src/main/java/org/dspace/core/Context.java @@ -765,22 +765,13 @@ public void restoreContextUser() { currentUserPreviousState = null; } - /** - * Close the context, aborting any open transactions (if any). - * @throws Throwable - */ - @Override - protected void finalize() throws Throwable { - /* - * If a context is garbage-collected, we roll back and free up the - * database connection if there is one. - */ - if (dbConnection != null && dbConnection.isTransActionAlive()) { - abort(); - } - - super.finalize(); - } + // NOTE: Context intentionally does NOT override finalize(). A finalizer runs on the GC Finalizer thread, + // where dbConnection.isTransActionAlive()/abort() resolve sessionFactory.getCurrentSession() to a brand-new + // session bound to the Finalizer thread - never the (now-unreachable) thread that opened this Context. So a + // finalizer could not roll back this Context's transaction anyway; it only opened and leaked a throwaway + // Hibernate session on the Finalizer thread (and threw IllegalStateException once the SessionFactory closed). + // Abandoned Contexts are cleaned up safely when their owning thread's session ends. Always close a Context + // via complete()/abort() or try-with-resources (Context implements AutoCloseable). public void shutDownDatabase() throws SQLException { dbConnection.shutdown(); diff --git a/dspace-api/src/test/java/org/dspace/core/ContextTest.java b/dspace-api/src/test/java/org/dspace/core/ContextTest.java index ccc1d2f732cc..9084d5f90991 100644 --- a/dspace-api/src/test/java/org/dspace/core/ContextTest.java +++ b/dspace-api/src/test/java/org/dspace/core/ContextTest.java @@ -520,23 +520,6 @@ public void testGetSpecialGroups() throws SQLException, AuthorizeException, IOEx cleanupContext(instance); } - /** - * Test of finalize method, of class Context. - */ - @Test - public void testFinalize() throws Throwable { - // We need a new Context object - Context instance = new Context(); - - instance.finalize(); - - // Finalize is like abort()...should invalidate our context - assertThat("testSetSpecialGroup 0", instance.isValid(), equalTo(false)); - - // Cleanup our context - cleanupContext(instance); - } - /** * Test of updateDatabase method, of class Context. */ From 0b2e9b7b631211767073a5ebce469bf5a0a79558 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 8 Jun 2026 15:19:06 +0200 Subject: [PATCH 09/11] test: remove flaky-CME diagnostic scaffolding and teardown retry (root cause fixed) The intermittent @After ConcurrentModificationException is now fixed at its source (AbstractHibernateDAO.iterate no longer closes its Hibernate stream from a finalizer; broken Context.finalize() removed). The temporary diagnostics that pinpointed it are no longer needed: - Restore AbstractIntegrationTestWithDatabase.destroy() to its plain form (drop the 3x cleanup retry and the per-CME thread dump) and remove the HibernateConcurrencyMonitor wiring. - Delete HibernateConcurrencyMonitor. - Revert the build.yml always-upload of target/cme-dumps. CI keeps -Dfailsafe.rerunFailingTestsCount=2 as the generic flaky-test safety net. Co-Authored-By: Claude Opus 4.8 --- .github/workflows/build.yml | 14 -- .../AbstractIntegrationTestWithDatabase.java | 99 +------------ .../dspace/HibernateConcurrencyMonitor.java | 139 ------------------ 3 files changed, 3 insertions(+), 249 deletions(-) delete mode 100644 dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index 90217fbe19e4..1749c28fe294 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -78,20 +78,6 @@ jobs: name: ${{ matrix.type }} results path: ${{ matrix.resultsdir }} - # Always upload any Hibernate teardown thread dumps captured by AbstractIntegrationTestWithDatabase - # when the rare ConcurrentModificationException race fires during @After cleanup. These are written - # even when the cleanup retry ultimately succeeds (so the build stays green), so this step must NOT be - # gated on failure() - otherwise a successful retry would hide the very diagnostic that pinpoints the - # colliding thread. 'if-no-files-found: ignore' keeps this a no-op on the (normal) runs with no CME. - - name: Upload Hibernate CME thread dumps (if any) - if: ${{ always() }} - uses: actions/upload-artifact@v4 - with: - name: ${{ matrix.type }} cme-dumps - path: '**/target/cme-dumps/**' - if-no-files-found: ignore - retention-days: 30 - # Upload code coverage report to artifact, so that it can be shared with the 'codecov' job (see below) - name: Upload code coverage report to Artifact uses: actions/upload-artifact@v4 diff --git a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java index 03ccd3d3846c..d5bb6ab8a4c8 100644 --- a/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java +++ b/dspace-api/src/test/java/org/dspace/AbstractIntegrationTestWithDatabase.java @@ -9,12 +9,7 @@ import static org.junit.Assert.fail; -import java.io.File; -import java.io.PrintWriter; import java.sql.SQLException; -import java.util.ConcurrentModificationException; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -111,12 +106,6 @@ public static void initDatabase() { @Before public void setUp() throws Exception { try { - // DIAGNOSTIC (temporary): start the JVM-wide Hibernate concurrency monitor and mark this JUnit - // thread as a legitimate test thread. The monitor hunts the transient thread behind the rare - // @After ConcurrentModificationException (see destroy()/dumpAllThreadsOnCme()). - HibernateConcurrencyMonitor.startOnce(); - HibernateConcurrencyMonitor.markTestThread(); - //Start a new context context = new Context(Context.Mode.READ_WRITE); context.turnOffAuthorisationSystem(); @@ -179,50 +168,9 @@ public void setUp() throws Exception { public void destroy() throws Exception { // Cleanup our global context object try { - // Builders/cleanupContext commit transactions through Hibernate. We have observed a rare, - // CI-only intermittent ConcurrentModificationException thrown from - // ResourceRegistryStandardImpl#releaseResources (a HashMap.forEach over the JDBC resource - // registry) while committing here. That registry is per-Hibernate-Session and explicitly NOT - // thread-safe, so the CME means a second thread is touching the same Session concurrently - // during cleanup. The exact offending thread has not yet been identified (it does not - // reproduce locally), so on CME we (a) capture a full thread dump to pinpoint the colliding - // thread the next time it happens in CI, and (b) retry the cleanup so an already-passed test - // is not failed by this teardown race. See dumpAllThreadsOnCme(). - final int maxCleanupAttempts = 3; - boolean cleanupComplete = false; - for (int cleanupAttempt = 1; cleanupAttempt <= maxCleanupAttempts; cleanupAttempt++) { - try { - AbstractBuilder.cleanupObjects(); - parentCommunity = null; - cleanupContext(); - cleanupComplete = true; - break; - } catch (ConcurrentModificationException cme) { - // Capture a full thread dump the instant the CME is caught, so we can see which OTHER - // thread is concurrently inside JDBC/Hibernate code on the same (non-thread-safe) session. - dumpAllThreadsOnCme(cme, cleanupAttempt); - // Also flush the background monitor's accumulated fingerprints of any non-test thread that - // was ever seen inside Hibernate JDBC/session code (the most reliable culprit signal). - HibernateConcurrencyMonitor.flush("cme-attempt" + cleanupAttempt); - log.warn("Transient Hibernate CME during @After cleanup (concurrent access to the " - + "per-session JDBC resource registry), attempt {}/{}; aborting context, capturing a " - + "thread dump and retrying cleanup.", cleanupAttempt, maxCleanupAttempts, cme); - if (context != null && context.isValid()) { - context.abort(); - } - context = null; - parentCommunity = null; - - if (cleanupAttempt < maxCleanupAttempts) { - context = new Context(Context.Mode.READ_WRITE); - context.turnOffAuthorisationSystem(); - } - } - } - - if (!cleanupComplete) { - throw new IllegalStateException("Unable to complete @After DB cleanup after retries."); - } + AbstractBuilder.cleanupObjects(); + parentCommunity = null; + cleanupContext(); ServiceManager serviceManager = DSpaceServicesFactory.getInstance().getServiceManager(); // Clear the search core. @@ -254,47 +202,6 @@ public void destroy() throws Exception { } } - // Counter to give each captured dump a unique file name. - private static final AtomicInteger CME_DUMP_COUNTER = new AtomicInteger(0); - - /** - * Diagnostic helper: when a ConcurrentModificationException is caught during @After cleanup, dump the - * stack traces of ALL live threads to a file under target/cme-dumps/ (archived as a CI artifact). The - * CME is thrown on the test thread while another thread concurrently mutates the same Hibernate - * session's (non-thread-safe) JDBC ResourceRegistry; this dump is meant to reveal that other thread so - * the underlying concurrency bug can be fixed at its source. - */ - private static void dumpAllThreadsOnCme(ConcurrentModificationException cme, int attempt) { - try { - File dir = new File("target/cme-dumps"); - dir.mkdirs(); - int idx = CME_DUMP_COUNTER.incrementAndGet(); - File out = new File(dir, "cme-" + System.currentTimeMillis() + "-" + idx + "-attempt" + attempt + ".txt"); - try (PrintWriter pw = new PrintWriter(out, "UTF-8")) { - pw.println("===== ConcurrentModificationException caught during @After cleanup ====="); - pw.println("Caught on thread: " + Thread.currentThread().getName()); - pw.println("Attempt: " + attempt); - pw.println(); - pw.println("----- CME stack -----"); - cme.printStackTrace(pw); - pw.println(); - pw.println("----- ALL THREAD STACKS at moment of CME -----"); - for (Map.Entry e : Thread.getAllStackTraces().entrySet()) { - Thread t = e.getKey(); - pw.println(); - pw.println("\"" + t.getName() + "\" id=" + t.getId() + " state=" + t.getState() - + " daemon=" + t.isDaemon()); - for (StackTraceElement ste : e.getValue()) { - pw.println("\tat " + ste); - } - } - } - log.error("CME thread dump written to {}", out.getAbsolutePath()); - } catch (Exception dumpEx) { - log.error("Failed to write CME thread dump", dumpEx); - } - } - /** * Utility method to cleanup a created Context object (to save memory). * This can also be used by individual tests to cleanup context objects they create. diff --git a/dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java b/dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java deleted file mode 100644 index 177707c0cf78..000000000000 --- a/dspace-api/src/test/java/org/dspace/HibernateConcurrencyMonitor.java +++ /dev/null @@ -1,139 +0,0 @@ -/** - * The contents of this file are subject to the license and copyright - * detailed in the LICENSE and NOTICE files at the root of the source - * tree and available online at - * - * http://www.dspace.org/license/ - */ -package org.dspace; - -import java.io.File; -import java.io.PrintWriter; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - -/** - * TEST DIAGNOSTIC (temporary) for the rare Hibernate {@code ConcurrentModificationException} thrown from - * {@code ResourceRegistryStandardImpl.releaseResources} during {@code @After} cleanup - * (see {@link AbstractIntegrationTestWithDatabase#destroy()}). - * - *

That CME provably requires a SECOND thread to mutate the test thread's per-session, non-thread-safe JDBC - * resource registry while the test thread commits/rolls back. A live thread-dump of a running IT JVM shows that - * NO legitimate background thread ever touches Hibernate (every persistent thread is Solr, HTTP-client, Jetty or - * a JVM thread). Therefore any non-test thread caught executing inside Hibernate JDBC / session code is, - * by definition, the culprit.

- * - *

This monitor is a JVM-wide background sampler. Every {@link #SAMPLE_INTERVAL_MS} ms it snapshots all thread - * stacks and records (de-duplicated) any non-test, non-monitor thread found inside - * {@code org.hibernate.resource.jdbc}, {@code org.hibernate.engine.jdbc} or {@code org.hibernate.internal.SessionImpl}. - * Records are flushed to {@code target/cme-dumps/} on a captured CME and at JVM shutdown. It is a pure observer: - * it never touches Hibernate, never throws into test code, and never changes behaviour. Delete once the culprit - * thread has been identified and fixed at its source.

- */ -public final class HibernateConcurrencyMonitor { - - private static final long SAMPLE_INTERVAL_MS = 20; - - /** Thread ids of legitimate test threads (the JUnit thread(s)) to ignore. */ - private static final Set TEST_THREAD_IDS = ConcurrentHashMap.newKeySet(); - - /** De-duplicated culprit fingerprints: key -> formatted record. */ - private static final Map CULPRITS = new ConcurrentHashMap<>(); - - private static volatile boolean started; - - private HibernateConcurrencyMonitor() { - } - - /** Start the monitor exactly once per JVM (fork). Safe to call from every test's setUp. */ - public static synchronized void startOnce() { - if (started) { - return; - } - started = true; - Thread t = new Thread(HibernateConcurrencyMonitor::loop, "hibernate-concurrency-monitor"); - t.setDaemon(true); - t.start(); - Runtime.getRuntime().addShutdownHook(new Thread(() -> flush("jvm-shutdown"), "hibernate-concurrency-flush")); - } - - /** Mark the current thread as a legitimate test thread, so its (normal) Hibernate use is ignored. */ - public static void markTestThread() { - TEST_THREAD_IDS.add(Thread.currentThread().getId()); - } - - private static void loop() { - final long monitorId = Thread.currentThread().getId(); - while (true) { - try { - Map all = Thread.getAllStackTraces(); - for (Map.Entry e : all.entrySet()) { - Thread th = e.getKey(); - if (th.getId() == monitorId || TEST_THREAD_IDS.contains(th.getId())) { - continue; - } - if (touchesHibernateJdbc(e.getValue())) { - record(th, e.getValue()); - } - } - Thread.sleep(SAMPLE_INTERVAL_MS); - } catch (InterruptedException ie) { - return; - } catch (Throwable ignore) { - // A diagnostic must never die from a transient error (e.g. a thread terminating mid-snapshot). - } - } - } - - private static boolean touchesHibernateJdbc(StackTraceElement[] stack) { - for (StackTraceElement f : stack) { - String c = f.getClassName(); - if (c.startsWith("org.hibernate.resource.jdbc") - || c.startsWith("org.hibernate.engine.jdbc") - || c.startsWith("org.hibernate.internal.SessionImpl")) { - return true; - } - } - return false; - } - - private static void record(Thread th, StackTraceElement[] stack) { - StringBuilder sb = new StringBuilder(); - int n = Math.min(stack.length, 25); - for (int i = 0; i < n; i++) { - sb.append("\tat ").append(stack[i]).append('\n'); - } - String stackText = sb.toString(); - String key = th.getName() + "|" + Integer.toHexString(stackText.hashCode()); - CULPRITS.putIfAbsent(key, "\"" + th.getName() + "\" id=" + th.getId() - + " daemon=" + th.isDaemon() + " state=" + th.getState() - + " group=" + (th.getThreadGroup() == null ? "?" : th.getThreadGroup().getName()) + "\n" + stackText); - } - - /** Write all captured culprit fingerprints to target/cme-dumps/ (no-op if none were caught). */ - public static void flush(String reason) { - if (CULPRITS.isEmpty()) { - return; - } - try { - File dir = new File("target/cme-dumps"); - dir.mkdirs(); - File out = new File(dir, "hibernate-concurrency-" + System.currentTimeMillis() + "-" + reason + ".txt"); - try (PrintWriter pw = new PrintWriter(out, "UTF-8")) { - pw.println("===== Non-test threads caught INSIDE Hibernate JDBC / session code ====="); - pw.println("reason=" + reason + " distinctFingerprints=" + CULPRITS.size()); - pw.println("Baseline: NO legitimate background thread touches Hibernate, so each entry below is a"); - pw.println("suspect for the @After ConcurrentModificationException (concurrent access to the test"); - pw.println("thread's non-thread-safe per-session JDBC ResourceRegistry)."); - pw.println(); - for (String rec : CULPRITS.values()) { - pw.println(rec); - pw.println("------------------------------------------------------------"); - } - } - } catch (Exception ignore) { - // best-effort diagnostic - } - } -} From 5375b504166e0cd0afb3bab454f1bc7e341d460c Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 8 Jun 2026 15:34:28 +0200 Subject: [PATCH 10/11] revert: keep Context.finalize() (out of scope, not the CME cause) Reverts the Context.finalize() removal (and the ContextTest.testFinalize deletion). The flaky @After ConcurrentModificationException is fully fixed by the AbstractHibernateDAO.iterate() change alone; Context.finalize() runs on a single GC Finalizer thread against its own finalizer-thread session and provably cannot cause that cross-thread xref race. Removing a finalizer from this core, widely-used class is a riskier change that does not belong in a flaky-test fix, so leave Context untouched. The (pre-existing, harmless) finalizer-thread session it opens can be addressed separately if desired. Co-Authored-By: Claude Opus 4.8 --- .../main/java/org/dspace/core/Context.java | 23 +++++++++++++------ .../java/org/dspace/core/ContextTest.java | 17 ++++++++++++++ 2 files changed, 33 insertions(+), 7 deletions(-) diff --git a/dspace-api/src/main/java/org/dspace/core/Context.java b/dspace-api/src/main/java/org/dspace/core/Context.java index 8eee735ed16c..e721deff5e71 100644 --- a/dspace-api/src/main/java/org/dspace/core/Context.java +++ b/dspace-api/src/main/java/org/dspace/core/Context.java @@ -765,13 +765,22 @@ public void restoreContextUser() { currentUserPreviousState = null; } - // NOTE: Context intentionally does NOT override finalize(). A finalizer runs on the GC Finalizer thread, - // where dbConnection.isTransActionAlive()/abort() resolve sessionFactory.getCurrentSession() to a brand-new - // session bound to the Finalizer thread - never the (now-unreachable) thread that opened this Context. So a - // finalizer could not roll back this Context's transaction anyway; it only opened and leaked a throwaway - // Hibernate session on the Finalizer thread (and threw IllegalStateException once the SessionFactory closed). - // Abandoned Contexts are cleaned up safely when their owning thread's session ends. Always close a Context - // via complete()/abort() or try-with-resources (Context implements AutoCloseable). + /** + * Close the context, aborting any open transactions (if any). + * @throws Throwable + */ + @Override + protected void finalize() throws Throwable { + /* + * If a context is garbage-collected, we roll back and free up the + * database connection if there is one. + */ + if (dbConnection != null && dbConnection.isTransActionAlive()) { + abort(); + } + + super.finalize(); + } public void shutDownDatabase() throws SQLException { dbConnection.shutdown(); diff --git a/dspace-api/src/test/java/org/dspace/core/ContextTest.java b/dspace-api/src/test/java/org/dspace/core/ContextTest.java index 9084d5f90991..ccc1d2f732cc 100644 --- a/dspace-api/src/test/java/org/dspace/core/ContextTest.java +++ b/dspace-api/src/test/java/org/dspace/core/ContextTest.java @@ -520,6 +520,23 @@ public void testGetSpecialGroups() throws SQLException, AuthorizeException, IOEx cleanupContext(instance); } + /** + * Test of finalize method, of class Context. + */ + @Test + public void testFinalize() throws Throwable { + // We need a new Context object + Context instance = new Context(); + + instance.finalize(); + + // Finalize is like abort()...should invalidate our context + assertThat("testSetSpecialGroup 0", instance.isValid(), equalTo(false)); + + // Cleanup our context + cleanupContext(instance); + } + /** * Test of updateDatabase method, of class Context. */ From a30ca796d0bf967b10f98c0c4ad2a7f33f12c6f4 Mon Sep 17 00:00:00 2001 From: milanmajchrak Date: Mon, 8 Jun 2026 15:34:37 +0200 Subject: [PATCH 11/11] test: address review comments on flaky-test fix - AbstractHibernateDAOIteratorIT: add Javadoc to the test method and walk the iterator's full class hierarchy (up to Object) when asserting no finalize() override, so a finalizer reintroduced on a superclass/helper is also caught (per CodeRabbit review). - AuthenticationRestControllerIT: wrap an over-length (122 char) Javadoc line. Co-Authored-By: Claude Opus 4.8 --- .../core/AbstractHibernateDAOIteratorIT.java | 36 ++++++++++++------- .../rest/AuthenticationRestControllerIT.java | 3 +- 2 files changed, 26 insertions(+), 13 deletions(-) diff --git a/dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java b/dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java index 79fe8d23c266..fcc00434d620 100644 --- a/dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java +++ b/dspace-api/src/test/java/org/dspace/core/AbstractHibernateDAOIteratorIT.java @@ -36,24 +36,36 @@ public class AbstractHibernateDAOIteratorIT extends AbstractIntegrationTestWithD private final MetadataValueService metadataValueService = ContentServiceFactory.getInstance().getMetadataValueService(); + /** + * Verifies that the iterator returned by {@link AbstractHibernateDAO#iterate(javax.persistence.Query)} + * (exercised here through {@code MetadataValueService.findByValueLike}) does not close its backing Hibernate + * stream from a {@code finalize()} override anywhere in its class hierarchy, and that it still iterates to + * exhaustion (closing its cursor on the owning thread) without error. No matching rows are required - the + * wrapper iterator is created regardless of the result count. + * + * @throws Exception passed through. + */ @Test public void iterateIteratorMustNotCloseStreamFromFinalizer() throws Exception { - // findByValueLike() delegates to AbstractHibernateDAO.iterate(); the concrete (anonymous) iterator type - // is what we assert on. No matching rows are required - the wrapper iterator is created regardless. Iterator iterator = metadataValueService.findByValueLike(context, "no-such-metadata-value-" + System.nanoTime()); assertNotNull(iterator); - // The returned iterator MUST NOT declare its own finalize(): closing the backing Hibernate Stream from - // the GC Finalizer thread is exactly the cross-thread access to the non-thread-safe JDBC - // ResourceRegistry that caused the flaky ConcurrentModificationException. - try { - iterator.getClass().getDeclaredMethod("finalize"); - fail("AbstractHibernateDAO.iterate() iterator must not declare a finalize() override - closing the " - + "Hibernate Stream on the GC Finalizer thread races the owning thread's non-thread-safe " - + "JDBC ResourceRegistry and intermittently throws ConcurrentModificationException."); - } catch (NoSuchMethodException expected) { - // good: no stream-closing finalizer on the iterator + // The returned iterator - and every class in its hierarchy up to Object - MUST NOT declare a finalize() + // override: closing the backing Hibernate Stream from the GC Finalizer thread is exactly the cross-thread + // access to the non-thread-safe per-session JDBC ResourceRegistry that caused the flaky + // ConcurrentModificationException. Walking the hierarchy also catches a finalizer reintroduced on a + // superclass/helper rather than on the anonymous leaf class. + for (Class type = iterator.getClass(); type != null && type != Object.class; type = type.getSuperclass()) { + try { + type.getDeclaredMethod("finalize"); + fail("AbstractHibernateDAO.iterate() iterator must not declare a finalize() override (found on " + + type.getName() + ") - closing the Hibernate Stream on the GC Finalizer thread races the " + + "owning thread's non-thread-safe JDBC ResourceRegistry and intermittently throws " + + "ConcurrentModificationException."); + } catch (NoSuchMethodException expected) { + // good: no stream-closing finalizer on this class + } } // It must still iterate to exhaustion and close its cursor on THIS (the owning) thread without error. diff --git a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java index 24afcd48f98f..b23811f27f17 100644 --- a/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java +++ b/dspace-server-webapp/src/test/java/org/dspace/app/rest/AuthenticationRestControllerIT.java @@ -153,7 +153,8 @@ public class AuthenticationRestControllerIT extends AbstractControllerIntegratio * {@code [PasswordAuthentication, ClarinShibAuthentication]} -- and a stray {@code password realm} leaks * into the {@code WWW-Authenticate} header even though only e.g. Shibboleth was requested. A system * property sits in the highest-precedence (override) section of the combined config and is re-read on - * every rebuild, so it survives auto-reload. It is cleared again in {@link #clearAuthenticationMethodSequence()}.

+ * every rebuild, so it survives auto-reload. It is cleared again in + * {@link #clearAuthenticationMethodSequence()}.

*/ private void setAuthenticationMethodSequence(String[] methods) { System.setProperty(AUTH_PLUGIN_KEY, String.join(",", methods));