From 3ac3503503f324a6bcd162f986d60647c7137e9b Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Mon, 4 May 2026 09:38:47 +0200 Subject: [PATCH 1/5] [CI] Collect diagnostic info on LaunchTimedOut simulator failures. Ref #25299 When simulator tests fail with LaunchTimedOut, collect diagnostic info: - Simulator list (simctl list) to check boot state - Simulator boot status for the target UDID - Installed apps on the simulator Also run simctl diagnose in the pipeline on test failure (not only when system.debug is true). Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/xharness/AppRunner.cs | 43 +++++++++++++++++++ .../automation/templates/tests/run-tests.yml | 4 +- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/tests/xharness/AppRunner.cs b/tests/xharness/AppRunner.cs index 4cc1a11b2690..0739341d5f57 100644 --- a/tests/xharness/AppRunner.cs +++ b/tests/xharness/AppRunner.cs @@ -432,9 +432,52 @@ public async Task<int> RunAsync () FailureMessage = "Test app failed to launch."; } + // Collect diagnostic info when the launch times out to help diagnose the issue + if (Result == TestExecutingResult.LaunchTimedOut) + await CollectLaunchTimedOutDiagnostics (); + return testReporter?.Success == true ? 0 : 1; } + async Task CollectLaunchTimedOutDiagnostics () + { + MainLog.WriteLine ("Launch timed out. Collecting diagnostic info..."); + + try { + var diagnosticLog = Logs.Create ($"launch-timeout-diagnostics-{Harness.Helpers.Timestamp}.log", "Launch timeout diagnostics"); + + diagnosticLog.WriteLine ($"Launch timed out for {AppInformation.AppName} ({AppInformation.BundleIdentifier})"); + diagnosticLog.WriteLine ($"Target: {target}"); + diagnosticLog.WriteLine ($"Simulator UDID: {simulator?.UDID ?? "N/A"}"); + diagnosticLog.WriteLine ($"Simulator Name: {simulator?.Name ?? 
"N/A"}"); + diagnosticLog.WriteLine ($"Launch timeout: {harness.LaunchTimeout} minutes"); + diagnosticLog.WriteLine (""); + + // List booted simulators and their state + diagnosticLog.WriteLine ("=== Simulator List ==="); + var simListResult = await processManager.ExecuteXcodeCommandAsync ("simctl", new [] { "list" }, diagnosticLog, TimeSpan.FromMinutes (1)); + diagnosticLog.WriteLine ($"simctl list exit code: {simListResult.ExitCode}"); + diagnosticLog.WriteLine (""); + + // Check if the specific simulator is booted + if (simulator?.UDID is not null) { + diagnosticLog.WriteLine ($"=== Simulator Status for {simulator.UDID} ==="); + var statusResult = await processManager.ExecuteXcodeCommandAsync ("simctl", new [] { "bootstatus", simulator.UDID }, diagnosticLog, TimeSpan.FromSeconds (10)); + diagnosticLog.WriteLine ($"bootstatus exit code: {statusResult.ExitCode}"); + diagnosticLog.WriteLine (""); + + // List installed apps on the simulator + diagnosticLog.WriteLine ($"=== Installed Apps on {simulator.UDID} ==="); + var listAppsResult = await processManager.ExecuteXcodeCommandAsync ("simctl", new [] { "listapps", simulator.UDID }, diagnosticLog, TimeSpan.FromSeconds (30)); + diagnosticLog.WriteLine ($"listapps exit code: {listAppsResult.ExitCode}"); + } + + MainLog.WriteLine ($"Launch timeout diagnostics written to {diagnosticLog.FullPath}"); + } catch (Exception e) { + MainLog.WriteLine ($"Failed to collect launch timeout diagnostics: {e.Message}"); + } + } + static bool IsLaunchFailure (IFileBackedLog log) { try { diff --git a/tools/devops/automation/templates/tests/run-tests.yml b/tools/devops/automation/templates/tests/run-tests.yml index 0ff0d43d39cb..651f9ea97762 100644 --- a/tools/devops/automation/templates/tests/run-tests.yml +++ b/tools/devops/automation/templates/tests/run-tests.yml @@ -149,7 +149,7 @@ steps: fi displayName: 'Collect diagnostic info from simulators' - condition: eq(variables['system.debug'], true) + condition: 
or(eq(variables['system.debug'], true), failed()) continueOnError: true name: collectSimulatorInfo timeoutInMinutes: 30 @@ -160,7 +160,7 @@ steps: inputs: targetPath: $(System.DefaultWorkingDirectory)/diagnostic-sim-output artifactName: '${{ parameters.uploadPrefix }}diagnostic-simulator-info-$(Build.BuildId)-$(System.StageAttempt)-$(System.JobAttempt)-${{ parameters.labelWithPlatform }}' - condition: and(eq(variables['system.debug'], true), succeededOrFailed()) + condition: or(eq(variables['system.debug'], true), failed()) continueOnError: true # Upload TestSummary as an artifact. From 4d16ee65b7df56813ae5b4d5bff4f28f180b340e Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Wed, 6 May 2026 14:26:43 +0200 Subject: [PATCH 2/5] [CI] Improve LaunchTimedOut diagnostics visibility and data collection Log the LaunchTimedOut condition to the Jenkins main log so it's visible in the pipeline output (previously it was only in a per-test Run log file). Also collect load average and recent crash reports in the diagnostic log, since we found sqlite3 crashes correlated with LaunchTimedOut failures. 
Ref #25299 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/xharness/AppRunner.cs | 23 +++++++++++++++++++ .../Jenkins/TestTasks/RunSimulator.cs | 3 +++ 2 files changed, 26 insertions(+) diff --git a/tests/xharness/AppRunner.cs b/tests/xharness/AppRunner.cs index 0739341d5f57..70583e1ae6c9 100644 --- a/tests/xharness/AppRunner.cs +++ b/tests/xharness/AppRunner.cs @@ -453,6 +453,29 @@ async Task CollectLaunchTimedOutDiagnostics () diagnosticLog.WriteLine ($"Launch timeout: {harness.LaunchTimeout} minutes"); diagnosticLog.WriteLine (""); + // Collect load average (high load is correlated with this issue) + diagnosticLog.WriteLine ("=== System Load ==="); + var uptimeResult = await processManager.ExecuteCommandAsync ("uptime", Array.Empty<string> (), diagnosticLog, TimeSpan.FromSeconds (5)); + diagnosticLog.WriteLine ($"uptime exit code: {uptimeResult.ExitCode}"); + diagnosticLog.WriteLine (""); + + // Check for recent crash reports (sqlite3 crashes are correlated with LaunchTimedOut) + diagnosticLog.WriteLine ("=== Recent Crash Reports (last 10 minutes) ==="); + var crashReportsDir = Path.Combine (Environment.GetFolderPath (Environment.SpecialFolder.UserProfile), "Library/Logs/DiagnosticReports"); + if (Directory.Exists (crashReportsDir)) { + var tenMinutesAgo = DateTime.UtcNow.AddMinutes (-10); + var recentCrashes = Directory.GetFiles (crashReportsDir, "*.ips") + .Where (f => File.GetCreationTimeUtc (f) > tenMinutesAgo) + .OrderByDescending (f => File.GetCreationTimeUtc (f)) + .ToList (); + diagnosticLog.WriteLine ($"Found {recentCrashes.Count} recent crash reports:"); + foreach (var crash in recentCrashes) + diagnosticLog.WriteLine ($" {File.GetCreationTimeUtc (crash):O} {Path.GetFileName (crash)}"); + } else { + diagnosticLog.WriteLine ($"Crash reports directory not found: {crashReportsDir}"); + } + diagnosticLog.WriteLine (""); + // List booted simulators and their state diagnosticLog.WriteLine ("=== Simulator List ==="); var simListResult = 
await processManager.ExecuteXcodeCommandAsync ("simctl", new [] { "list" }, diagnosticLog, TimeSpan.FromMinutes (1)); diff --git a/tests/xharness/Jenkins/TestTasks/RunSimulator.cs b/tests/xharness/Jenkins/TestTasks/RunSimulator.cs index e94c372e0534..1b013bc89e71 100644 --- a/tests/xharness/Jenkins/TestTasks/RunSimulator.cs +++ b/tests/xharness/Jenkins/TestTasks/RunSimulator.cs @@ -124,6 +124,9 @@ public async Task RunTestAsync () } testTask.ExecutionResult = testTask.Runner.Result; + if (testTask.ExecutionResult == TestExecutingResult.LaunchTimedOut) + mainLog.WriteLine ($"Test launch timed out for {testTask.ProjectFile} on {testTask.Device?.Name} ({testTask.Device?.UDID}). See the 'Launch timeout diagnostics' log for more info. Ref: https://github.com/dotnet/macios/issues/25299"); + testTask.KnownFailure = null; if (errorKnowledgeBase.IsKnownTestIssue (testTask.Runner.MainLog, out var failure)) { testTask.KnownFailure = failure; From d11b430ec2285d5ae837da3e440e1e6df1e7629e Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Wed, 6 May 2026 14:34:20 +0200 Subject: [PATCH 3/5] [CI] Retry once on LaunchTimedOut simulator failures When a simulator test launch times out, retry once with a fresh AppRunner. This addresses the transient failures caused by high system load and sqlite3 crashes on CI machines that prevent simulators from properly launching test apps. Also propagate AppRunner.FailureMessage to the test task so failure details are visible in the test report. 
Ref #25299 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/xharness/Jenkins/TestTasks/RunSimulator.cs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/xharness/Jenkins/TestTasks/RunSimulator.cs b/tests/xharness/Jenkins/TestTasks/RunSimulator.cs index 1b013bc89e71..538d2c63cc71 100644 --- a/tests/xharness/Jenkins/TestTasks/RunSimulator.cs +++ b/tests/xharness/Jenkins/TestTasks/RunSimulator.cs @@ -123,6 +123,20 @@ public async Task RunTestAsync () await testTask.Runner!.RunAsync (); } testTask.ExecutionResult = testTask.Runner.Result; + testTask.FailureMessage = testTask.Runner.FailureMessage; + + // Retry once on LaunchTimedOut - this is a transient failure typically caused by + // high system load or simulator instability (e.g. sqlite3 crashes). Ref #25299. + if (testTask.ExecutionResult == TestExecutingResult.LaunchTimedOut && testTask.Harness.InCI) { + mainLog.WriteLine ($"Test launch timed out for {testTask.ProjectFile} on {testTask.Device?.Name} ({testTask.Device?.UDID}). Retrying once..."); + testTask.Runner = null; + using (var resource = await testTask.NotifyBlockingWaitAsync (testTask.AcquireResourceAsync ())) { + await SelectSimulatorAsync (); + await testTask.Runner!.RunAsync (); + } + testTask.ExecutionResult = testTask.Runner.Result; + testTask.FailureMessage = testTask.Runner.FailureMessage; + } if (testTask.ExecutionResult == TestExecutingResult.LaunchTimedOut) mainLog.WriteLine ($"Test launch timed out for {testTask.ProjectFile} on {testTask.Device?.Name} ({testTask.Device?.UDID}). See the 'Launch timeout diagnostics' log for more info. 
Ref: https://github.com/dotnet/macios/issues/25299"); From 23a7934e38792b5181a671fbf2d68ed66e3d106a Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Thu, 7 May 2026 08:17:47 +0200 Subject: [PATCH 4/5] Build me --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index 9832dd23e805..6ee9d03482fd 100644 --- a/README.md +++ b/README.md @@ -68,3 +68,4 @@ For legacy Xamarin.iOS and Xamarin.Mac downloads (discontinued), see [Downloads] Copyright (c) .NET Foundation Contributors. All rights reserved. Licensed under the [MIT](https://github.com/dotnet/macios/blob/main/LICENSE) License. + From eafc72b139d0edade4f73cb088f30433a0257072 Mon Sep 17 00:00:00 2001 From: Rolf Bjarne Kvinge Date: Fri, 8 May 2026 13:26:53 +0200 Subject: [PATCH 5/5] [CI] Fix LaunchTimedOut retry: clear Finished flag before retrying The retry was silently failing because setting testTask.ExecutionResult to LaunchTimedOut also set the Finished flag. SelectSimulatorAsync() checks testTask.Finished and returns early, leaving testTask.Runner as null, which then throws NullReferenceException. Fix by clearing the StateMask bits (which includes Finished) and setting Running before calling SelectSimulatorAsync(). Ref #25299 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/xharness/Jenkins/TestTasks/RunSimulator.cs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/xharness/Jenkins/TestTasks/RunSimulator.cs b/tests/xharness/Jenkins/TestTasks/RunSimulator.cs index 538d2c63cc71..23301361514b 100644 --- a/tests/xharness/Jenkins/TestTasks/RunSimulator.cs +++ b/tests/xharness/Jenkins/TestTasks/RunSimulator.cs @@ -130,6 +130,8 @@ public async Task RunTestAsync () if (testTask.ExecutionResult == TestExecutingResult.LaunchTimedOut && testTask.Harness.InCI) { mainLog.WriteLine ($"Test launch timed out for {testTask.ProjectFile} on {testTask.Device?.Name} ({testTask.Device?.UDID}). 
Retrying once..."); testTask.Runner = null; + // Reset execution result so SelectSimulatorAsync doesn't bail due to Finished flag + testTask.ExecutionResult = testTask.ExecutionResult & ~TestExecutingResult.StateMask | TestExecutingResult.Running; using (var resource = await testTask.NotifyBlockingWaitAsync (testTask.AcquireResourceAsync ())) { await SelectSimulatorAsync (); await testTask.Runner!.RunAsync ();