Skip to content

Commit 48b547f

Browse files
[tests] cache FindNugetGlobalPackageFolder() to fix test timeouts (#10626)
**Logging around test timeouts**

* Added a new `diagnose-hanging-processes.yaml` template that collects detailed diagnostics about running processes, disk/memory usage, network connections, and system logs for both macOS/Linux and Windows agents. This helps troubleshoot stuck test jobs by providing more actionable information.
* Integrated the new template into the `run-nunit-tests.yaml` and `run-sliced-nunit-tests.yaml` pipelines, so diagnostics are automatically run when tests hang or fail.

This allowed Copilot to discover the problem: the `dotnet nuget locals` command was spinning at 85-77% CPU for 129+ minutes (process ID 27626). This process started at 11:36 PM and was still running at 1:48 AM when diagnostics were captured, consuming massive CPU:

    runner 27626 85.2% /Users/runner/work/1/s/bin/Release/dotnet/dotnet nuget locals --list global-packages

**Fix**

* Improved the `FindNugetGlobalPackageFolder` method in `FileSystemUtils.cs` by adding result caching. This avoids repeated environment checks and process invocations, reducing overhead during test setup.
1 parent 4b46cce commit 48b547f

4 files changed

Lines changed: 129 additions & 4 deletions

File tree

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
steps:
2+
- bash: |
3+
echo "=========================================="
4+
echo "Diagnosing potentially hanging processes"
5+
echo "=========================================="
6+
echo ""
7+
echo "All running processes:"
8+
ps aux || true
9+
echo ""
10+
echo "=========================================="
11+
echo "MSBuild processes:"
12+
ps aux | grep -i msbuild || echo "No MSBuild processes found"
13+
echo ""
14+
echo "=========================================="
15+
echo "dotnet processes:"
16+
ps aux | grep -i dotnet || echo "No dotnet processes found"
17+
echo ""
18+
echo "=========================================="
19+
echo "adb processes:"
20+
ps aux | grep -i adb || echo "No adb processes found"
21+
echo ""
22+
echo "=========================================="
23+
echo "Java processes:"
24+
ps aux | grep -i java || echo "No Java processes found"
25+
echo ""
26+
echo "=========================================="
27+
echo "nunit processes:"
28+
ps aux | grep -i nunit || echo "No nunit processes found"
29+
echo ""
30+
echo "=========================================="
31+
echo "Process tree:"
32+
pstree -p $$ 2>/dev/null || echo "pstree not available"
33+
echo ""
34+
echo "=========================================="
35+
echo "Open files by dotnet processes:"
36+
lsof -c dotnet 2>/dev/null | head -100 || echo "lsof not available or no dotnet processes"
37+
echo ""
38+
echo "=========================================="
39+
echo "Open files by MSBuild processes:"
40+
lsof -c MSBuild 2>/dev/null | head -100 || echo "lsof not available or no MSBuild processes"
41+
echo ""
42+
echo "=========================================="
43+
echo "Network connections:"
44+
netstat -an 2>/dev/null | grep ESTABLISHED | head -50 || echo "netstat not available"
45+
echo ""
46+
echo "=========================================="
47+
echo "Disk usage:"
48+
df -h || true
49+
echo ""
50+
echo "=========================================="
51+
echo "Memory usage:"
52+
vm_stat 2>/dev/null || free -h 2>/dev/null || true
53+
echo ""
54+
echo "=========================================="
55+
echo "Recent system messages (last 50 lines):"
56+
tail -50 /var/log/system.log 2>/dev/null || dmesg 2>/dev/null | tail -50 || echo "System logs not accessible"
57+
echo ""
58+
echo "=========================================="
59+
displayName: diagnose hanging processes (macOS/Linux)
60+
condition: and(always(), in(variables['agent.os'], 'Darwin', 'Linux'))
61+
continueOnError: true
62+
63+
- powershell: |
64+
Write-Host "=========================================="
65+
Write-Host "Diagnosing potentially hanging processes"
66+
Write-Host "=========================================="
67+
Write-Host ""
68+
Write-Host "All running processes:"
69+
Get-Process | Format-Table -AutoSize
70+
Write-Host ""
71+
Write-Host "=========================================="
72+
Write-Host "MSBuild processes:"
73+
Get-Process -Name *MSBuild* -ErrorAction SilentlyContinue | Format-Table -AutoSize
74+
if (-not $?) { Write-Host "No MSBuild processes found" }
75+
Write-Host ""
76+
Write-Host "=========================================="
77+
Write-Host "dotnet processes:"
78+
Get-Process -Name dotnet* -ErrorAction SilentlyContinue | Format-Table -AutoSize
79+
if (-not $?) { Write-Host "No dotnet processes found" }
80+
Write-Host ""
81+
Write-Host "=========================================="
82+
Write-Host "adb processes:"
83+
Get-Process -Name adb* -ErrorAction SilentlyContinue | Format-Table -AutoSize
84+
if (-not $?) { Write-Host "No adb processes found" }
85+
Write-Host ""
86+
Write-Host "=========================================="
87+
Write-Host "Java processes:"
88+
Get-Process -Name java* -ErrorAction SilentlyContinue | Format-Table -AutoSize
89+
if (-not $?) { Write-Host "No Java processes found" }
90+
Write-Host ""
91+
Write-Host "=========================================="
92+
Write-Host "nunit processes:"
93+
Get-Process -Name *nunit* -ErrorAction SilentlyContinue | Format-Table -AutoSize
94+
if (-not $?) { Write-Host "No nunit processes found" }
95+
Write-Host ""
96+
Write-Host "=========================================="
97+
Write-Host "Network connections (ESTABLISHED):"
98+
netstat -ano | Select-String "ESTABLISHED" | Select-Object -First 50
99+
Write-Host ""
100+
Write-Host "=========================================="
101+
Write-Host "Disk usage:"
102+
Get-PSDrive -PSProvider FileSystem | Format-Table -AutoSize
103+
Write-Host ""
104+
Write-Host "=========================================="
105+
Write-Host "Memory usage:"
106+
Get-CimInstance Win32_OperatingSystem | Select-Object FreePhysicalMemory, TotalVisibleMemorySize | Format-List
107+
Write-Host ""
108+
Write-Host "=========================================="
109+
Write-Host "Recent Application Event Log errors (last 20):"
110+
Get-EventLog -LogName Application -EntryType Error -Newest 20 -ErrorAction SilentlyContinue | Format-Table -AutoSize
111+
Write-Host ""
112+
Write-Host "=========================================="
113+
displayName: diagnose hanging processes (Windows)
114+
condition: and(always(), eq(variables['agent.os'], 'Windows_NT'))
115+
continueOnError: true

build-tools/automation/yaml-templates/run-nunit-tests.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,6 @@ steps:
2222
timeoutInMinutes: ${{ parameters.timeoutInMinutes }}
2323
retryCountOnTaskFailure: ${{ parameters.retryCountOnTaskFailure }}
2424

25+
- template: /build-tools/automation/yaml-templates/diagnose-hanging-processes.yaml
26+
2527
- template: /build-tools/automation/yaml-templates/kill-processes.yaml

build-tools/automation/yaml-templates/run-sliced-nunit-tests.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ steps:
4848
ignoreLASTEXITCODE: true
4949
workingDirectory: ${{ parameters.xaSourcePath }}
5050
51+
- template: /build-tools/automation/yaml-templates/diagnose-hanging-processes.yaml
52+
5153
- pwsh: |
5254
$(Agent.ToolsDirectory)/dotnet-test-slicer `
5355
retry `

src/Xamarin.Android.Build.Tasks/Tests/Xamarin.ProjectTools/Utilities/FileSystemUtils.cs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ public static void SetFileWriteable (string source)
5656
}
5757

5858
static readonly char[] NugetFieldSeparator = new char[]{ ':' };
59+
static string CachedNugetGlobalPackageFolder;
5960

6061
/// <summary>
6162
/// Finds the NuGet global packages folder by checking environment variables and using dotnet CLI.
@@ -65,13 +66,18 @@ public static void SetFileWriteable (string source)
6566
/// First checks the NUGET_PACKAGES environment variable, then uses 'dotnet nuget locals'
6667
/// command to determine the global packages location. This is used for configuring
6768
/// test projects with the correct package restore location.
69+
/// The result is cached to avoid repeated process invocations.
6870
/// </remarks>
6971
/// <seealso cref="TestEnvironment"/>
7072
public static string FindNugetGlobalPackageFolder ()
7173
{
74+
if (!string.IsNullOrEmpty (CachedNugetGlobalPackageFolder)) {
75+
return CachedNugetGlobalPackageFolder;
76+
}
77+
7278
string packagesPath = Environment.GetEnvironmentVariable ("NUGET_PACKAGES");
7379
if (!String.IsNullOrEmpty (packagesPath)) {
74-
return packagesPath;
80+
return CachedNugetGlobalPackageFolder = packagesPath;
7581
}
7682

7783
bool isWindows = Environment.OSVersion.Platform == PlatformID.Win32NT;
@@ -143,16 +149,16 @@ public static string FindNugetGlobalPackageFolder ()
143149
}
144150

145151
if (!gotOutput) {
146-
return GetDefaultPackagesPath ();
152+
return CachedNugetGlobalPackageFolder = GetDefaultPackagesPath ();
147153
}
148154

149155
string[] parts = stdout_lines[0].Split (NugetFieldSeparator, 2);
150156
if (parts.Length < 2) {
151157
Console.Error.WriteLine ($"Process `{psi.FileName} {psi.Arguments}` did not return expected output, using default nuget package cache path.");
152-
return GetDefaultPackagesPath ();
158+
return CachedNugetGlobalPackageFolder = GetDefaultPackagesPath ();
153159
}
154160

155-
return parts[1].Trim ();
161+
return CachedNugetGlobalPackageFolder = parts [1].Trim ();
156162

157163
string GetDefaultPackagesPath ()
158164
{

0 commit comments

Comments
 (0)