Skip to content

Commit 12c47a5

Browse files
committed
fix: move Hangfire stale lock cleanup to hosted service, harden tenant provisioning
- Extract stale lock cleanup from Hangfire config delegate into HangfireStaleLockCleanupService (runs after app starts, not during DI)
- Harden TenantStoreInitializerHostedService and TenantAutoProvisioningHostedService for reliability during startup
- Add gitignore entries for Next.js client workspace
1 parent aee5943 commit 12c47a5

7 files changed

Lines changed: 137 additions & 78 deletions

File tree

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,3 +500,10 @@ tmpclaude**
500500

501501
# Auto Claude data directory
502502
.auto-claude/
503+
504+
# Clients (Next.js / pnpm workspace)
505+
clients/**/node_modules/
506+
clients/**/.next/
507+
clients/**/.turbo/
508+
clients/**/dist/
509+
clients/**/.env.local

src/BuildingBlocks/Jobs/Extensions.cs

Lines changed: 3 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@
66
using Microsoft.AspNetCore.Builder;
77
using Microsoft.Extensions.Configuration;
88
using Microsoft.Extensions.DependencyInjection;
9-
using Microsoft.Extensions.Logging;
10-
using Npgsql;
119

1210
namespace FSH.Framework.Jobs;
1311

@@ -37,9 +35,6 @@ public static IServiceCollection AddHeroJobs(this IServiceCollection services)
3735
switch (dbOptions.Provider.ToUpperInvariant())
3836
{
3937
case DbProviders.PostgreSQL:
40-
// Clean up stale locks before configuring Hangfire
41-
CleanupStaleLocks(dbOptions.ConnectionString, provider);
42-
4338
config.UsePostgreSqlStorage(o =>
4439
{
4540
o.UseNpgsqlConnection(dbOptions.ConnectionString);
@@ -60,39 +55,14 @@ public static IServiceCollection AddHeroJobs(this IServiceCollection services)
6055
config.UseFilter(new HangfireTelemetryFilter());
6156
});
6257

58+
// Deferred stale lock cleanup — runs after app starts accepting requests
59+
services.AddHostedService<HangfireStaleLockCleanupService>();
60+
6361
services.AddTransient<IJobService, HangfireService>();
6462

6563
return services;
6664
}
6765

68-
private static void CleanupStaleLocks(string connectionString, IServiceProvider provider)
69-
{
70-
var logger = provider.GetService<ILoggerFactory>()?.CreateLogger("Hangfire");
71-
72-
try
73-
{
74-
using var connection = new NpgsqlConnection(connectionString);
75-
connection.Open();
76-
77-
// Delete locks older than 5 minutes (stale from crashed instances)
78-
using var cmd = new NpgsqlCommand(
79-
"DELETE FROM hangfire.lock WHERE acquired < NOW() - INTERVAL '5 minutes'",
80-
connection);
81-
82-
var deleted = cmd.ExecuteNonQuery();
83-
if (deleted > 0)
84-
{
85-
logger?.LogWarning("Cleaned up {Count} stale Hangfire locks", deleted);
86-
}
87-
}
88-
// Broad catch is intentional: cleanup is best-effort and must not prevent application startup.
89-
// The hangfire schema/table may not exist yet on first run.
90-
catch (Exception ex)
91-
{
92-
logger?.LogDebug(ex, "Could not cleanup stale Hangfire locks (table may not exist yet)");
93-
}
94-
}
95-
9666

9767
public static IApplicationBuilder UseHeroJobDashboard(this IApplicationBuilder app, IConfiguration config)
9868
{
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
using FSH.Framework.Shared.Persistence;
2+
using Microsoft.Extensions.Configuration;
3+
using Microsoft.Extensions.Hosting;
4+
using Microsoft.Extensions.Logging;
5+
using Npgsql;
6+
7+
namespace FSH.Framework.Jobs;
8+
9+
/// <summary>
/// Best-effort cleanup of stale Hangfire locks left behind by crashed instances.
/// Runs as a <see cref="BackgroundService"/> and waits a short, fixed delay before touching
/// the database, so the cleanup never runs on the application startup path.
/// </summary>
/// <remarks>
/// The service only acts when the configured database provider is PostgreSQL. Every failure
/// other than cancellation is logged and swallowed: an exception escaping
/// <see cref="ExecuteAsync"/> may stop the host, depending on the host's configured
/// background-service exception behavior, which is not acceptable for an optional cleanup.
/// </remarks>
internal sealed class HangfireStaleLockCleanupService : BackgroundService
{
    /// <summary>Deletes lock rows acquired more than five minutes ago.</summary>
    private const string DeleteStaleLocksSql =
        "DELETE FROM hangfire.lock WHERE acquired < NOW() - INTERVAL '5 minutes'";

    /// <summary>Grace period that gives Hangfire a chance to create its schema first.</summary>
    private static readonly TimeSpan StartupDelay = TimeSpan.FromSeconds(5);

    private readonly IConfiguration _configuration;
    private readonly ILogger<HangfireStaleLockCleanupService> _logger;

    /// <summary>Creates the cleanup service.</summary>
    /// <param name="configuration">Application configuration; the <c>DatabaseOptions</c> section is read from it.</param>
    /// <param name="logger">Logger used to report the cleanup outcome.</param>
    public HangfireStaleLockCleanupService(
        IConfiguration configuration,
        ILogger<HangfireStaleLockCleanupService> logger)
    {
        _configuration = configuration;
        _logger = logger;
    }

    /// <summary>
    /// Waits for the startup delay, then deletes stale rows from <c>hangfire.lock</c> once.
    /// </summary>
    /// <param name="stoppingToken">Signaled when the host is shutting down.</param>
    /// <returns>A task that completes after the single cleanup attempt.</returns>
    protected override async Task ExecuteAsync(CancellationToken stoppingToken)
    {
        // Short delay to let Hangfire initialize its schema first.
        await Task.Delay(StartupDelay, stoppingToken).ConfigureAwait(false);

        try
        {
            // Binding and the provider check live inside the try block so that a malformed or
            // incomplete configuration section cannot fault the background service.
            var dbOptions = _configuration.GetSection(nameof(DatabaseOptions)).Get<DatabaseOptions>();
            if (dbOptions is null
                || !string.Equals(dbOptions.Provider, DbProviders.PostgreSQL, StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            await using var connection = new NpgsqlConnection(dbOptions.ConnectionString);
            await connection.OpenAsync(stoppingToken).ConfigureAwait(false);

            await using var cmd = new NpgsqlCommand(DeleteStaleLocksSql, connection);

            int deleted = await cmd.ExecuteNonQueryAsync(stoppingToken).ConfigureAwait(false);
            if (deleted > 0)
            {
                _logger.LogWarning("Cleaned up {Count} stale Hangfire locks", deleted);
            }
        }
        catch (PostgresException ex) when (ex.SqlState == PostgresErrorCodes.UndefinedTable)
        {
            // Expected on first run: Hangfire has not created its schema/table yet.
            _logger.LogDebug(ex, "Could not cleanup stale Hangfire locks (table may not exist yet)");
        }
        catch (Exception ex) when (ex is not OperationCanceledException)
        {
            // Anything else (bad credentials, unreachable server, invalid configuration) is a
            // real problem worth surfacing, but it must still not take the application down.
            _logger.LogWarning(ex, "Could not cleanup stale Hangfire locks");
        }
    }
}

src/Modules/Multitenancy/Modules.Multitenancy/MultitenancyOptions.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,5 @@ public sealed class MultitenancyOptions
1616
/// When true, enqueues tenant provisioning (migrate/seed) jobs on startup for tenants that have not completed provisioning.
1717
/// Useful to ensure the root tenant is provisioned automatically on first run when startup migrations are disabled.
1818
/// </summary>
19-
public bool AutoProvisionOnStartup { get; set; } = true;
19+
public bool AutoProvisionOnStartup { get; set; }
2020
}

src/Modules/Multitenancy/Modules.Multitenancy/Provisioning/TenantAutoProvisioningHostedService.cs

Lines changed: 37 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,12 @@
99

1010
namespace FSH.Modules.Multitenancy.Provisioning;
1111

12-
public sealed class TenantAutoProvisioningHostedService : IHostedService
12+
/// <summary>
13+
/// Auto-provisions tenants that haven't completed provisioning.
14+
/// Runs as a BackgroundService so it does NOT block the host from accepting requests.
15+
/// Includes a brief delay to allow the tenant store initializer and Hangfire to start first.
16+
/// </summary>
17+
public sealed class TenantAutoProvisioningHostedService : BackgroundService
1318
{
1419
private readonly IServiceProvider _serviceProvider;
1520
private readonly ILogger<TenantAutoProvisioningHostedService> _logger;
@@ -26,20 +31,30 @@ public TenantAutoProvisioningHostedService(
2631
_options = options.Value;
2732
}
2833

29-
public async Task StartAsync(CancellationToken cancellationToken)
34+
/// <summary>
/// Background entry point. When provisioning is enabled, waits briefly, checks that Hangfire
/// storage is available, and then starts provisioning for tenants that need it.
/// </summary>
/// <param name="stoppingToken">Signaled when the host is shutting down.</param>
/// <returns>A task that completes once the single provisioning pass has finished or been skipped.</returns>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    if (ShouldRunProvisioning())
    {
        // Wait briefly for the tenant store initializer to complete first.
        await Task.Delay(TimeSpan.FromSeconds(3), stoppingToken).ConfigureAwait(false);

        bool storageReady = WaitForJobStorage(stoppingToken);
        if (storageReady)
        {
            try
            {
                await ProvisionTenantsAsync(stoppingToken).ConfigureAwait(false);
            }
            catch (Exception ex) when (ex is not OperationCanceledException)
            {
                // Logged and swallowed: a failed pass must not fault the background service.
                _logger.LogError(ex, "Auto-provisioning failed. Tenants may need manual provisioning.");
            }
        }
        else
        {
            _logger.LogWarning("Hangfire storage not initialized; skipping auto-provisioning enqueue.");
        }
    }
}
4459

4560
private bool ShouldRunProvisioning() =>
@@ -60,15 +75,15 @@ private async Task ProvisionTenantsAsync(CancellationToken cancellationToken)
6075
break;
6176
}
6277

63-
await TryProvisionTenantAsync(provisioning, tenant, cancellationToken);
78+
await TryProvisionTenantAsync(provisioning, tenant, cancellationToken).ConfigureAwait(false);
6479
}
6580
}
6681

6782
private async Task TryProvisionTenantAsync(ITenantProvisioningService provisioning, AppTenantInfo tenant, CancellationToken cancellationToken)
6883
{
6984
try
7085
{
71-
if (await ShouldProvisionTenantAsync(provisioning, tenant.Id, cancellationToken))
86+
if (await ShouldProvisionTenantAsync(provisioning, tenant.Id, cancellationToken).ConfigureAwait(false))
7287
{
7388
await provisioning.StartAsync(tenant.Id, cancellationToken).ConfigureAwait(false);
7489
if (_logger.IsEnabled(LogLevel.Information))
@@ -101,18 +116,23 @@ private async Task<bool> ShouldProvisionTenantAsync(ITenantProvisioningService p
101116
return latest is null || latest.Status != TenantProvisioningStatus.Completed;
102117
}
103118

104-
public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
105-
106-
private static bool JobStorageAvailable()
119+
/// <summary>
/// Polls <c>JobStorage.Current</c> a limited number of times, since Hangfire may still be
/// initializing when this service starts.
/// </summary>
/// <param name="cancellationToken">Aborts the wait early when signaled.</param>
/// <returns>
/// <see langword="true"/> as soon as <c>JobStorage.Current</c> can be read without an
/// <see cref="InvalidOperationException"/>; <see langword="false"/> when every attempt failed
/// or cancellation was requested.
/// </returns>
/// <remarks>
/// The method is synchronous to keep its existing signature, so the calling thread is still
/// blocked between attempts. The pause now observes <paramref name="cancellationToken"/>
/// instead of sleeping unconditionally, so host shutdown is not delayed by a pending retry.
/// </remarks>
private static bool WaitForJobStorage(CancellationToken cancellationToken)
{
    const int maxAttempts = 5;
    TimeSpan retryDelay = TimeSpan.FromMilliseconds(500);

    for (int attempt = 1; attempt <= maxAttempts; attempt++)
    {
        if (cancellationToken.IsCancellationRequested)
        {
            return false;
        }

        try
        {
            _ = JobStorage.Current;
            return true;
        }
        catch (InvalidOperationException)
        {
            // Storage is not configured yet; fall through to the retry pause below.
        }

        // Skip the pause after the final attempt. WaitOne returns true when the token's
        // handle is signaled, i.e. cancellation was requested while waiting.
        if (attempt < maxAttempts && cancellationToken.WaitHandle.WaitOne(retryDelay))
        {
            return false;
        }
    }

    return false;
}
118-
}
138+
}

src/Modules/Multitenancy/Modules.Multitenancy/Provisioning/TenantStoreInitializerHostedService.cs

Lines changed: 32 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,10 @@
88
namespace FSH.Modules.Multitenancy.Provisioning;
99

1010
/// <summary>
11-
/// Initializes the tenant catalog database and seeds the root tenant on startup.
11+
/// Initializes the tenant catalog database and seeds the root tenant.
12+
/// Runs as a BackgroundService so it does NOT block the host from accepting requests.
1213
/// </summary>
13-
public sealed class TenantStoreInitializerHostedService : IHostedService
14+
public sealed class TenantStoreInitializerHostedService : BackgroundService
1415
{
1516
private readonly IServiceProvider _serviceProvider;
1617
private readonly ILogger<TenantStoreInitializerHostedService> _logger;
@@ -23,31 +24,36 @@ public TenantStoreInitializerHostedService(
2324
_logger = logger;
2425
}
2526

26-
public async Task StartAsync(CancellationToken cancellationToken)
27+
/// <summary>
/// Applies pending tenant catalog migrations and seeds the root tenant when it is missing.
/// </summary>
/// <param name="stoppingToken">Signaled when the host is shutting down.</param>
/// <returns>A task that completes once initialization has finished or failed.</returns>
/// <remarks>
/// Failures other than cancellation are logged as errors and swallowed, so a broken tenant
/// catalog does not fault the background service.
/// </remarks>
protected override async Task ExecuteAsync(CancellationToken stoppingToken)
{
    // On hosts where BackgroundService.StartAsync invokes ExecuteAsync inline, everything up
    // to the first incomplete await runs on the startup path. Yield immediately so scope
    // creation, DbContext resolution and the start of the migration happen off that path.
    await Task.Yield();

    try
    {
        using var scope = _serviceProvider.CreateScope();

        var tenantDbContext = scope.ServiceProvider.GetRequiredService<TenantDbContext>();
        await tenantDbContext.Database.MigrateAsync(stoppingToken).ConfigureAwait(false);
        _logger.LogInformation("Applied tenant catalog migrations.");

        var existingRoot = await tenantDbContext.TenantInfo
            .FindAsync([MultitenancyConstants.Root.Id], stoppingToken)
            .ConfigureAwait(false);
        if (existingRoot is not null)
        {
            // Root tenant already present; nothing to seed.
            return;
        }

        var rootTenant = new AppTenantInfo(
            MultitenancyConstants.Root.Id,
            MultitenancyConstants.Root.Name,
            string.Empty,
            MultitenancyConstants.Root.EmailAddress,
            issuer: MultitenancyConstants.Root.Issuer);

        // The seeded root tenant is valid for one year from the current UTC time.
        var validUpto = TimeProvider.System.GetUtcNow().UtcDateTime.AddYears(1);
        rootTenant.SetValidity(validUpto);

        await tenantDbContext.TenantInfo.AddAsync(rootTenant, stoppingToken).ConfigureAwait(false);
        await tenantDbContext.SaveChangesAsync(stoppingToken).ConfigureAwait(false);

        _logger.LogInformation("Seeded root tenant.");
    }
    catch (Exception ex) when (ex is not OperationCanceledException)
    {
        _logger.LogError(ex, "Failed to initialize tenant catalog database. Tenant operations may fail until resolved.");
    }
}
51-
52-
public Task StopAsync(CancellationToken cancellationToken) => Task.CompletedTask;
53-
}
59+
}

src/Playground/FSH.Api/appsettings.Development.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@
1212
"Enabled": true
1313
},
1414
"MultitenancyOptions": {
15-
"RunTenantMigrationsOnStartup": false,
1615
"AutoProvisionOnStartup": true
1716
},
1817
"OpenTelemetryOptions": {

0 commit comments

Comments
 (0)