App health check: Optimize pending migration query (#11049)

## Optimization: Efficient Health Check Query

This PR optimizes the workspace health check system by replacing the N+1
query pattern with efficient database queries.

### Key Improvements

- **Eliminated N+1 Query Problem**: Instead of fetching all workspaces
and then querying each one individually for pending migrations (which
caused slowness in production), we now use a single optimized query to
directly identify workspaces with pending migrations

- **Better Performance**: Reduced the number of database queries from
potentially hundreds/thousands (previous implementation) to just 2 fixed
queries regardless of workspace count

- **Full Coverage Instead of Sampling**: Rather than implementing a cap
on workspace checks at 100 (which was a workaround for performance
issues), this solution addresses the root cause by optimizing the query
pattern. We can now efficiently check all workspaces with pending
migrations without performance penalties.

@FelixMalfait This addresses the "always eager-load when you can"
feedback by handling the problem at the database level rather than just
applying a limit. The optimized query should solve both the performance
issues and provide more accurate health status information.
This commit is contained in:
nitin
2025-03-21 21:00:38 +05:30
committed by GitHub
parent 081f5fa766
commit 3960b0f99d
4 changed files with 170 additions and 87 deletions

View File

@ -1,11 +1,12 @@
import { Module } from '@nestjs/common'; import { Module } from '@nestjs/common';
import { TerminusModule } from '@nestjs/terminus'; import { TerminusModule } from '@nestjs/terminus';
import { TypeOrmModule } from '@nestjs/typeorm';
import { HealthController } from 'src/engine/core-modules/health/controllers/health.controller'; import { HealthController } from 'src/engine/core-modules/health/controllers/health.controller';
import { MetricsController } from 'src/engine/core-modules/health/controllers/metrics.controller'; import { MetricsController } from 'src/engine/core-modules/health/controllers/metrics.controller';
import { AppHealthIndicator } from 'src/engine/core-modules/health/indicators/app.health'; import { AppHealthIndicator } from 'src/engine/core-modules/health/indicators/app.health';
import { RedisClientModule } from 'src/engine/core-modules/redis-client/redis-client.module'; import { RedisClientModule } from 'src/engine/core-modules/redis-client/redis-client.module';
import { ObjectMetadataModule } from 'src/engine/metadata-modules/object-metadata/object-metadata.module'; import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { WorkspaceMigrationModule } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.module'; import { WorkspaceMigrationModule } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.module';
import { HealthCacheService } from './health-cache.service'; import { HealthCacheService } from './health-cache.service';
@ -18,8 +19,8 @@ import { WorkerHealthIndicator } from './indicators/worker.health';
imports: [ imports: [
TerminusModule, TerminusModule,
RedisClientModule, RedisClientModule,
ObjectMetadataModule,
WorkspaceMigrationModule, WorkspaceMigrationModule,
TypeOrmModule.forFeature([Workspace], 'core'),
], ],
controllers: [HealthController, MetricsController], controllers: [HealthController, MetricsController],
providers: [ providers: [

View File

@ -1,23 +1,27 @@
import { HealthIndicatorService } from '@nestjs/terminus'; import { HealthIndicatorService } from '@nestjs/terminus';
import { Test, TestingModule } from '@nestjs/testing'; import { Test, TestingModule } from '@nestjs/testing';
import { getRepositoryToken } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { AppHealthIndicator } from 'src/engine/core-modules/health/indicators/app.health'; import { AppHealthIndicator } from 'src/engine/core-modules/health/indicators/app.health';
import { ObjectMetadataService } from 'src/engine/metadata-modules/object-metadata/object-metadata.service'; import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { WorkspaceMigrationService } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.service'; import { WorkspaceMigrationService } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.service';
describe('AppHealthIndicator', () => { describe('AppHealthIndicator', () => {
let service: AppHealthIndicator; let service: AppHealthIndicator;
let objectMetadataService: jest.Mocked<ObjectMetadataService>; let workspaceRepository: jest.Mocked<Repository<Workspace>>;
let workspaceMigrationService: jest.Mocked<WorkspaceMigrationService>; let workspaceMigrationService: jest.Mocked<WorkspaceMigrationService>;
let healthIndicatorService: jest.Mocked<HealthIndicatorService>; let healthIndicatorService: jest.Mocked<HealthIndicatorService>;
beforeEach(async () => { beforeEach(async () => {
objectMetadataService = { workspaceRepository = {
findMany: jest.fn(), count: jest.fn(),
} as any; } as any;
workspaceMigrationService = { workspaceMigrationService = {
getPendingMigrations: jest.fn(), getWorkspacesWithPendingMigrations: jest.fn(),
countWorkspacesWithPendingMigrations: jest.fn(),
} as any; } as any;
healthIndicatorService = { healthIndicatorService = {
@ -35,10 +39,9 @@ describe('AppHealthIndicator', () => {
providers: [ providers: [
AppHealthIndicator, AppHealthIndicator,
{ {
provide: ObjectMetadataService, provide: getRepositoryToken(Workspace, 'core'),
useValue: objectMetadataService, useValue: workspaceRepository,
}, },
{ {
provide: WorkspaceMigrationService, provide: WorkspaceMigrationService,
useValue: workspaceMigrationService, useValue: workspaceMigrationService,
@ -63,62 +66,85 @@ describe('AppHealthIndicator', () => {
}); });
it('should return up status when no issues and no pending migrations', async () => { it('should return up status when no issues and no pending migrations', async () => {
objectMetadataService.findMany.mockResolvedValue([ workspaceRepository.count.mockResolvedValue(2);
{
id: '1',
workspaceId: 'workspace1',
} as any,
{
id: '2',
workspaceId: 'workspace2',
} as any,
]);
workspaceMigrationService.getPendingMigrations.mockResolvedValue([]); workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
0,
);
workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
[],
);
const result = await service.isHealthy(); const result = await service.isHealthy();
expect(result.app.status).toBe('up'); expect(result.app.status).toBe('up');
expect(result.app.details.overview.totalWorkspacesCount).toBe(2); expect(result.app.details.overview.totalWorkspacesCount).toBe(2);
expect(result.app.details.overview.criticalWorkspacesCount).toBe(0); expect(result.app.details.overview.erroredWorkspaceCount).toBe(0);
expect(result.app.details.criticalWorkspaces).toBe(null); expect(result.app.details.erroredWorkspace).toBe(null);
expect(result.app.details.system.nodeVersion).toBeDefined(); expect(result.app.details.system.nodeVersion).toBeDefined();
expect(result.app.details.system.timestamp).toBeDefined(); expect(result.app.details.system.timestamp).toBeDefined();
}); });
it('should return down status when there are pending migrations', async () => { it('should return down status when there are pending migrations', async () => {
objectMetadataService.findMany.mockResolvedValue([ workspaceRepository.count.mockResolvedValue(5);
{
id: '1',
workspaceId: 'workspace1',
} as any,
]);
workspaceMigrationService.getPendingMigrations.mockResolvedValue([ // Mock a total count that's higher than the sample
{ workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
id: '1', 10,
createdAt: new Date(), );
migrations: [],
name: 'migration1', workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
isCustom: false, [
workspaceId: 'workspace1', {
} as any, workspaceId: 'workspace1',
]); pendingMigrations: 1,
},
{
workspaceId: 'workspace2',
pendingMigrations: 3,
},
{
workspaceId: 'workspace3',
pendingMigrations: 2,
},
],
);
const result = await service.isHealthy(); const result = await service.isHealthy();
expect(result.app.status).toBe('down'); expect(result.app.status).toBe('down');
expect(result.app.details.overview.criticalWorkspacesCount).toBe(1); expect(result.app.message).toBe(
expect(result.app.details.criticalWorkspaces).toEqual([ 'Found 10 workspaces with pending migrations',
{ );
workspaceId: 'workspace1',
pendingMigrations: 1, expect(result.app.details).toEqual({
system: {
nodeVersion: process.version,
timestamp: expect.any(String),
}, },
]); overview: {
totalWorkspacesCount: 5,
erroredWorkspaceCount: 10,
},
erroredWorkspace: [
{
workspaceId: 'workspace1',
pendingMigrations: 1,
},
{
workspaceId: 'workspace2',
pendingMigrations: 3,
},
{
workspaceId: 'workspace3',
pendingMigrations: 2,
},
],
});
}); });
it('should handle errors gracefully and maintain state history', async () => { it('should handle errors gracefully and maintain state history', async () => {
objectMetadataService.findMany.mockRejectedValue( workspaceRepository.count.mockRejectedValue(
new Error('Database connection failed'), new Error('Database connection failed'),
); );
@ -133,18 +159,18 @@ describe('AppHealthIndicator', () => {
it('should maintain state history across health checks', async () => { it('should maintain state history across health checks', async () => {
// First check - healthy state // First check - healthy state
objectMetadataService.findMany.mockResolvedValue([ workspaceRepository.count.mockResolvedValue(2);
{ workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
id: '1', 0,
workspaceId: 'workspace1', );
} as any, workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
]); [],
workspaceMigrationService.getPendingMigrations.mockResolvedValue([]); );
await service.isHealthy(); await service.isHealthy();
// Second check - error state // Second check - error state
objectMetadataService.findMany.mockRejectedValue( workspaceRepository.count.mockRejectedValue(
new Error('Database connection failed'), new Error('Database connection failed'),
); );
@ -155,4 +181,34 @@ describe('AppHealthIndicator', () => {
expect(result.app.details.stateHistory.timestamp).toBeDefined(); expect(result.app.details.stateHistory.timestamp).toBeDefined();
expect(result.app.details.stateHistory.details).toBeDefined(); expect(result.app.details.stateHistory.details).toBeDefined();
}); });
it('should sample workspaces with pending migrations up to limit', async () => {
workspaceRepository.count.mockResolvedValue(1000);
// Mock a total count higher than the sample
workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
500,
);
const sampleWorkspaces = Array(300)
.fill(0)
.map((_, i) => ({
workspaceId: `workspace${i}`,
pendingMigrations: (i % 3) + 1,
}));
workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
sampleWorkspaces,
);
const result = await service.isHealthy();
expect(result.app.status).toBe('down');
expect(result.app.message).toBe(
'Found 500 workspaces with pending migrations',
);
expect(result.app.details.overview.totalWorkspacesCount).toBe(1000);
expect(result.app.details.overview.erroredWorkspaceCount).toBe(500);
expect(result.app.details.erroredWorkspace.length).toBe(300);
});
}); });

View File

@ -3,9 +3,12 @@ import {
HealthIndicatorResult, HealthIndicatorResult,
HealthIndicatorService, HealthIndicatorService,
} from '@nestjs/terminus'; } from '@nestjs/terminus';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { HealthStateManager } from 'src/engine/core-modules/health/utils/health-state-manager.util'; import { HealthStateManager } from 'src/engine/core-modules/health/utils/health-state-manager.util';
import { ObjectMetadataService } from 'src/engine/metadata-modules/object-metadata/object-metadata.service'; import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { WorkspaceMigrationService } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.service'; import { WorkspaceMigrationService } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.service';
@Injectable() @Injectable()
@ -14,7 +17,8 @@ export class AppHealthIndicator {
constructor( constructor(
private readonly healthIndicatorService: HealthIndicatorService, private readonly healthIndicatorService: HealthIndicatorService,
private readonly objectMetadataService: ObjectMetadataService, @InjectRepository(Workspace, 'core')
private readonly workspaceRepository: Repository<Workspace>,
private readonly workspaceMigrationService: WorkspaceMigrationService, private readonly workspaceMigrationService: WorkspaceMigrationService,
) {} ) {}
@ -22,23 +26,15 @@ export class AppHealthIndicator {
const indicator = this.healthIndicatorService.check('app'); const indicator = this.healthIndicatorService.check('app');
try { try {
const workspaces = await this.objectMetadataService.findMany(); const totalErroredWorkspacesCount =
const workspaceIds = [...new Set(workspaces.map((w) => w.workspaceId))]; await this.workspaceMigrationService.countWorkspacesWithPendingMigrations();
const workspaceStats = await Promise.all( const sampledErroredWorkspaces =
workspaceIds.map(async (workspaceId) => { await this.workspaceMigrationService.getWorkspacesWithPendingMigrations(
const pendingMigrations = 500,
await this.workspaceMigrationService.getPendingMigrations( );
workspaceId,
);
return { const totalWorkspaceCount = await this.workspaceRepository.count();
workspaceId,
pendingMigrations: pendingMigrations.length,
isCritical: pendingMigrations.length > 0,
};
}),
);
const details = { const details = {
system: { system: {
@ -46,25 +42,19 @@ export class AppHealthIndicator {
timestamp: new Date().toISOString(), timestamp: new Date().toISOString(),
}, },
overview: { overview: {
totalWorkspacesCount: workspaceIds.length, totalWorkspacesCount: totalWorkspaceCount,
criticalWorkspacesCount: workspaceStats.filter( erroredWorkspaceCount: totalErroredWorkspacesCount,
(stat) => stat.isCritical,
).length,
}, },
criticalWorkspaces: erroredWorkspace:
workspaceStats.filter((stat) => stat.isCritical).length > 0 totalErroredWorkspacesCount > 0
? workspaceStats ? sampledErroredWorkspaces.map((workspace) => ({
.filter((stat) => stat.isCritical) workspaceId: workspace.workspaceId,
.map((stat) => ({ pendingMigrations: workspace.pendingMigrations,
workspaceId: stat.workspaceId, }))
pendingMigrations: stat.pendingMigrations,
}))
: null, : null,
}; };
const isHealthy = workspaceStats.every((stat) => !stat.isCritical); if (totalErroredWorkspacesCount === 0) {
if (isHealthy) {
this.stateManager.updateState(details); this.stateManager.updateState(details);
return indicator.up({ details }); return indicator.up({ details });
@ -73,7 +63,7 @@ export class AppHealthIndicator {
this.stateManager.updateState(details); this.stateManager.updateState(details);
return indicator.down({ return indicator.down({
message: `Found ${details.criticalWorkspaces?.length} workspaces with pending migrations`, message: `Found ${totalErroredWorkspacesCount} workspaces with pending migrations`,
details, details,
}); });
} catch (error) { } catch (error) {

View File

@ -49,6 +49,42 @@ export class WorkspaceMigrationService {
}); });
} }
/**
* Find workspaces with pending migrations
*
* @returns Promise<{ workspaceId: string; pendingMigrations: number }[]>
*/
public async getWorkspacesWithPendingMigrations(limit: number) {
const results = await this.workspaceMigrationRepository
.createQueryBuilder('workspaceMigration')
.select('workspaceMigration.workspaceId', 'workspaceId')
.addSelect('COUNT(*)', 'pendingCount')
.where('workspaceMigration.appliedAt IS NULL')
.groupBy('workspaceMigration.workspaceId')
.limit(limit)
.getRawMany();
return results.map((result) => ({
workspaceId: result.workspaceId,
pendingMigrations: Number(result.pendingCount) || 0,
}));
}
/**
* Count total number of workspaces with pending migrations
*
* @returns Promise<number>
*/
public async countWorkspacesWithPendingMigrations(): Promise<number> {
const result = await this.workspaceMigrationRepository
.createQueryBuilder('workspaceMigration')
.select('COUNT(DISTINCT workspaceMigration.workspaceId)', 'count')
.where('workspaceMigration.appliedAt IS NULL')
.getRawOne();
return Number(result.count) || 0;
}
/** /**
* Set appliedAt as current date for a given migration. * Set appliedAt as current date for a given migration.
* Should be called once the migration has been applied * Should be called once the migration has been applied