App health check: Optimize pending migration query (#11049)

## Optimization: Efficient Health Check Query

This PR optimizes the workspace health check system by replacing the N+1
query pattern with efficient database queries.

### Key Improvements

- **Eliminated N+1 Query Problem**: Instead of fetching all workspaces
and then querying each one individually for pending migrations (which
caused slowness in production), we now use a single optimized query to
directly identify workspaces with pending migrations

- **Better Performance**: Reduced the number of database queries from
potentially hundreds/thousands (previous implementation) to just 2 fixed
queries regardless of workspace count

- **Full Coverage Instead of Sampling**: Rather than implementing a cap
on workspace checks at 100 (which was a workaround for performance
issues), this solution addresses the root cause by optimizing the query
pattern. We can now efficiently check all workspaces with pending
migrations without performance penalties.

@FelixMalfait This addresses the "always eager-load when you can"
feedback by handling the problem at the database level rather than just
applying a limit. The optimized query should solve both the performance
issues and provide more accurate health status information.
This commit is contained in:
nitin
2025-03-21 21:00:38 +05:30
committed by GitHub
parent 081f5fa766
commit 3960b0f99d
4 changed files with 170 additions and 87 deletions

View File

@ -1,11 +1,12 @@
import { Module } from '@nestjs/common';
import { TerminusModule } from '@nestjs/terminus';
import { TypeOrmModule } from '@nestjs/typeorm';
import { HealthController } from 'src/engine/core-modules/health/controllers/health.controller';
import { MetricsController } from 'src/engine/core-modules/health/controllers/metrics.controller';
import { AppHealthIndicator } from 'src/engine/core-modules/health/indicators/app.health';
import { RedisClientModule } from 'src/engine/core-modules/redis-client/redis-client.module';
import { ObjectMetadataModule } from 'src/engine/metadata-modules/object-metadata/object-metadata.module';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { WorkspaceMigrationModule } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.module';
import { HealthCacheService } from './health-cache.service';
@ -18,8 +19,8 @@ import { WorkerHealthIndicator } from './indicators/worker.health';
imports: [
TerminusModule,
RedisClientModule,
ObjectMetadataModule,
WorkspaceMigrationModule,
TypeOrmModule.forFeature([Workspace], 'core'),
],
controllers: [HealthController, MetricsController],
providers: [

View File

@ -1,23 +1,27 @@
import { HealthIndicatorService } from '@nestjs/terminus';
import { Test, TestingModule } from '@nestjs/testing';
import { getRepositoryToken } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { AppHealthIndicator } from 'src/engine/core-modules/health/indicators/app.health';
import { ObjectMetadataService } from 'src/engine/metadata-modules/object-metadata/object-metadata.service';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { WorkspaceMigrationService } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.service';
describe('AppHealthIndicator', () => {
let service: AppHealthIndicator;
let objectMetadataService: jest.Mocked<ObjectMetadataService>;
let workspaceRepository: jest.Mocked<Repository<Workspace>>;
let workspaceMigrationService: jest.Mocked<WorkspaceMigrationService>;
let healthIndicatorService: jest.Mocked<HealthIndicatorService>;
beforeEach(async () => {
objectMetadataService = {
findMany: jest.fn(),
workspaceRepository = {
count: jest.fn(),
} as any;
workspaceMigrationService = {
getPendingMigrations: jest.fn(),
getWorkspacesWithPendingMigrations: jest.fn(),
countWorkspacesWithPendingMigrations: jest.fn(),
} as any;
healthIndicatorService = {
@ -35,10 +39,9 @@ describe('AppHealthIndicator', () => {
providers: [
AppHealthIndicator,
{
provide: ObjectMetadataService,
useValue: objectMetadataService,
provide: getRepositoryToken(Workspace, 'core'),
useValue: workspaceRepository,
},
{
provide: WorkspaceMigrationService,
useValue: workspaceMigrationService,
@ -63,62 +66,85 @@ describe('AppHealthIndicator', () => {
});
it('should return up status when no issues and no pending migrations', async () => {
objectMetadataService.findMany.mockResolvedValue([
{
id: '1',
workspaceId: 'workspace1',
} as any,
{
id: '2',
workspaceId: 'workspace2',
} as any,
]);
workspaceRepository.count.mockResolvedValue(2);
workspaceMigrationService.getPendingMigrations.mockResolvedValue([]);
workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
0,
);
workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
[],
);
const result = await service.isHealthy();
expect(result.app.status).toBe('up');
expect(result.app.details.overview.totalWorkspacesCount).toBe(2);
expect(result.app.details.overview.criticalWorkspacesCount).toBe(0);
expect(result.app.details.criticalWorkspaces).toBe(null);
expect(result.app.details.overview.erroredWorkspaceCount).toBe(0);
expect(result.app.details.erroredWorkspace).toBe(null);
expect(result.app.details.system.nodeVersion).toBeDefined();
expect(result.app.details.system.timestamp).toBeDefined();
});
it('should return down status when there are pending migrations', async () => {
objectMetadataService.findMany.mockResolvedValue([
{
id: '1',
workspaceId: 'workspace1',
} as any,
]);
workspaceRepository.count.mockResolvedValue(5);
workspaceMigrationService.getPendingMigrations.mockResolvedValue([
{
id: '1',
createdAt: new Date(),
migrations: [],
name: 'migration1',
isCustom: false,
workspaceId: 'workspace1',
} as any,
]);
// Mock a total count that's higher than the sample
workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
10,
);
workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
[
{
workspaceId: 'workspace1',
pendingMigrations: 1,
},
{
workspaceId: 'workspace2',
pendingMigrations: 3,
},
{
workspaceId: 'workspace3',
pendingMigrations: 2,
},
],
);
const result = await service.isHealthy();
expect(result.app.status).toBe('down');
expect(result.app.details.overview.criticalWorkspacesCount).toBe(1);
expect(result.app.details.criticalWorkspaces).toEqual([
{
workspaceId: 'workspace1',
pendingMigrations: 1,
expect(result.app.message).toBe(
'Found 10 workspaces with pending migrations',
);
expect(result.app.details).toEqual({
system: {
nodeVersion: process.version,
timestamp: expect.any(String),
},
]);
overview: {
totalWorkspacesCount: 5,
erroredWorkspaceCount: 10,
},
erroredWorkspace: [
{
workspaceId: 'workspace1',
pendingMigrations: 1,
},
{
workspaceId: 'workspace2',
pendingMigrations: 3,
},
{
workspaceId: 'workspace3',
pendingMigrations: 2,
},
],
});
});
it('should handle errors gracefully and maintain state history', async () => {
objectMetadataService.findMany.mockRejectedValue(
workspaceRepository.count.mockRejectedValue(
new Error('Database connection failed'),
);
@ -133,18 +159,18 @@ describe('AppHealthIndicator', () => {
it('should maintain state history across health checks', async () => {
// First check - healthy state
objectMetadataService.findMany.mockResolvedValue([
{
id: '1',
workspaceId: 'workspace1',
} as any,
]);
workspaceMigrationService.getPendingMigrations.mockResolvedValue([]);
workspaceRepository.count.mockResolvedValue(2);
workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
0,
);
workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
[],
);
await service.isHealthy();
// Second check - error state
objectMetadataService.findMany.mockRejectedValue(
workspaceRepository.count.mockRejectedValue(
new Error('Database connection failed'),
);
@ -155,4 +181,34 @@ describe('AppHealthIndicator', () => {
expect(result.app.details.stateHistory.timestamp).toBeDefined();
expect(result.app.details.stateHistory.details).toBeDefined();
});
it('should sample workspaces with pending migrations up to limit', async () => {
workspaceRepository.count.mockResolvedValue(1000);
// Mock a total count higher than the sample
workspaceMigrationService.countWorkspacesWithPendingMigrations.mockResolvedValue(
500,
);
const sampleWorkspaces = Array(300)
.fill(0)
.map((_, i) => ({
workspaceId: `workspace${i}`,
pendingMigrations: (i % 3) + 1,
}));
workspaceMigrationService.getWorkspacesWithPendingMigrations.mockResolvedValue(
sampleWorkspaces,
);
const result = await service.isHealthy();
expect(result.app.status).toBe('down');
expect(result.app.message).toBe(
'Found 500 workspaces with pending migrations',
);
expect(result.app.details.overview.totalWorkspacesCount).toBe(1000);
expect(result.app.details.overview.erroredWorkspaceCount).toBe(500);
expect(result.app.details.erroredWorkspace.length).toBe(300);
});
});

View File

@ -3,9 +3,12 @@ import {
HealthIndicatorResult,
HealthIndicatorService,
} from '@nestjs/terminus';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { HealthStateManager } from 'src/engine/core-modules/health/utils/health-state-manager.util';
import { ObjectMetadataService } from 'src/engine/metadata-modules/object-metadata/object-metadata.service';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { WorkspaceMigrationService } from 'src/engine/metadata-modules/workspace-migration/workspace-migration.service';
@Injectable()
@ -14,7 +17,8 @@ export class AppHealthIndicator {
constructor(
private readonly healthIndicatorService: HealthIndicatorService,
private readonly objectMetadataService: ObjectMetadataService,
@InjectRepository(Workspace, 'core')
private readonly workspaceRepository: Repository<Workspace>,
private readonly workspaceMigrationService: WorkspaceMigrationService,
) {}
@ -22,23 +26,15 @@ export class AppHealthIndicator {
const indicator = this.healthIndicatorService.check('app');
try {
const workspaces = await this.objectMetadataService.findMany();
const workspaceIds = [...new Set(workspaces.map((w) => w.workspaceId))];
const totalErroredWorkspacesCount =
await this.workspaceMigrationService.countWorkspacesWithPendingMigrations();
const workspaceStats = await Promise.all(
workspaceIds.map(async (workspaceId) => {
const pendingMigrations =
await this.workspaceMigrationService.getPendingMigrations(
workspaceId,
);
const sampledErroredWorkspaces =
await this.workspaceMigrationService.getWorkspacesWithPendingMigrations(
500,
);
return {
workspaceId,
pendingMigrations: pendingMigrations.length,
isCritical: pendingMigrations.length > 0,
};
}),
);
const totalWorkspaceCount = await this.workspaceRepository.count();
const details = {
system: {
@ -46,25 +42,19 @@ export class AppHealthIndicator {
timestamp: new Date().toISOString(),
},
overview: {
totalWorkspacesCount: workspaceIds.length,
criticalWorkspacesCount: workspaceStats.filter(
(stat) => stat.isCritical,
).length,
totalWorkspacesCount: totalWorkspaceCount,
erroredWorkspaceCount: totalErroredWorkspacesCount,
},
criticalWorkspaces:
workspaceStats.filter((stat) => stat.isCritical).length > 0
? workspaceStats
.filter((stat) => stat.isCritical)
.map((stat) => ({
workspaceId: stat.workspaceId,
pendingMigrations: stat.pendingMigrations,
}))
erroredWorkspace:
totalErroredWorkspacesCount > 0
? sampledErroredWorkspaces.map((workspace) => ({
workspaceId: workspace.workspaceId,
pendingMigrations: workspace.pendingMigrations,
}))
: null,
};
const isHealthy = workspaceStats.every((stat) => !stat.isCritical);
if (isHealthy) {
if (totalErroredWorkspacesCount === 0) {
this.stateManager.updateState(details);
return indicator.up({ details });
@ -73,7 +63,7 @@ export class AppHealthIndicator {
this.stateManager.updateState(details);
return indicator.down({
message: `Found ${details.criticalWorkspaces?.length} workspaces with pending migrations`,
message: `Found ${totalErroredWorkspacesCount} workspaces with pending migrations`,
details,
});
} catch (error) {

View File

@ -49,6 +49,42 @@ export class WorkspaceMigrationService {
});
}
/**
* Find workspaces with pending migrations
*
* @returns Promise<{ workspaceId: string; pendingMigrations: number }[]>
*/
public async getWorkspacesWithPendingMigrations(limit: number) {
const results = await this.workspaceMigrationRepository
.createQueryBuilder('workspaceMigration')
.select('workspaceMigration.workspaceId', 'workspaceId')
.addSelect('COUNT(*)', 'pendingCount')
.where('workspaceMigration.appliedAt IS NULL')
.groupBy('workspaceMigration.workspaceId')
.limit(limit)
.getRawMany();
return results.map((result) => ({
workspaceId: result.workspaceId,
pendingMigrations: Number(result.pendingCount) || 0,
}));
}
/**
* Count total number of workspaces with pending migrations
*
* @returns Promise<number>
*/
public async countWorkspacesWithPendingMigrations(): Promise<number> {
const result = await this.workspaceMigrationRepository
.createQueryBuilder('workspaceMigration')
.select('COUNT(DISTINCT workspaceMigration.workspaceId)', 'count')
.where('workspaceMigration.appliedAt IS NULL')
.getRawOne();
return Number(result.count) || 0;
}
/**
* Set appliedAt as current date for a given migration.
* Should be called once the migration has been applied