Health monitor status for admin panel (#10186)

# Health Monitoring for Self-Hosted Instances

This PR implements basic health monitoring for self-hosted instances in
the admin panel.

## Service Status Checks
We're adding real-time health checks for:
- Redis Connection
- Database Connection
- Worker Status
- Message Sync Status

## Existing Functionality
We already have message sync and captcha counters that store aggregated
metrics in cache within a configurable time window (default: 5 minutes).

## New Endpoints
1. `/healthz` - Basic server health check for Kubernetes pod monitoring
2. `/healthz/{serviceName}` - Individual service health checks (returns
200 if healthy)
3. `/metricsz/{metricName}` - Time-windowed metrics (message sync,
captcha)
4. GraphQL resolver in admin panel for UI consumption

All endpoints use the same underlying service, with different
presentation layers for infrastructure and UI needs.

---------

Co-authored-by: Félix Malfait <felix@twenty.com>
This commit is contained in:
nitin
2025-02-18 20:22:19 +05:30
committed by GitHub
parent 2fca60436b
commit d6655a2c3b
54 changed files with 2307 additions and 95 deletions

View File

@ -0,0 +1,193 @@
import { Test, TestingModule } from '@nestjs/testing';
import { AdminPanelHealthService } from 'src/engine/core-modules/admin-panel/admin-panel-health.service';
import { SystemHealth } from 'src/engine/core-modules/admin-panel/dtos/system-health.dto';
import { AdminPanelHealthServiceStatus } from 'src/engine/core-modules/admin-panel/enums/admin-panel-health-service-status.enum';
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
import { MessageSyncHealthIndicator } from 'src/engine/core-modules/health/indicators/message-sync.health';
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
describe('AdminPanelHealthService', () => {
  const { OPERATIONAL, OUTAGE } = AdminPanelHealthServiceStatus;

  // Every indicator is replaced by a stub exposing only `isHealthy`, which is
  // the single indicator method the service under test calls.
  const createIndicatorStub = () => ({ isHealthy: jest.fn() }) as any;

  let service: AdminPanelHealthService;
  let databaseHealth: jest.Mocked<DatabaseHealthIndicator>;
  let redisHealth: jest.Mocked<RedisHealthIndicator>;
  let workerHealth: jest.Mocked<WorkerHealthIndicator>;
  let messageSyncHealth: jest.Mocked<MessageSyncHealthIndicator>;

  beforeEach(async () => {
    databaseHealth = createIndicatorStub();
    redisHealth = createIndicatorStub();
    workerHealth = createIndicatorStub();
    messageSyncHealth = createIndicatorStub();

    const testingModule: TestingModule = await Test.createTestingModule({
      providers: [
        AdminPanelHealthService,
        { provide: DatabaseHealthIndicator, useValue: databaseHealth },
        { provide: RedisHealthIndicator, useValue: redisHealth },
        { provide: WorkerHealthIndicator, useValue: workerHealth },
        { provide: MessageSyncHealthIndicator, useValue: messageSyncHealth },
      ],
    }).compile();

    service = testingModule.get<AdminPanelHealthService>(
      AdminPanelHealthService,
    );
  });

  it('should be defined', () => {
    expect(service).toBeDefined();
  });

  it('should transform health check response to SystemHealth format', async () => {
    // Queue payload reported by the worker indicator; the service is expected
    // to return it unchanged apart from the added per-queue `status`.
    const reportedQueue = {
      name: 'test',
      workers: 1,
      metrics: {
        active: 1,
        completed: 0,
        delayed: 4,
        failed: 3,
        waiting: 0,
        prioritized: 0,
      },
    };

    databaseHealth.isHealthy.mockResolvedValue({
      database: { status: 'up', details: 'Database is healthy' },
    });
    redisHealth.isHealthy.mockResolvedValue({
      redis: { status: 'up', details: 'Redis is connected' },
    });
    workerHealth.isHealthy.mockResolvedValue({
      worker: { status: 'up', queues: [reportedQueue] },
    });
    messageSyncHealth.isHealthy.mockResolvedValue({
      messageSync: { status: 'up', details: 'Message sync is operational' },
    });

    // `details` is JSON-serialised by the service, hence the embedded quotes.
    const expected: SystemHealth = {
      database: {
        status: OPERATIONAL,
        details: '"Database is healthy"',
        queues: undefined,
      },
      redis: {
        status: OPERATIONAL,
        details: '"Redis is connected"',
        queues: undefined,
      },
      worker: {
        status: OPERATIONAL,
        details: undefined,
        queues: [{ ...reportedQueue, status: OPERATIONAL }],
      },
      messageSync: {
        status: OPERATIONAL,
        details: '"Message sync is operational"',
        queues: undefined,
      },
    };

    const actual = await service.getSystemHealthStatus();

    expect(actual).toStrictEqual(expected);
  });

  it('should handle mixed health statuses', async () => {
    // Only Redis fails; the three other indicators report `up`.
    redisHealth.isHealthy.mockRejectedValue(
      new Error(HEALTH_ERROR_MESSAGES.REDIS_CONNECTION_FAILED),
    );
    databaseHealth.isHealthy.mockResolvedValue({
      database: { status: 'up' },
    });
    workerHealth.isHealthy.mockResolvedValue({
      worker: { status: 'up', queues: [] },
    });
    messageSyncHealth.isHealthy.mockResolvedValue({
      messageSync: { status: 'up' },
    });

    const actual = await service.getSystemHealthStatus();

    expect(actual).toMatchObject({
      database: { status: OPERATIONAL },
      redis: { status: OUTAGE },
      worker: { status: OPERATIONAL },
      messageSync: { status: OPERATIONAL },
    });
  });

  it('should handle all services down', async () => {
    const {
      DATABASE_CONNECTION_FAILED,
      REDIS_CONNECTION_FAILED,
      NO_ACTIVE_WORKERS,
      MESSAGE_SYNC_CHECK_FAILED,
    } = HEALTH_ERROR_MESSAGES;

    databaseHealth.isHealthy.mockRejectedValue(
      new Error(DATABASE_CONNECTION_FAILED),
    );
    redisHealth.isHealthy.mockRejectedValue(new Error(REDIS_CONNECTION_FAILED));
    workerHealth.isHealthy.mockRejectedValue(new Error(NO_ACTIVE_WORKERS));
    messageSyncHealth.isHealthy.mockRejectedValue(
      new Error(MESSAGE_SYNC_CHECK_FAILED),
    );

    const actual = await service.getSystemHealthStatus();

    expect(actual).toMatchObject({
      database: { status: OUTAGE },
      redis: { status: OUTAGE },
      worker: { status: OUTAGE },
      messageSync: { status: OUTAGE },
    });
  });
});

View File

@ -0,0 +1,108 @@
import { Injectable } from '@nestjs/common';
import { HealthIndicatorResult } from '@nestjs/terminus';
import { AdminPanelHealthServiceData } from 'src/engine/core-modules/admin-panel/dtos/admin-panel-health-service-data.dto';
import { AdminPanelIndicatorHealthStatusInputEnum } from 'src/engine/core-modules/admin-panel/dtos/admin-panel-indicator-health-status.input';
import { SystemHealth } from 'src/engine/core-modules/admin-panel/dtos/system-health.dto';
import { AdminPanelHealthServiceStatus } from 'src/engine/core-modules/admin-panel/enums/admin-panel-health-service-status.enum';
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
import { MessageSyncHealthIndicator } from 'src/engine/core-modules/health/indicators/message-sync.health';
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
/**
 * Runs the database, Redis, worker and message-sync health indicators and
 * converts their results into the DTOs consumed by the admin panel.
 */
@Injectable()
export class AdminPanelHealthService {
  constructor(
    private readonly databaseHealth: DatabaseHealthIndicator,
    private readonly redisHealth: RedisHealthIndicator,
    private readonly workerHealth: WorkerHealthIndicator,
    private readonly messageSyncHealth: MessageSyncHealthIndicator,
  ) {}

  /**
   * Indicator lookup keyed by the enum members accepted by
   * `getIndicatorHealthStatus`, so the keys cannot drift from the enum values.
   */
  private readonly healthIndicators = {
    [AdminPanelIndicatorHealthStatusInputEnum.DATABASE]: this.databaseHealth,
    [AdminPanelIndicatorHealthStatusInputEnum.REDIS]: this.redisHealth,
    [AdminPanelIndicatorHealthStatusInputEnum.WORKER]: this.workerHealth,
    [AdminPanelIndicatorHealthStatusInputEnum.MESSAGE_SYNC]:
      this.messageSyncHealth,
  };

  /**
   * Converts one settled indicator promise into the admin-panel representation.
   *
   * - Rejected: reported as an outage, with the rejection's `message` (if any)
   *   as `details`.
   * - Fulfilled: the first entry of the result is read; `status === 'up'` maps
   *   to operational, anything else to outage. `details` is JSON-serialised
   *   when truthy, so a plain string comes back wrapped in double quotes.
   * - Fulfilled with no entry at all: reported as an outage instead of
   *   throwing on the missing entry.
   */
  private getServiceStatus(
    result: PromiseSettledResult<HealthIndicatorResult>,
  ): AdminPanelHealthServiceData {
    if (result.status === 'rejected') {
      return {
        status: AdminPanelHealthServiceStatus.OUTAGE,
        details: result.reason?.message,
      };
    }

    const key = Object.keys(result.value)[0];
    const serviceResult = key === undefined ? undefined : result.value[key];

    if (!serviceResult) {
      return { status: AdminPanelHealthServiceStatus.OUTAGE };
    }

    const details = serviceResult.details;

    return {
      status:
        serviceResult.status === 'up'
          ? AdminPanelHealthServiceStatus.OPERATIONAL
          : AdminPanelHealthServiceStatus.OUTAGE,
      details: details ? JSON.stringify(details) : undefined,
      queues: serviceResult.queues,
    };
  }

  /**
   * Returns a copy of `serviceStatus` whose queues each carry a `status`:
   * operational when the queue has at least one worker, outage otherwise.
   * A missing queue list becomes an empty array.
   */
  private withQueueStatuses(
    serviceStatus: AdminPanelHealthServiceData,
  ): AdminPanelHealthServiceData {
    return {
      ...serviceStatus,
      queues: (serviceStatus.queues ?? []).map((queue) => ({
        ...queue,
        status:
          queue.workers > 0
            ? AdminPanelHealthServiceStatus.OPERATIONAL
            : AdminPanelHealthServiceStatus.OUTAGE,
      })),
    };
  }

  /**
   * Checks a single indicator.
   *
   * @param indicatorName - which indicator to run
   * @returns the indicator status; for the worker indicator each queue is
   *   additionally annotated with its own status
   * @throws Error when no indicator is registered under `indicatorName`
   */
  async getIndicatorHealthStatus(
    indicatorName: AdminPanelIndicatorHealthStatusInputEnum,
  ): Promise<AdminPanelHealthServiceData> {
    const healthIndicator = this.healthIndicators[indicatorName];

    if (!healthIndicator) {
      throw new Error(`Health indicator not found: ${indicatorName}`);
    }

    // allSettled turns a rejected check into data instead of an exception.
    const [result] = await Promise.allSettled([healthIndicator.isHealthy()]);
    const indicatorStatus = this.getServiceStatus(result);

    return indicatorName === AdminPanelIndicatorHealthStatusInputEnum.WORKER
      ? this.withQueueStatuses(indicatorStatus)
      : indicatorStatus;
  }

  /**
   * Runs all four indicators concurrently and reports each one's status.
   * A failing indicator never rejects the overall call; it is reported as an
   * outage for that service only.
   */
  async getSystemHealthStatus(): Promise<SystemHealth> {
    const [databaseResult, redisResult, workerResult, messageSyncResult] =
      await Promise.allSettled([
        this.databaseHealth.isHealthy(),
        this.redisHealth.isHealthy(),
        this.workerHealth.isHealthy(),
        this.messageSyncHealth.isHealthy(),
      ]);

    return {
      database: this.getServiceStatus(databaseResult),
      redis: this.getServiceStatus(redisResult),
      worker: this.withQueueStatuses(this.getServiceStatus(workerResult)),
      messageSync: this.getServiceStatus(messageSyncResult),
    };
  }
}

View File

@ -1,21 +1,28 @@
import { Module } from '@nestjs/common';
import { TerminusModule } from '@nestjs/terminus';
import { TypeOrmModule } from '@nestjs/typeorm';
import { AdminPanelHealthService } from 'src/engine/core-modules/admin-panel/admin-panel-health.service';
import { AdminPanelResolver } from 'src/engine/core-modules/admin-panel/admin-panel.resolver';
import { AdminPanelService } from 'src/engine/core-modules/admin-panel/admin-panel.service';
import { AuthModule } from 'src/engine/core-modules/auth/auth.module';
import { DomainManagerModule } from 'src/engine/core-modules/domain-manager/domain-manager.module';
import { FeatureFlag } from 'src/engine/core-modules/feature-flag/feature-flag.entity';
import { HealthModule } from 'src/engine/core-modules/health/health.module';
import { RedisClientModule } from 'src/engine/core-modules/redis-client/redis-client.module';
import { User } from 'src/engine/core-modules/user/user.entity';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { DomainManagerModule } from 'src/engine/core-modules/domain-manager/domain-manager.module';
/**
 * Nest module for the admin panel: registers the resolver and its services,
 * and exports AdminPanelService for use by other modules.
 */
@Module({
  imports: [
    TypeOrmModule.forFeature([User, Workspace, FeatureFlag], 'core'),
    AuthModule,
    DomainManagerModule,
    HealthModule,
    RedisClientModule,
    TerminusModule,
  ],
  // A single `providers` key: an object literal must not declare it twice.
  // AdminPanelHealthService is listed because AdminPanelResolver injects it.
  providers: [AdminPanelResolver, AdminPanelService, AdminPanelHealthService],
  exports: [AdminPanelService],
})
export class AdminPanelModule {}

View File

@ -1,10 +1,12 @@
import { UseFilters, UseGuards } from '@nestjs/common';
import { Args, Mutation, Query, Resolver } from '@nestjs/graphql';
import { AdminPanelHealthService } from 'src/engine/core-modules/admin-panel/admin-panel-health.service';
import { AdminPanelService } from 'src/engine/core-modules/admin-panel/admin-panel.service';
import { EnvironmentVariablesOutput } from 'src/engine/core-modules/admin-panel/dtos/environment-variables.output';
import { ImpersonateInput } from 'src/engine/core-modules/admin-panel/dtos/impersonate.input';
import { ImpersonateOutput } from 'src/engine/core-modules/admin-panel/dtos/impersonate.output';
import { SystemHealth } from 'src/engine/core-modules/admin-panel/dtos/system-health.dto';
import { UpdateWorkspaceFeatureFlagInput } from 'src/engine/core-modules/admin-panel/dtos/update-workspace-feature-flag.input';
import { UserLookup } from 'src/engine/core-modules/admin-panel/dtos/user-lookup.entity';
import { UserLookupInput } from 'src/engine/core-modules/admin-panel/dtos/user-lookup.input';
@ -13,10 +15,16 @@ import { ImpersonateGuard } from 'src/engine/guards/impersonate-guard';
import { UserAuthGuard } from 'src/engine/guards/user-auth.guard';
import { WorkspaceAuthGuard } from 'src/engine/guards/workspace-auth.guard';
import { AdminPanelHealthServiceData } from './dtos/admin-panel-health-service-data.dto';
import { AdminPanelIndicatorHealthStatusInputEnum } from './dtos/admin-panel-indicator-health-status.input';
@Resolver()
@UseFilters(AuthGraphqlApiExceptionFilter)
export class AdminPanelResolver {
/**
 * A class may have only one constructor implementation, so the single
 * constructor takes both injected services.
 *
 * @param adminService - general admin-panel operations
 * @param adminPanelHealthService - health status lookups used by the health queries
 */
constructor(
  private adminService: AdminPanelService,
  private adminPanelHealthService: AdminPanelHealthService,
) {}
@UseGuards(WorkspaceAuthGuard, UserAuthGuard, ImpersonateGuard)
@Mutation(() => ImpersonateOutput)
@ -53,4 +61,20 @@ export class AdminPanelResolver {
async getEnvironmentVariablesGrouped(): Promise<EnvironmentVariablesOutput> {
return this.adminService.getEnvironmentVariablesGrouped();
}
/**
 * GraphQL query returning the health of every monitored service at once.
 * Protected by the workspace, user and impersonate guards; the work itself is
 * delegated to AdminPanelHealthService.getSystemHealthStatus().
 */
@UseGuards(WorkspaceAuthGuard, UserAuthGuard, ImpersonateGuard)
@Query(() => SystemHealth)
async getSystemHealthStatus(): Promise<SystemHealth> {
return this.adminPanelHealthService.getSystemHealthStatus();
}
/**
 * GraphQL query returning the health of a single indicator.
 *
 * Protected by the same guard chain as `getSystemHealthStatus`, so
 * per-indicator details are not served to callers that could not read the
 * aggregated status.
 *
 * @param indicatorName - the indicator to check
 * @returns the status produced by AdminPanelHealthService.getIndicatorHealthStatus()
 */
@UseGuards(WorkspaceAuthGuard, UserAuthGuard, ImpersonateGuard)
@Query(() => AdminPanelHealthServiceData)
async getIndicatorHealthStatus(
  @Args('indicatorName', {
    type: () => AdminPanelIndicatorHealthStatusInputEnum,
  })
  indicatorName: AdminPanelIndicatorHealthStatusInputEnum,
): Promise<AdminPanelHealthServiceData> {
  return this.adminPanelHealthService.getIndicatorHealthStatus(indicatorName);
}
}

View File

@ -0,0 +1,16 @@
import { Field, ObjectType } from '@nestjs/graphql';
import { AdminPanelWorkerQueueHealth } from 'src/engine/core-modules/admin-panel/dtos/admin-panel-worker-queue-health.dto';
import { AdminPanelHealthServiceStatus } from 'src/engine/core-modules/admin-panel/enums/admin-panel-health-service-status.enum';
/**
 * GraphQL object type describing the health of one monitored service.
 */
@ObjectType()
export class AdminPanelHealthServiceData {
// Overall state of the service (non-nullable in the schema).
@Field(() => AdminPanelHealthServiceStatus)
status: AdminPanelHealthServiceStatus;
// Optional free-form detail string; nullable in the schema.
@Field(() => String, { nullable: true })
details?: string;
// Optional per-queue health entries; nullable in the schema.
@Field(() => [AdminPanelWorkerQueueHealth], { nullable: true })
queues?: AdminPanelWorkerQueueHealth[];
}

View File

@ -0,0 +1,17 @@
import { Field, registerEnumType } from '@nestjs/graphql';
/**
 * Names of the health indicators that can be queried individually.
 */
export enum AdminPanelIndicatorHealthStatusInputEnum {
DATABASE = 'database',
REDIS = 'redis',
WORKER = 'worker',
MESSAGE_SYNC = 'messageSync',
}
// Registers the enum with the GraphQL layer under the given schema name.
registerEnumType(AdminPanelIndicatorHealthStatusInputEnum, {
name: 'AdminPanelIndicatorHealthStatusInputEnum',
});
/**
 * Wrapper holding the name of the indicator to check.
 *
 * NOTE(review): the class declares a `@Field` but carries no class-level
 * GraphQL decorator (e.g. `@InputType()` / `@ArgsType()`) in this file —
 * confirm whether it is meant to be used as a GraphQL input, or is unused.
 */
export class AdminPanelIndicatorHealthStatusInput {
@Field(() => AdminPanelIndicatorHealthStatusInputEnum)
indicatorName: AdminPanelIndicatorHealthStatusInputEnum;
}

View File

@ -0,0 +1,10 @@
import { Field, ObjectType } from '@nestjs/graphql';
import { AdminPanelHealthServiceStatus } from 'src/engine/core-modules/admin-panel/enums/admin-panel-health-service-status.enum';
import { WorkerQueueHealth } from 'src/engine/core-modules/health/types/worker-queue-health.type';
/**
 * GraphQL object type for one worker queue as shown in the admin panel:
 * everything inherited from WorkerQueueHealth plus a per-queue status.
 */
@ObjectType()
export class AdminPanelWorkerQueueHealth extends WorkerQueueHealth {
// State of this individual queue (non-nullable in the schema).
@Field(() => AdminPanelHealthServiceStatus)
status: AdminPanelHealthServiceStatus;
}

View File

@ -0,0 +1,18 @@
import { Field, ObjectType } from '@nestjs/graphql';
import { AdminPanelHealthServiceData } from 'src/engine/core-modules/admin-panel/dtos/admin-panel-health-service-data.dto';
/**
 * GraphQL object type bundling the health of all four monitored services.
 * Every field is non-nullable and uses the same per-service shape.
 */
@ObjectType()
export class SystemHealth {
// Health of the database connection.
@Field(() => AdminPanelHealthServiceData)
database: AdminPanelHealthServiceData;
// Health of the Redis connection.
@Field(() => AdminPanelHealthServiceData)
redis: AdminPanelHealthServiceData;
// Health of the background workers.
@Field(() => AdminPanelHealthServiceData)
worker: AdminPanelHealthServiceData;
// Health of message synchronisation.
@Field(() => AdminPanelHealthServiceData)
messageSync: AdminPanelHealthServiceData;
}

View File

@ -0,0 +1,10 @@
import { registerEnumType } from '@nestjs/graphql';
/**
 * Two-state service status used by the admin-panel health DTOs.
 */
export enum AdminPanelHealthServiceStatus {
OPERATIONAL = 'operational',
OUTAGE = 'outage',
}
// Registers the enum with the GraphQL layer under the given schema name.
registerEnumType(AdminPanelHealthServiceStatus, {
name: 'AdminPanelHealthServiceStatus',
});