refactor + new account sync metrics + isolating health status inside folder admin-panel > health-status (#10314)

closes https://github.com/twentyhq/core-team-issues/issues/444
https://github.com/twentyhq/core-team-issues/issues/443
https://github.com/twentyhq/core-team-issues/issues/442
This commit is contained in:
nitin
2025-02-21 14:18:47 +05:30
committed by GitHub
parent 41bbb4b47f
commit c46f7848b7
57 changed files with 1441 additions and 833 deletions

View File

@ -9,4 +9,7 @@ export const HEALTH_ERROR_MESSAGES = {
MESSAGE_SYNC_TIMEOUT: 'Message sync check timeout',
MESSAGE_SYNC_CHECK_FAILED: 'Message sync check failed',
MESSAGE_SYNC_HIGH_FAILURE_RATE: 'High failure rate in message sync jobs',
CALENDAR_SYNC_TIMEOUT: 'Calendar sync check timeout',
CALENDAR_SYNC_CHECK_FAILED: 'Calendar sync check failed',
CALENDAR_SYNC_HIGH_FAILURE_RATE: 'High failure rate in calendar sync jobs',
} as const;

View File

@ -0,0 +1 @@
/**
 * Failure-rate cutoff, expressed in percent, used by the sync-job health
 * checks: a check reports "up" only while its computed failure rate is
 * strictly below this value.
 */
export const METRICS_FAILURE_RATE_THRESHOLD = 20;

View File

@ -2,6 +2,7 @@ import { HealthCheckService } from '@nestjs/terminus';
import { Test, TestingModule } from '@nestjs/testing';
import { HealthController } from 'src/engine/core-modules/health/controllers/health.controller';
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
@ -29,6 +30,10 @@ describe('HealthController', () => {
provide: WorkerHealthIndicator,
useValue: { isHealthy: jest.fn() },
},
{
provide: ConnectedAccountHealth,
useValue: { isHealthy: jest.fn() },
},
],
}).compile();

View File

@ -14,6 +14,7 @@ describe('MetricsController', () => {
provide: HealthCacheService,
useValue: {
getMessageChannelSyncJobByStatusCounter: jest.fn(),
getCalendarChannelSyncJobByStatusCounter: jest.fn(),
getInvalidCaptchaCounter: jest.fn(),
},
},

View File

@ -1,7 +1,8 @@
import { BadRequestException, Controller, Get, Param } from '@nestjs/common';
import { HealthCheck, HealthCheckService } from '@nestjs/terminus';
import { HealthServiceName } from 'src/engine/core-modules/health/enums/health-service-name.enum';
import { HealthIndicatorId } from 'src/engine/core-modules/health/enums/health-indicator-id.enum';
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
@ -13,6 +14,7 @@ export class HealthController {
private readonly databaseHealth: DatabaseHealthIndicator,
private readonly redisHealth: RedisHealthIndicator,
private readonly workerHealth: WorkerHealthIndicator,
private readonly connectedAccountHealth: ConnectedAccountHealth,
) {}
@Get()
@ -23,17 +25,19 @@ export class HealthController {
/**
 * Runs the health check of a single indicator selected by the URL segment.
 *
 * The route placeholder and the `@Param` name must match: with the route
 * declared as `/:serviceName` and the parameter read as `indicatorId`, the
 * handler always received `undefined` and answered 400 for every request.
 *
 * @param indicatorId - Identifier taken from the path (e.g. `database`).
 * @returns The Terminus health-check result for that indicator.
 * @throws BadRequestException when the identifier is not a known indicator.
 */
@Get('/:indicatorId')
@HealthCheck()
checkService(@Param('indicatorId') indicatorId: HealthIndicatorId) {
  const checks = {
    [HealthIndicatorId.database]: () => this.databaseHealth.isHealthy(),
    [HealthIndicatorId.redis]: () => this.redisHealth.isHealthy(),
    [HealthIndicatorId.worker]: () => this.workerHealth.isHealthy(),
    [HealthIndicatorId.connectedAccount]: () =>
      this.connectedAccountHealth.isHealthy(),
  };

  // Own-property lookup: the `in` operator also matches inherited keys such
  // as "constructor" or "toString", which would let a crafted URL slip past
  // validation and hand a non-indicator function to the health service.
  if (!Object.prototype.hasOwnProperty.call(checks, indicatorId)) {
    throw new BadRequestException(`Invalid indicatorId: ${indicatorId}`);
  }

  return this.health.check([checks[indicatorId]]);
}
}

View File

@ -15,4 +15,9 @@ export class MetricsController {
getInvalidCaptchaCounter() {
return this.healthCacheService.getInvalidCaptchaCounter();
}
/**
 * GET handler that returns the calendar channel sync job counters, grouped
 * by status, exactly as produced by HealthCacheService.
 */
@Get('/calendar-channel-sync-job-by-status-counter')
getCalendarChannelSyncJobByStatusCounter() {
return this.healthCacheService.getCalendarChannelSyncJobByStatusCounter();
}
}

View File

@ -0,0 +1,12 @@
import { registerEnumType } from '@nestjs/graphql';
/**
 * Identifiers of the available health indicators. Each member's string value
 * equals its name; the health controller uses these values as the keys of its
 * per-indicator check table.
 */
export enum HealthIndicatorId {
database = 'database',
redis = 'redis',
worker = 'worker',
connectedAccount = 'connectedAccount',
}
// Register the enum with the GraphQL schema under the name "HealthIndicatorId".
registerEnumType(HealthIndicatorId, {
name: 'HealthIndicatorId',
});

View File

@ -1,6 +0,0 @@
/**
 * Names of the services that can be health-checked individually. The string
 * values are what the health controller compares the requested service name
 * against.
 */
export enum HealthServiceName {
DATABASE = 'database',
REDIS = 'redis',
WORKER = 'worker',
MESSAGE_SYNC = 'messageSync',
}

View File

@ -4,8 +4,9 @@ import { InjectCacheStorage } from 'src/engine/core-modules/cache-storage/decora
import { CacheStorageService } from 'src/engine/core-modules/cache-storage/services/cache-storage.service';
import { CacheStorageNamespace } from 'src/engine/core-modules/cache-storage/types/cache-storage-namespace.enum';
import { EnvironmentService } from 'src/engine/core-modules/environment/environment.service';
import { AccountSyncJobByStatusCounter } from 'src/engine/core-modules/health/types/account-sync-metrics.types';
import { HealthCounterCacheKeys } from 'src/engine/core-modules/health/types/health-counter-cache-keys.type';
import { MessageChannelSyncJobByStatusCounter } from 'src/engine/core-modules/health/types/message-sync-metrics.types';
import { CalendarChannelSyncStatus } from 'src/modules/calendar/common/standard-objects/calendar-channel.workspace-entity';
import { MessageChannelSyncStatus } from 'src/modules/messaging/common/standard-objects/message-channel.workspace-entity';
@Injectable()
@ -48,9 +49,7 @@ export class HealthCacheService {
);
const currentCounter =
await this.cacheStorage.get<MessageChannelSyncJobByStatusCounter>(
cacheKey,
);
await this.cacheStorage.get<AccountSyncJobByStatusCounter>(cacheKey);
const updatedCounter = {
...(currentCounter || {}),
@ -80,7 +79,7 @@ export class HealthCacheService {
for (const key of cacheKeys) {
const counter =
await this.cacheStorage.get<MessageChannelSyncJobByStatusCounter>(key);
await this.cacheStorage.get<AccountSyncJobByStatusCounter>(key);
if (!counter) continue;
@ -130,4 +129,58 @@ export class HealthCacheService {
return aggregatedCounter;
}
/**
 * Adds `increment` to the calendar sync job counter of the given status in
 * the current timestamped cache bucket, then stores the bucket again with
 * the health cache TTL.
 *
 * NOTE(review): this is a read followed by a write; concurrent callers could
 * overwrite each other's increments — confirm whether callers serialize.
 *
 * @param status - Calendar channel sync status whose count is bumped.
 * @param increment - Amount added to the stored count (missing counts are 0).
 * @returns Whatever the cache storage `set` call resolves to.
 */
async incrementCalendarChannelSyncJobByStatusCounter(
  status: CalendarChannelSyncStatus,
  increment: number,
) {
  const bucketKey = this.getCacheKeyWithTimestamp(
    HealthCounterCacheKeys.CalendarEventSyncJobByStatus,
  );

  const storedCounter =
    await this.cacheStorage.get<AccountSyncJobByStatusCounter>(bucketKey);

  // A missing bucket or a missing status both start from zero.
  const previousCount = storedCounter?.[status] || 0;
  const nextCounter = {
    ...(storedCounter || {}),
    [status]: previousCount + increment,
  };

  return await this.cacheStorage.set(
    bucketKey,
    nextCounter,
    this.healthCacheTtl,
  );
}
/**
 * Aggregates the calendar sync job counters over the health monitoring time
 * window.
 *
 * One cache bucket exists per timestamp returned by
 * `getLastXMinutesTimestamps`; every bucket's per-status counts are summed
 * into a single object that is pre-seeded with 0 for each
 * `CalendarChannelSyncStatus` value.
 *
 * @returns An object mapping each status to its total count in the window.
 */
async getCalendarChannelSyncJobByStatusCounter() {
  const cacheKeys = this.getLastXMinutesTimestamps(
    this.healthMonitoringTimeWindowInMinutes,
  ).map((timestamp) =>
    this.getCacheKeyWithTimestamp(
      HealthCounterCacheKeys.CalendarEventSyncJobByStatus,
      timestamp,
    ),
  );

  const aggregatedCounter = Object.fromEntries(
    Object.values(CalendarChannelSyncStatus).map((status) => [status, 0]),
  );

  // The bucket reads are independent, so issue them together instead of
  // paying one cache round-trip after another.
  const counters = await Promise.all(
    cacheKeys.map((key) =>
      this.cacheStorage.get<AccountSyncJobByStatusCounter>(key),
    ),
  );

  for (const counter of counters) {
    if (!counter) continue;

    for (const [status, count] of Object.entries(counter) as [
      CalendarChannelSyncStatus,
      number,
    ][]) {
      // Fall back to 0 so a stored status that was not pre-seeded above
      // cannot turn the total into NaN (undefined + count).
      aggregatedCounter[status] = (aggregatedCounter[status] ?? 0) + count;
    }
  }

  return aggregatedCounter;
}
}

View File

@ -3,11 +3,11 @@ import { TerminusModule } from '@nestjs/terminus';
import { HealthController } from 'src/engine/core-modules/health/controllers/health.controller';
import { MetricsController } from 'src/engine/core-modules/health/controllers/metrics.controller';
import { MessageSyncHealthIndicator } from 'src/engine/core-modules/health/indicators/message-sync.health';
import { RedisClientModule } from 'src/engine/core-modules/redis-client/redis-client.module';
import { HealthCacheService } from './health-cache.service';
import { ConnectedAccountHealth } from './indicators/connected-account.health';
import { DatabaseHealthIndicator } from './indicators/database.health';
import { RedisHealthIndicator } from './indicators/redis.health';
import { WorkerHealthIndicator } from './indicators/worker.health';
@ -19,14 +19,14 @@ import { WorkerHealthIndicator } from './indicators/worker.health';
DatabaseHealthIndicator,
RedisHealthIndicator,
WorkerHealthIndicator,
MessageSyncHealthIndicator,
ConnectedAccountHealth,
],
exports: [
HealthCacheService,
DatabaseHealthIndicator,
RedisHealthIndicator,
WorkerHealthIndicator,
MessageSyncHealthIndicator,
ConnectedAccountHealth,
],
})
export class HealthModule {}

View File

@ -0,0 +1,316 @@
import { HealthIndicatorService } from '@nestjs/terminus';
import { Test, TestingModule } from '@nestjs/testing';
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
import { HEALTH_INDICATORS_TIMEOUT } from 'src/engine/core-modules/health/constants/health-indicators-timeout.conts';
import { METRICS_FAILURE_RATE_THRESHOLD } from 'src/engine/core-modules/health/constants/metrics-failure-rate-threshold.const';
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
import { CalendarChannelSyncStatus } from 'src/modules/calendar/common/standard-objects/calendar-channel.workspace-entity';
import { MessageChannelSyncStatus } from 'src/modules/messaging/common/standard-objects/message-channel.workspace-entity';
// Unit tests for ConnectedAccountHealth. Both collaborators are mocked: the
// cache service supplies the per-status job counters, and the indicator
// service is replaced by a stub that echoes what was passed to up()/down().
describe('ConnectedAccountHealth', () => {
let service: ConnectedAccountHealth;
let healthCacheService: jest.Mocked<HealthCacheService>;
let healthIndicatorService: jest.Mocked<HealthIndicatorService>;
beforeEach(async () => {
healthCacheService = {
getMessageChannelSyncJobByStatusCounter: jest.fn(),
getCalendarChannelSyncJobByStatusCounter: jest.fn(),
} as any;
// check(key) returns a session whose up()/down() wrap the given data under
// that key, so assertions can read result.<key>.status / error / details.
healthIndicatorService = {
check: jest.fn().mockImplementation((key) => ({
up: jest.fn().mockImplementation((data) => ({
[key]: {
status: 'up',
details: data.details,
},
})),
down: jest.fn().mockImplementation((data) => ({
[key]: {
status: 'down',
error: data.error,
details: data.details,
},
})),
})),
} as any;
const module: TestingModule = await Test.createTestingModule({
providers: [
ConnectedAccountHealth,
{
provide: HealthCacheService,
useValue: healthCacheService,
},
{
provide: HealthIndicatorService,
useValue: healthIndicatorService,
},
],
}).compile();
service = module.get<ConnectedAccountHealth>(ConnectedAccountHealth);
// Fake timers let the timeout tests advance the clock without waiting.
jest.useFakeTimers();
});
afterEach(() => {
jest.useRealTimers();
});
it('should be defined', () => {
expect(service).toBeDefined();
});
describe('message sync health', () => {
// All counters at zero: no jobs means a failure rate of 0 and status "up".
it('should return up status when no message sync jobs are present', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ONGOING]: 0,
[MessageChannelSyncStatus.ACTIVE]: 0,
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 0,
},
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
{
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
[CalendarChannelSyncStatus.ACTIVE]: 0,
},
);
const result = await service.isHealthy();
expect(result.connectedAccount.status).toBe('up');
expect(result.connectedAccount.details.messageSync.status).toBe('up');
expect(
result.connectedAccount.details.messageSync.details.totalJobs,
).toBe(0);
expect(
result.connectedAccount.details.messageSync.details.failedJobs,
).toBe(0);
expect(
result.connectedAccount.details.messageSync.details.failureRate,
).toBe(0);
});
// 6 jobs in total with 2 FAILED_UNKNOWN: the expected rate of 33.33 shows
// that FAILED_INSUFFICIENT_PERMISSIONS is not counted as a failure.
it(`should return down status when message sync failure rate is above ${METRICS_FAILURE_RATE_THRESHOLD}%`, async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ONGOING]: 1,
[MessageChannelSyncStatus.ACTIVE]: 1,
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 2,
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 2,
},
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
{
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
[CalendarChannelSyncStatus.ACTIVE]: 1,
},
);
const result = await service.isHealthy();
expect(result.connectedAccount.status).toBe('down');
expect(result.connectedAccount.error).toBe(
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
);
expect(result.connectedAccount.details.messageSync.status).toBe('down');
expect(result.connectedAccount.details.messageSync.error).toBe(
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
);
expect(
result.connectedAccount.details.messageSync.details.failureRate,
).toBe(33.33);
});
});
describe('calendar sync health', () => {
// Mirror of the message sync "no jobs" case, for the calendar counters.
it('should return up status when no calendar sync jobs are present', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ACTIVE]: 0,
},
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
{
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
[CalendarChannelSyncStatus.ONGOING]: 0,
[CalendarChannelSyncStatus.ACTIVE]: 0,
[CalendarChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 0,
},
);
const result = await service.isHealthy();
expect(result.connectedAccount.status).toBe('up');
expect(result.connectedAccount.details.calendarSync.status).toBe('up');
expect(
result.connectedAccount.details.calendarSync.details.totalJobs,
).toBe(0);
expect(
result.connectedAccount.details.calendarSync.details.failedJobs,
).toBe(0);
expect(
result.connectedAccount.details.calendarSync.details.failureRate,
).toBe(0);
});
// Same 2-out-of-6 scenario as the message sync case, on the calendar side.
it(`should return down status when calendar sync failure rate is above ${METRICS_FAILURE_RATE_THRESHOLD}%`, async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ACTIVE]: 1,
},
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
{
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
[CalendarChannelSyncStatus.ONGOING]: 1,
[CalendarChannelSyncStatus.ACTIVE]: 1,
[CalendarChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 2,
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 2,
},
);
const result = await service.isHealthy();
expect(result.connectedAccount.status).toBe('down');
expect(result.connectedAccount.error).toBe(
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE,
);
expect(result.connectedAccount.details.calendarSync.status).toBe('down');
expect(result.connectedAccount.details.calendarSync.error).toBe(
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE,
);
expect(
result.connectedAccount.details.calendarSync.details.failureRate,
).toBe(33.33);
});
});
describe('timeout handling', () => {
// The mocked counter call resolves only after the timeout has elapsed, so
// advancing the fake clock past HEALTH_INDICATORS_TIMEOUT must yield "down".
it('should handle message sync timeout', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockImplementationOnce(
() =>
new Promise((resolve) =>
setTimeout(resolve, HEALTH_INDICATORS_TIMEOUT + 100),
),
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
{
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
[CalendarChannelSyncStatus.ACTIVE]: 1,
},
);
const healthCheckPromise = service.isHealthy();
jest.advanceTimersByTime(HEALTH_INDICATORS_TIMEOUT + 1);
const result = await healthCheckPromise;
expect(result.connectedAccount.status).toBe('down');
expect(result.connectedAccount.error).toBe(
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
);
expect(result.connectedAccount.details.messageSync.status).toBe('down');
expect(result.connectedAccount.details.messageSync.error).toBe(
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
);
});
// Same timeout scenario with the slow call on the calendar side.
it('should handle calendar sync timeout', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ACTIVE]: 1,
},
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockImplementationOnce(
() =>
new Promise((resolve) =>
setTimeout(resolve, HEALTH_INDICATORS_TIMEOUT + 100),
),
);
const healthCheckPromise = service.isHealthy();
jest.advanceTimersByTime(HEALTH_INDICATORS_TIMEOUT + 1);
const result = await healthCheckPromise;
expect(result.connectedAccount.status).toBe('down');
expect(result.connectedAccount.error).toBe(
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_TIMEOUT,
);
expect(result.connectedAccount.details.calendarSync.status).toBe('down');
expect(result.connectedAccount.details.calendarSync.error).toBe(
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_TIMEOUT,
);
});
});
describe('combined health check', () => {
// 1 failure out of 9 jobs on each side stays under the threshold.
it('should return combined status with both checks healthy', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ACTIVE]: 8,
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 1,
},
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
{
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
[CalendarChannelSyncStatus.ACTIVE]: 8,
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 1,
},
);
const result = await service.isHealthy();
expect(result.connectedAccount.status).toBe('up');
expect(result.connectedAccount.details.messageSync.status).toBe('up');
expect(result.connectedAccount.details.calendarSync.status).toBe('up');
});
// 2 failures out of 3 jobs on each side: both sub-checks are down and the
// combined error joins the two messages with " and ".
it('should return down status when both syncs fail', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ACTIVE]: 1,
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 2,
},
);
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
{
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
[CalendarChannelSyncStatus.ACTIVE]: 1,
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 2,
},
);
const result = await service.isHealthy();
expect(result.connectedAccount.status).toBe('down');
expect(result.connectedAccount.error).toBe(
`${HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE} and ${HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE}`,
);
expect(result.connectedAccount.details.messageSync.status).toBe('down');
expect(result.connectedAccount.details.calendarSync.status).toBe('down');
});
});
});

View File

@ -1,137 +0,0 @@
import { HealthIndicatorService } from '@nestjs/terminus';
import { Test, TestingModule } from '@nestjs/testing';
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
import { HEALTH_INDICATORS_TIMEOUT } from 'src/engine/core-modules/health/constants/health-indicators-timeout.conts';
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
import { MessageSyncHealthIndicator } from 'src/engine/core-modules/health/indicators/message-sync.health';
import { MessageChannelSyncStatus } from 'src/modules/messaging/common/standard-objects/message-channel.workspace-entity';
// Unit tests for MessageSyncHealthIndicator, with the cache service and the
// Terminus indicator service both replaced by mocks.
describe('MessageSyncHealthIndicator', () => {
let service: MessageSyncHealthIndicator;
let healthCacheService: jest.Mocked<HealthCacheService>;
let healthIndicatorService: jest.Mocked<HealthIndicatorService>;
beforeEach(async () => {
healthCacheService = {
getMessageChannelSyncJobByStatusCounter: jest.fn(),
} as any;
// The stub always reports under the "messageSync" key: up() spreads the
// given data next to the status, down() stores its argument as `error`.
healthIndicatorService = {
check: jest.fn().mockReturnValue({
up: jest.fn().mockImplementation((data) => ({
messageSync: { status: 'up', ...data },
})),
down: jest.fn().mockImplementation((error) => ({
messageSync: { status: 'down', error },
})),
}),
} as any;
const module: TestingModule = await Test.createTestingModule({
providers: [
MessageSyncHealthIndicator,
{
provide: HealthCacheService,
useValue: healthCacheService,
},
{
provide: HealthIndicatorService,
useValue: healthIndicatorService,
},
],
}).compile();
service = module.get<MessageSyncHealthIndicator>(
MessageSyncHealthIndicator,
);
// Fake timers let the timeout test advance the clock without waiting.
jest.useFakeTimers();
});
afterEach(() => {
jest.useRealTimers();
});
it('should be defined', () => {
expect(service).toBeDefined();
});
// All counters at zero: no jobs means a failure rate of 0 and status "up".
it('should return up status when no jobs are present', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ONGOING]: 0,
[MessageChannelSyncStatus.ACTIVE]: 0,
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 0,
},
);
const result = await service.isHealthy();
expect(result.messageSync.status).toBe('up');
expect(result.messageSync.details.totalJobs).toBe(0);
expect(result.messageSync.details.failedJobs).toBe(0);
expect(result.messageSync.details.failureRate).toBe(0);
});
// 1 FAILED_UNKNOWN out of 11 jobs gives 9.09%.
it('should return up status when failure rate is below 20%', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ONGOING]: 2,
[MessageChannelSyncStatus.ACTIVE]: 8,
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 1,
},
);
const result = await service.isHealthy();
expect(result.messageSync.status).toBe('up');
expect(result.messageSync.details.totalJobs).toBe(11);
expect(result.messageSync.details.failedJobs).toBe(1);
expect(result.messageSync.details.failureRate).toBe(9.09);
});
// 2 FAILED_UNKNOWN out of 6 jobs gives 33.33%; the down() payload (error
// message plus details) ends up nested under `error` because of the stub.
it('should return down status when failure rate is above 20%', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
{
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
[MessageChannelSyncStatus.ONGOING]: 1,
[MessageChannelSyncStatus.ACTIVE]: 1,
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 2,
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 2,
},
);
const result = await service.isHealthy();
expect(result.messageSync.status).toBe('down');
expect(result.messageSync.error.error).toBe(
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
);
expect(result.messageSync.error.details).toBeDefined();
expect(result.messageSync.error.details.failureRate).toBe(33.33);
});
// The mocked counter call resolves only after the timeout has elapsed, so
// advancing the fake clock past HEALTH_INDICATORS_TIMEOUT must yield "down".
it('should timeout after specified duration', async () => {
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockImplementationOnce(
() =>
new Promise((resolve) =>
setTimeout(resolve, HEALTH_INDICATORS_TIMEOUT + 100),
),
);
const healthCheckPromise = service.isHealthy();
jest.advanceTimersByTime(HEALTH_INDICATORS_TIMEOUT + 1);
const result = await healthCheckPromise;
expect(result.messageSync.status).toBe('down');
expect(result.messageSync.error).toBe(
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
);
});
});

View File

@ -0,0 +1,155 @@
import { Injectable } from '@nestjs/common';
import {
HealthIndicatorResult,
HealthIndicatorService,
} from '@nestjs/terminus';
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
import { METRICS_FAILURE_RATE_THRESHOLD } from 'src/engine/core-modules/health/constants/metrics-failure-rate-threshold.const';
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
import { withHealthCheckTimeout } from 'src/engine/core-modules/health/utils/health-check-timeout.util';
/**
 * Health indicator for connected-account synchronisation.
 *
 * It evaluates two sub-checks, message sync and calendar sync, from the job
 * counters kept by HealthCacheService, and reports them together under the
 * `connectedAccount` key. The indicator is "down" as soon as either sub-check
 * is down.
 */
@Injectable()
export class ConnectedAccountHealth {
  constructor(
    private readonly healthIndicatorService: HealthIndicatorService,
    private readonly healthCacheService: HealthCacheService,
  ) {}

  /**
   * Shared implementation of both sub-checks: loads the per-status counters
   * (bounded by the health-check timeout), derives the failure rate and maps
   * it to an up/down result stored under `key`.
   *
   * @param key - Result key of the sub-check (`messageSync` / `calendarSync`).
   * @param fetchCounters - Loads the per-status job counters.
   * @param messages - Error messages to report for a timeout, for any other
   *   failure of the check itself, and for a too-high failure rate.
   * @returns The Terminus result of the sub-check; this method never rejects.
   */
  private async checkSyncHealth<
    TCounters extends { FAILED_UNKNOWN?: number },
  >(
    key: string,
    fetchCounters: () => Promise<TCounters>,
    messages: { timeout: string; checkFailed: string; highFailureRate: string },
  ): Promise<HealthIndicatorResult> {
    const indicator = this.healthIndicatorService.check(key);

    try {
      const counters = await withHealthCheckTimeout(
        fetchCounters(),
        messages.timeout,
      );

      const totalJobs = Object.values(counters).reduce<number>(
        (sum, count) => sum + (count || 0),
        0,
      );

      // Only FAILED_UNKNOWN counts as a failure. FAILED_INSUFFICIENT_PERMISSIONS
      // is deliberately left out of the rate (it was already disabled here).
      const failedJobs = counters.FAILED_UNKNOWN || 0;

      // Percentage rounded to two decimals; no jobs means a rate of 0.
      const failureRate =
        totalJobs > 0
          ? Math.round((failedJobs / totalJobs) * 100 * 100) / 100
          : 0;

      const details = {
        counters,
        totalJobs,
        failedJobs,
        failureRate,
      };

      if (totalJobs === 0 || failureRate < METRICS_FAILURE_RATE_THRESHOLD) {
        return indicator.up({ details });
      }

      return indicator.down({
        error: messages.highFailureRate,
        details,
      });
    } catch (error) {
      // Read the message defensively: a rejection value may be anything,
      // including undefined, and an exception thrown from inside this catch
      // would reject the whole combined check in isHealthy().
      const thrownMessage =
        typeof error === 'object' && error !== null && 'message' in error
          ? (error as { message: unknown }).message
          : undefined;

      return indicator.down({
        error:
          thrownMessage === messages.timeout
            ? messages.timeout
            : messages.checkFailed,
        details: {},
      });
    }
  }

  /** Sub-check for message channel sync jobs, reported as `messageSync`. */
  private async checkMessageSyncHealth(): Promise<HealthIndicatorResult> {
    return this.checkSyncHealth(
      'messageSync',
      () => this.healthCacheService.getMessageChannelSyncJobByStatusCounter(),
      {
        timeout: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
        checkFailed: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_CHECK_FAILED,
        highFailureRate: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
      },
    );
  }

  /** Sub-check for calendar channel sync jobs, reported as `calendarSync`. */
  private async checkCalendarSyncHealth(): Promise<HealthIndicatorResult> {
    return this.checkSyncHealth(
      'calendarSync',
      () => this.healthCacheService.getCalendarChannelSyncJobByStatusCounter(),
      {
        timeout: HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_TIMEOUT,
        checkFailed: HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_CHECK_FAILED,
        highFailureRate: HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE,
      },
    );
  }

  /**
   * Runs both sub-checks concurrently and merges them into one result.
   *
   * @returns An "up" result when both sub-checks are up; otherwise a "down"
   *   result whose `error` is the failing sub-check's message, or both
   *   messages joined with " and " (message sync first). The sub-check
   *   results are always attached as `details.messageSync` and
   *   `details.calendarSync`.
   */
  async isHealthy(): Promise<HealthIndicatorResult> {
    const indicator = this.healthIndicatorService.check('connectedAccount');

    const [messageResult, calendarResult] = await Promise.all([
      this.checkMessageSyncHealth(),
      this.checkCalendarSyncHealth(),
    ]);

    const details = {
      messageSync: messageResult.messageSync,
      calendarSync: calendarResult.calendarSync,
    };

    // Keep the order message -> calendar so the combined text is stable.
    const errors = [details.messageSync, details.calendarSync]
      .filter((result) => result.status === 'down')
      .map((result) => result.error);

    if (errors.length === 0) {
      return indicator.up({ details });
    }

    return indicator.down({
      error: errors.join(' and '),
      details,
    });
  }
}

View File

@ -1,63 +0,0 @@
import { Injectable } from '@nestjs/common';
import {
HealthIndicatorResult,
HealthIndicatorService,
} from '@nestjs/terminus';
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
import { withHealthCheckTimeout } from 'src/engine/core-modules/health/utils/health-check-timeout.util';
/**
 * Health indicator for message channel sync jobs, reported under the
 * `messageSync` key and derived from the job counters kept by
 * HealthCacheService.
 */
@Injectable()
export class MessageSyncHealthIndicator {
constructor(
private readonly healthIndicatorService: HealthIndicatorService,
private readonly healthCacheService: HealthCacheService,
) {}
/**
 * Computes the failure rate of message sync jobs and maps it to a result.
 *
 * @returns "up" when there are no jobs or the failure rate is below 20%,
 *   otherwise "down". A timeout or any other error while loading the
 *   counters also yields "down"; the method does not rethrow.
 */
async isHealthy(): Promise<HealthIndicatorResult> {
const indicator = this.healthIndicatorService.check('messageSync');
try {
// Loading the counters is bounded by the health-check timeout helper.
const counters = await withHealthCheckTimeout(
this.healthCacheService.getMessageChannelSyncJobByStatusCounter(),
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
);
const totalJobs = Object.values(counters).reduce(
(sum, count) => sum + (count || 0),
0,
);
// Only FAILED_UNKNOWN is counted as a failure; the permissions failure
// below is intentionally disabled.
const failedJobs = counters.FAILED_UNKNOWN || 0;
// + (counters.FAILED_INSUFFICIENT_PERMISSIONS || 0)
// Percentage rounded to two decimals; no jobs means a rate of 0.
const failureRate =
totalJobs > 0
? Math.round((failedJobs / totalJobs) * 100 * 100) / 100
: 0;
const details = {
counters,
totalJobs,
failedJobs,
failureRate,
};
if (totalJobs === 0 || failureRate < 20) {
return indicator.up({ details });
}
return indicator.down({
error: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
details,
});
} catch (error) {
// Distinguish the timeout from every other failure by its message.
const errorMessage =
error.message === HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT
? HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT
: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_CHECK_FAILED;
// NOTE(review): this branch passes a bare string to down(), whereas the
// high-failure-rate branch above passes an { error, details } object.
return indicator.down(errorMessage);
}
}
}

View File

@ -71,7 +71,7 @@ export class WorkerHealthIndicator {
]);
queueStatuses.push({
name: queueName,
queueName: queueName,
workers: workers.length,
metrics: {
failed: failedCount,

View File

@ -1,7 +1,7 @@
import { Field, ObjectType } from '@nestjs/graphql';
@ObjectType()
export class MessageChannelSyncJobByStatusCounter {
export class AccountSyncJobByStatusCounter {
@Field(() => Number, { nullable: true })
NOT_SYNCED?: number;

View File

@ -1,4 +1,5 @@
/**
 * Base cache keys of the health counters. The health cache service passes
 * them to its timestamped-key helper to address one counter bucket.
 */
export enum HealthCounterCacheKeys {
MessageChannelSyncJobByStatus = 'message-channel-sync-job-by-status',
InvalidCaptcha = 'invalid-captcha',
// NOTE(review): named "CalendarEvent…" while the service methods using it are
// named "…CalendarChannelSyncJobByStatusCounter" — confirm the intended name.
CalendarEventSyncJobByStatus = 'calendar-event-sync-job-by-status',
}

View File

@ -5,7 +5,7 @@ import { WorkerQueueMetrics } from 'src/engine/core-modules/health/types/worker-
@ObjectType()
export class WorkerQueueHealth {
@Field(() => String)
name: string;
queueName: string;
@Field(() => Number)
workers: number;