refactor + new account sync metrics + isolating health status inside folder admin-panel > health-status (#10314)
closes https://github.com/twentyhq/core-team-issues/issues/444 https://github.com/twentyhq/core-team-issues/issues/443 https://github.com/twentyhq/core-team-issues/issues/442
This commit is contained in:
@ -9,4 +9,7 @@ export const HEALTH_ERROR_MESSAGES = {
|
||||
MESSAGE_SYNC_TIMEOUT: 'Message sync check timeout',
|
||||
MESSAGE_SYNC_CHECK_FAILED: 'Message sync check failed',
|
||||
MESSAGE_SYNC_HIGH_FAILURE_RATE: 'High failure rate in message sync jobs',
|
||||
CALENDAR_SYNC_TIMEOUT: 'Calendar sync check timeout',
|
||||
CALENDAR_SYNC_CHECK_FAILED: 'Calendar sync check failed',
|
||||
CALENDAR_SYNC_HIGH_FAILURE_RATE: 'High failure rate in calendar sync jobs',
|
||||
} as const;
|
||||
|
||||
@ -0,0 +1 @@
|
||||
/**
 * Failure-rate threshold expressed as a percentage (0-100).
 * The sync health checks report "up" while their computed failure rate is
 * strictly below this value and "down" once it reaches or exceeds it.
 */
export const METRICS_FAILURE_RATE_THRESHOLD = 20;
|
||||
@ -2,6 +2,7 @@ import { HealthCheckService } from '@nestjs/terminus';
|
||||
import { Test, TestingModule } from '@nestjs/testing';
|
||||
|
||||
import { HealthController } from 'src/engine/core-modules/health/controllers/health.controller';
|
||||
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
|
||||
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
|
||||
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
|
||||
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
|
||||
@ -29,6 +30,10 @@ describe('HealthController', () => {
|
||||
provide: WorkerHealthIndicator,
|
||||
useValue: { isHealthy: jest.fn() },
|
||||
},
|
||||
{
|
||||
provide: ConnectedAccountHealth,
|
||||
useValue: { isHealthy: jest.fn() },
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
|
||||
|
||||
@ -14,6 +14,7 @@ describe('MetricsController', () => {
|
||||
provide: HealthCacheService,
|
||||
useValue: {
|
||||
getMessageChannelSyncJobByStatusCounter: jest.fn(),
|
||||
getCalendarChannelSyncJobByStatusCounter: jest.fn(),
|
||||
getInvalidCaptchaCounter: jest.fn(),
|
||||
},
|
||||
},
|
||||
|
||||
@ -1,7 +1,8 @@
|
||||
import { BadRequestException, Controller, Get, Param } from '@nestjs/common';
|
||||
import { HealthCheck, HealthCheckService } from '@nestjs/terminus';
|
||||
|
||||
import { HealthServiceName } from 'src/engine/core-modules/health/enums/health-service-name.enum';
|
||||
import { HealthIndicatorId } from 'src/engine/core-modules/health/enums/health-indicator-id.enum';
|
||||
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
|
||||
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
|
||||
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
|
||||
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
|
||||
@ -13,6 +14,7 @@ export class HealthController {
|
||||
private readonly databaseHealth: DatabaseHealthIndicator,
|
||||
private readonly redisHealth: RedisHealthIndicator,
|
||||
private readonly workerHealth: WorkerHealthIndicator,
|
||||
private readonly connectedAccountHealth: ConnectedAccountHealth,
|
||||
) {}
|
||||
|
||||
@Get()
|
||||
@ -23,17 +25,19 @@ export class HealthController {
|
||||
|
||||
/**
 * Runs the health check of a single indicator selected by the URL path.
 *
 * The route placeholder and the `@Param` name must be the same string;
 * otherwise Nest resolves the parameter to `undefined` and every request
 * is rejected as invalid.
 *
 * @param indicatorId - Path segment naming the indicator; expected to be one
 *   of the `HealthIndicatorId` values.
 * @returns The result of `HealthCheckService.check` for that indicator.
 * @throws BadRequestException when `indicatorId` is not a known indicator.
 */
@Get('/:indicatorId')
@HealthCheck()
checkService(@Param('indicatorId') indicatorId: HealthIndicatorId) {
  const checks = {
    [HealthIndicatorId.database]: () => this.databaseHealth.isHealthy(),
    [HealthIndicatorId.redis]: () => this.redisHealth.isHealthy(),
    [HealthIndicatorId.worker]: () => this.workerHealth.isHealthy(),
    [HealthIndicatorId.connectedAccount]: () =>
      this.connectedAccountHealth.isHealthy(),
  };

  // Own-property lookup only: the `in` operator would also accept inherited
  // keys such as "constructor" or "toString" arriving from the URL, and the
  // lookup below would then hand a non-indicator function to Terminus.
  if (!Object.prototype.hasOwnProperty.call(checks, indicatorId)) {
    throw new BadRequestException(`Invalid indicatorId: ${indicatorId}`);
  }

  return this.health.check([checks[indicatorId]]);
}
|
||||
}
|
||||
|
||||
@ -15,4 +15,9 @@ export class MetricsController {
|
||||
getInvalidCaptchaCounter() {
|
||||
return this.healthCacheService.getInvalidCaptchaCounter();
|
||||
}
|
||||
|
||||
/**
 * GET endpoint exposing the calendar channel sync job counters.
 * Delegates directly to
 * `this.healthCacheService.getCalendarChannelSyncJobByStatusCounter()` and
 * returns its result unchanged.
 */
@Get('/calendar-channel-sync-job-by-status-counter')
|
||||
getCalendarChannelSyncJobByStatusCounter() {
|
||||
return this.healthCacheService.getCalendarChannelSyncJobByStatusCounter();
|
||||
}
|
||||
}
|
||||
|
||||
@ -0,0 +1,12 @@
|
||||
import { registerEnumType } from '@nestjs/graphql';
|
||||
|
||||
/**
 * Identifiers of the available health indicators.
 * The string values are what the health controller matches against the URL
 * path segment, and the enum is also registered as a GraphQL enum type
 * under the name `HealthIndicatorId`.
 */
export enum HealthIndicatorId {
|
||||
database = 'database',
|
||||
redis = 'redis',
|
||||
worker = 'worker',
|
||||
// Combined message-sync and calendar-sync check.
connectedAccount = 'connectedAccount',
|
||||
}
|
||||
|
||||
registerEnumType(HealthIndicatorId, {
|
||||
name: 'HealthIndicatorId',
|
||||
});
|
||||
@ -1,6 +0,0 @@
|
||||
export enum HealthServiceName {
|
||||
DATABASE = 'database',
|
||||
REDIS = 'redis',
|
||||
WORKER = 'worker',
|
||||
MESSAGE_SYNC = 'messageSync',
|
||||
}
|
||||
@ -4,8 +4,9 @@ import { InjectCacheStorage } from 'src/engine/core-modules/cache-storage/decora
|
||||
import { CacheStorageService } from 'src/engine/core-modules/cache-storage/services/cache-storage.service';
|
||||
import { CacheStorageNamespace } from 'src/engine/core-modules/cache-storage/types/cache-storage-namespace.enum';
|
||||
import { EnvironmentService } from 'src/engine/core-modules/environment/environment.service';
|
||||
import { AccountSyncJobByStatusCounter } from 'src/engine/core-modules/health/types/account-sync-metrics.types';
|
||||
import { HealthCounterCacheKeys } from 'src/engine/core-modules/health/types/health-counter-cache-keys.type';
|
||||
import { MessageChannelSyncJobByStatusCounter } from 'src/engine/core-modules/health/types/message-sync-metrics.types';
|
||||
import { CalendarChannelSyncStatus } from 'src/modules/calendar/common/standard-objects/calendar-channel.workspace-entity';
|
||||
import { MessageChannelSyncStatus } from 'src/modules/messaging/common/standard-objects/message-channel.workspace-entity';
|
||||
|
||||
@Injectable()
|
||||
@ -48,9 +49,7 @@ export class HealthCacheService {
|
||||
);
|
||||
|
||||
const currentCounter =
|
||||
await this.cacheStorage.get<MessageChannelSyncJobByStatusCounter>(
|
||||
cacheKey,
|
||||
);
|
||||
await this.cacheStorage.get<AccountSyncJobByStatusCounter>(cacheKey);
|
||||
|
||||
const updatedCounter = {
|
||||
...(currentCounter || {}),
|
||||
@ -80,7 +79,7 @@ export class HealthCacheService {
|
||||
|
||||
for (const key of cacheKeys) {
|
||||
const counter =
|
||||
await this.cacheStorage.get<MessageChannelSyncJobByStatusCounter>(key);
|
||||
await this.cacheStorage.get<AccountSyncJobByStatusCounter>(key);
|
||||
|
||||
if (!counter) continue;
|
||||
|
||||
@ -130,4 +129,58 @@ export class HealthCacheService {
|
||||
|
||||
return aggregatedCounter;
|
||||
}
|
||||
|
||||
/**
 * Adds `increment` to the stored calendar channel sync job count for
 * `status`, then writes the counter back with the health cache TTL.
 *
 * The cache key comes from `getCacheKeyWithTimestamp` called without an
 * explicit timestamp (presumably the current time bucket - confirm against
 * that helper).
 *
 * NOTE(review): the counter is read and then written in two separate cache
 * calls, so concurrent increments on the same key can overwrite each other.
 *
 * @param status - Sync status whose count is increased.
 * @param increment - Amount added to the existing count (0 when absent).
 * @returns Whatever `cacheStorage.set` resolves to.
 */
async incrementCalendarChannelSyncJobByStatusCounter(
  status: CalendarChannelSyncStatus,
  increment: number,
) {
  const bucketKey = this.getCacheKeyWithTimestamp(
    HealthCounterCacheKeys.CalendarEventSyncJobByStatus,
  );
  const storedCounter =
    await this.cacheStorage.get<AccountSyncJobByStatusCounter>(bucketKey);

  // A missing counter or a missing status entry both start from zero.
  const previousCount = storedCounter?.[status] || 0;
  const nextCounter = {
    ...(storedCounter || {}),
    [status]: previousCount + increment,
  };

  return await this.cacheStorage.set(
    bucketKey,
    nextCounter,
    this.healthCacheTtl,
  );
}
|
||||
|
||||
/**
 * Aggregates the calendar channel sync job counters stored for every
 * timestamp returned by `getLastXMinutesTimestamps` for the configured
 * monitoring window.
 *
 * Every `CalendarChannelSyncStatus` value is present in the result, starting
 * at 0. Cached entries whose key is not a current enum value, or whose count
 * is missing, are summed defensively so that a stale cache entry cannot turn
 * a total into `NaN`.
 *
 * @returns A map from sync status to the summed job count over the window.
 */
async getCalendarChannelSyncJobByStatusCounter() {
  const cacheKeys = this.getLastXMinutesTimestamps(
    this.healthMonitoringTimeWindowInMinutes,
  ).map((timestamp) =>
    this.getCacheKeyWithTimestamp(
      HealthCounterCacheKeys.CalendarEventSyncJobByStatus,
      timestamp,
    ),
  );

  const aggregatedCounter: Record<string, number> = Object.fromEntries(
    Object.values(CalendarChannelSyncStatus).map((status) => [status, 0]),
  );

  // The reads are independent, so issue them together instead of paying one
  // cache round-trip per timestamp in sequence.
  const counters = await Promise.all(
    cacheKeys.map((key) =>
      this.cacheStorage.get<AccountSyncJobByStatusCounter>(key),
    ),
  );

  for (const counter of counters) {
    if (!counter) continue;

    for (const [status, count] of Object.entries(counter) as [
      string,
      number | undefined,
    ][]) {
      // `?? 0` on both sides: an unknown status key or a missing count would
      // otherwise yield `undefined + n`, i.e. NaN.
      aggregatedCounter[status] =
        (aggregatedCounter[status] ?? 0) + (count ?? 0);
    }
  }

  return aggregatedCounter;
}
|
||||
}
|
||||
|
||||
@ -3,11 +3,11 @@ import { TerminusModule } from '@nestjs/terminus';
|
||||
|
||||
import { HealthController } from 'src/engine/core-modules/health/controllers/health.controller';
|
||||
import { MetricsController } from 'src/engine/core-modules/health/controllers/metrics.controller';
|
||||
import { MessageSyncHealthIndicator } from 'src/engine/core-modules/health/indicators/message-sync.health';
|
||||
import { RedisClientModule } from 'src/engine/core-modules/redis-client/redis-client.module';
|
||||
|
||||
import { HealthCacheService } from './health-cache.service';
|
||||
|
||||
import { ConnectedAccountHealth } from './indicators/connected-account.health';
|
||||
import { DatabaseHealthIndicator } from './indicators/database.health';
|
||||
import { RedisHealthIndicator } from './indicators/redis.health';
|
||||
import { WorkerHealthIndicator } from './indicators/worker.health';
|
||||
@ -19,14 +19,14 @@ import { WorkerHealthIndicator } from './indicators/worker.health';
|
||||
DatabaseHealthIndicator,
|
||||
RedisHealthIndicator,
|
||||
WorkerHealthIndicator,
|
||||
MessageSyncHealthIndicator,
|
||||
ConnectedAccountHealth,
|
||||
],
|
||||
exports: [
|
||||
HealthCacheService,
|
||||
DatabaseHealthIndicator,
|
||||
RedisHealthIndicator,
|
||||
WorkerHealthIndicator,
|
||||
MessageSyncHealthIndicator,
|
||||
ConnectedAccountHealth,
|
||||
],
|
||||
})
|
||||
export class HealthModule {}
|
||||
|
||||
@ -0,0 +1,316 @@
|
||||
import { HealthIndicatorService } from '@nestjs/terminus';
|
||||
import { Test, TestingModule } from '@nestjs/testing';
|
||||
|
||||
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
|
||||
import { HEALTH_INDICATORS_TIMEOUT } from 'src/engine/core-modules/health/constants/health-indicators-timeout.conts';
|
||||
import { METRICS_FAILURE_RATE_THRESHOLD } from 'src/engine/core-modules/health/constants/metrics-failure-rate-threshold.const';
|
||||
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
|
||||
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
|
||||
import { CalendarChannelSyncStatus } from 'src/modules/calendar/common/standard-objects/calendar-channel.workspace-entity';
|
||||
import { MessageChannelSyncStatus } from 'src/modules/messaging/common/standard-objects/message-channel.workspace-entity';
|
||||
|
||||
describe('ConnectedAccountHealth', () => {
|
||||
let service: ConnectedAccountHealth;
|
||||
let healthCacheService: jest.Mocked<HealthCacheService>;
|
||||
let healthIndicatorService: jest.Mocked<HealthIndicatorService>;
|
||||
|
||||
beforeEach(async () => {
|
||||
healthCacheService = {
|
||||
getMessageChannelSyncJobByStatusCounter: jest.fn(),
|
||||
getCalendarChannelSyncJobByStatusCounter: jest.fn(),
|
||||
} as any;
|
||||
|
||||
healthIndicatorService = {
|
||||
check: jest.fn().mockImplementation((key) => ({
|
||||
up: jest.fn().mockImplementation((data) => ({
|
||||
[key]: {
|
||||
status: 'up',
|
||||
details: data.details,
|
||||
},
|
||||
})),
|
||||
down: jest.fn().mockImplementation((data) => ({
|
||||
[key]: {
|
||||
status: 'down',
|
||||
error: data.error,
|
||||
details: data.details,
|
||||
},
|
||||
})),
|
||||
})),
|
||||
} as any;
|
||||
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
ConnectedAccountHealth,
|
||||
{
|
||||
provide: HealthCacheService,
|
||||
useValue: healthCacheService,
|
||||
},
|
||||
{
|
||||
provide: HealthIndicatorService,
|
||||
useValue: healthIndicatorService,
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
|
||||
service = module.get<ConnectedAccountHealth>(ConnectedAccountHealth);
|
||||
jest.useFakeTimers();
|
||||
});
|
||||
afterEach(() => {
|
||||
jest.useRealTimers();
|
||||
});
|
||||
|
||||
it('should be defined', () => {
|
||||
expect(service).toBeDefined();
|
||||
});
|
||||
|
||||
describe('message sync health', () => {
|
||||
it('should return up status when no message sync jobs are present', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ONGOING]: 0,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 0,
|
||||
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
|
||||
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 0,
|
||||
},
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[CalendarChannelSyncStatus.ACTIVE]: 0,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.connectedAccount.status).toBe('up');
|
||||
expect(result.connectedAccount.details.messageSync.status).toBe('up');
|
||||
expect(
|
||||
result.connectedAccount.details.messageSync.details.totalJobs,
|
||||
).toBe(0);
|
||||
expect(
|
||||
result.connectedAccount.details.messageSync.details.failedJobs,
|
||||
).toBe(0);
|
||||
expect(
|
||||
result.connectedAccount.details.messageSync.details.failureRate,
|
||||
).toBe(0);
|
||||
});
|
||||
|
||||
it(`should return down status when message sync failure rate is above ${METRICS_FAILURE_RATE_THRESHOLD}%`, async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ONGOING]: 1,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 1,
|
||||
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 2,
|
||||
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 2,
|
||||
},
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[CalendarChannelSyncStatus.ACTIVE]: 1,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.connectedAccount.status).toBe('down');
|
||||
expect(result.connectedAccount.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
|
||||
);
|
||||
expect(result.connectedAccount.details.messageSync.status).toBe('down');
|
||||
expect(result.connectedAccount.details.messageSync.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
|
||||
);
|
||||
expect(
|
||||
result.connectedAccount.details.messageSync.details.failureRate,
|
||||
).toBe(33.33);
|
||||
});
|
||||
});
|
||||
|
||||
describe('calendar sync health', () => {
|
||||
it('should return up status when no calendar sync jobs are present', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 0,
|
||||
},
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[CalendarChannelSyncStatus.ONGOING]: 0,
|
||||
[CalendarChannelSyncStatus.ACTIVE]: 0,
|
||||
[CalendarChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
|
||||
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 0,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.connectedAccount.status).toBe('up');
|
||||
expect(result.connectedAccount.details.calendarSync.status).toBe('up');
|
||||
expect(
|
||||
result.connectedAccount.details.calendarSync.details.totalJobs,
|
||||
).toBe(0);
|
||||
expect(
|
||||
result.connectedAccount.details.calendarSync.details.failedJobs,
|
||||
).toBe(0);
|
||||
expect(
|
||||
result.connectedAccount.details.calendarSync.details.failureRate,
|
||||
).toBe(0);
|
||||
});
|
||||
|
||||
it(`should return down status when calendar sync failure rate is above ${METRICS_FAILURE_RATE_THRESHOLD}%`, async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 1,
|
||||
},
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[CalendarChannelSyncStatus.ONGOING]: 1,
|
||||
[CalendarChannelSyncStatus.ACTIVE]: 1,
|
||||
[CalendarChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 2,
|
||||
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 2,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.connectedAccount.status).toBe('down');
|
||||
expect(result.connectedAccount.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE,
|
||||
);
|
||||
expect(result.connectedAccount.details.calendarSync.status).toBe('down');
|
||||
expect(result.connectedAccount.details.calendarSync.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE,
|
||||
);
|
||||
expect(
|
||||
result.connectedAccount.details.calendarSync.details.failureRate,
|
||||
).toBe(33.33);
|
||||
});
|
||||
});
|
||||
|
||||
describe('timeout handling', () => {
|
||||
it('should handle message sync timeout', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockImplementationOnce(
|
||||
() =>
|
||||
new Promise((resolve) =>
|
||||
setTimeout(resolve, HEALTH_INDICATORS_TIMEOUT + 100),
|
||||
),
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[CalendarChannelSyncStatus.ACTIVE]: 1,
|
||||
},
|
||||
);
|
||||
|
||||
const healthCheckPromise = service.isHealthy();
|
||||
|
||||
jest.advanceTimersByTime(HEALTH_INDICATORS_TIMEOUT + 1);
|
||||
const result = await healthCheckPromise;
|
||||
|
||||
expect(result.connectedAccount.status).toBe('down');
|
||||
expect(result.connectedAccount.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
|
||||
);
|
||||
expect(result.connectedAccount.details.messageSync.status).toBe('down');
|
||||
expect(result.connectedAccount.details.messageSync.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
|
||||
);
|
||||
});
|
||||
|
||||
it('should handle calendar sync timeout', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 1,
|
||||
},
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockImplementationOnce(
|
||||
() =>
|
||||
new Promise((resolve) =>
|
||||
setTimeout(resolve, HEALTH_INDICATORS_TIMEOUT + 100),
|
||||
),
|
||||
);
|
||||
|
||||
const healthCheckPromise = service.isHealthy();
|
||||
|
||||
jest.advanceTimersByTime(HEALTH_INDICATORS_TIMEOUT + 1);
|
||||
const result = await healthCheckPromise;
|
||||
|
||||
expect(result.connectedAccount.status).toBe('down');
|
||||
expect(result.connectedAccount.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_TIMEOUT,
|
||||
);
|
||||
expect(result.connectedAccount.details.calendarSync.status).toBe('down');
|
||||
expect(result.connectedAccount.details.calendarSync.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_TIMEOUT,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('combined health check', () => {
|
||||
it('should return combined status with both checks healthy', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 8,
|
||||
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 1,
|
||||
},
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[CalendarChannelSyncStatus.ACTIVE]: 8,
|
||||
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 1,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.connectedAccount.status).toBe('up');
|
||||
expect(result.connectedAccount.details.messageSync.status).toBe('up');
|
||||
expect(result.connectedAccount.details.calendarSync.status).toBe('up');
|
||||
});
|
||||
|
||||
it('should return down status when both syncs fail', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 1,
|
||||
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 2,
|
||||
},
|
||||
);
|
||||
|
||||
healthCacheService.getCalendarChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[CalendarChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[CalendarChannelSyncStatus.ACTIVE]: 1,
|
||||
[CalendarChannelSyncStatus.FAILED_UNKNOWN]: 2,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.connectedAccount.status).toBe('down');
|
||||
expect(result.connectedAccount.error).toBe(
|
||||
`${HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE} and ${HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE}`,
|
||||
);
|
||||
expect(result.connectedAccount.details.messageSync.status).toBe('down');
|
||||
expect(result.connectedAccount.details.calendarSync.status).toBe('down');
|
||||
});
|
||||
});
|
||||
});
|
||||
@ -1,137 +0,0 @@
|
||||
import { HealthIndicatorService } from '@nestjs/terminus';
|
||||
import { Test, TestingModule } from '@nestjs/testing';
|
||||
|
||||
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
|
||||
import { HEALTH_INDICATORS_TIMEOUT } from 'src/engine/core-modules/health/constants/health-indicators-timeout.conts';
|
||||
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
|
||||
import { MessageSyncHealthIndicator } from 'src/engine/core-modules/health/indicators/message-sync.health';
|
||||
import { MessageChannelSyncStatus } from 'src/modules/messaging/common/standard-objects/message-channel.workspace-entity';
|
||||
|
||||
describe('MessageSyncHealthIndicator', () => {
|
||||
let service: MessageSyncHealthIndicator;
|
||||
let healthCacheService: jest.Mocked<HealthCacheService>;
|
||||
let healthIndicatorService: jest.Mocked<HealthIndicatorService>;
|
||||
|
||||
beforeEach(async () => {
|
||||
healthCacheService = {
|
||||
getMessageChannelSyncJobByStatusCounter: jest.fn(),
|
||||
} as any;
|
||||
|
||||
healthIndicatorService = {
|
||||
check: jest.fn().mockReturnValue({
|
||||
up: jest.fn().mockImplementation((data) => ({
|
||||
messageSync: { status: 'up', ...data },
|
||||
})),
|
||||
down: jest.fn().mockImplementation((error) => ({
|
||||
messageSync: { status: 'down', error },
|
||||
})),
|
||||
}),
|
||||
} as any;
|
||||
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
MessageSyncHealthIndicator,
|
||||
{
|
||||
provide: HealthCacheService,
|
||||
useValue: healthCacheService,
|
||||
},
|
||||
{
|
||||
provide: HealthIndicatorService,
|
||||
useValue: healthIndicatorService,
|
||||
},
|
||||
],
|
||||
}).compile();
|
||||
|
||||
service = module.get<MessageSyncHealthIndicator>(
|
||||
MessageSyncHealthIndicator,
|
||||
);
|
||||
jest.useFakeTimers();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
jest.useRealTimers();
|
||||
});
|
||||
|
||||
it('should be defined', () => {
|
||||
expect(service).toBeDefined();
|
||||
});
|
||||
|
||||
it('should return up status when no jobs are present', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ONGOING]: 0,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 0,
|
||||
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
|
||||
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 0,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.messageSync.status).toBe('up');
|
||||
expect(result.messageSync.details.totalJobs).toBe(0);
|
||||
expect(result.messageSync.details.failedJobs).toBe(0);
|
||||
expect(result.messageSync.details.failureRate).toBe(0);
|
||||
});
|
||||
|
||||
it('should return up status when failure rate is below 20%', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ONGOING]: 2,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 8,
|
||||
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 0,
|
||||
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 1,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.messageSync.status).toBe('up');
|
||||
expect(result.messageSync.details.totalJobs).toBe(11);
|
||||
expect(result.messageSync.details.failedJobs).toBe(1);
|
||||
expect(result.messageSync.details.failureRate).toBe(9.09);
|
||||
});
|
||||
|
||||
it('should return down status when failure rate is above 20%', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockResolvedValue(
|
||||
{
|
||||
[MessageChannelSyncStatus.NOT_SYNCED]: 0,
|
||||
[MessageChannelSyncStatus.ONGOING]: 1,
|
||||
[MessageChannelSyncStatus.ACTIVE]: 1,
|
||||
[MessageChannelSyncStatus.FAILED_INSUFFICIENT_PERMISSIONS]: 2,
|
||||
[MessageChannelSyncStatus.FAILED_UNKNOWN]: 2,
|
||||
},
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.messageSync.status).toBe('down');
|
||||
expect(result.messageSync.error.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
|
||||
);
|
||||
expect(result.messageSync.error.details).toBeDefined();
|
||||
expect(result.messageSync.error.details.failureRate).toBe(33.33);
|
||||
});
|
||||
|
||||
it('should timeout after specified duration', async () => {
|
||||
healthCacheService.getMessageChannelSyncJobByStatusCounter.mockImplementationOnce(
|
||||
() =>
|
||||
new Promise((resolve) =>
|
||||
setTimeout(resolve, HEALTH_INDICATORS_TIMEOUT + 100),
|
||||
),
|
||||
);
|
||||
|
||||
const healthCheckPromise = service.isHealthy();
|
||||
|
||||
jest.advanceTimersByTime(HEALTH_INDICATORS_TIMEOUT + 1);
|
||||
|
||||
const result = await healthCheckPromise;
|
||||
|
||||
expect(result.messageSync.status).toBe('down');
|
||||
expect(result.messageSync.error).toBe(
|
||||
HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
|
||||
);
|
||||
});
|
||||
});
|
||||
@ -0,0 +1,155 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import {
|
||||
HealthIndicatorResult,
|
||||
HealthIndicatorService,
|
||||
} from '@nestjs/terminus';
|
||||
|
||||
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
|
||||
import { METRICS_FAILURE_RATE_THRESHOLD } from 'src/engine/core-modules/health/constants/metrics-failure-rate-threshold.const';
|
||||
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
|
||||
import { withHealthCheckTimeout } from 'src/engine/core-modules/health/utils/health-check-timeout.util';
|
||||
|
||||
/**
 * Health indicator for connected accounts.
 *
 * Combines two sub-checks, message sync and calendar sync, each derived from
 * the job-by-status counters exposed by `HealthCacheService`. The combined
 * indicator is "down" as soon as one sub-check is "down".
 */
@Injectable()
export class ConnectedAccountHealth {
  constructor(
    private readonly healthIndicatorService: HealthIndicatorService,
    private readonly healthCacheService: HealthCacheService,
  ) {}

  /**
   * Shared implementation of the message and calendar sync checks.
   *
   * @param indicatorKey - Key under which the result is reported.
   * @param getCounters - Loads the job-by-status counters; called inside the
   *   try block so that a synchronous throw is reported as a failed check.
   * @param errorMessages - Messages used for the timeout, generic failure
   *   and high-failure-rate outcomes of this check.
   * @returns An "up" result with details, or a "down" result with an error.
   */
  private async checkSyncHealth(
    indicatorKey: string,
    getCounters: () => Promise<Record<string, number | undefined>>,
    errorMessages: {
      timeout: string;
      checkFailed: string;
      highFailureRate: string;
    },
  ): Promise<HealthIndicatorResult> {
    const indicator = this.healthIndicatorService.check(indicatorKey);

    try {
      const counters = await withHealthCheckTimeout(
        getCounters(),
        errorMessages.timeout,
      );

      const totalJobs = Object.values(counters).reduce<number>(
        (sum, count) => sum + (count || 0),
        0,
      );

      // Only FAILED_UNKNOWN counts as a failure here.
      // FAILED_INSUFFICIENT_PERMISSIONS is deliberately left out of the
      // failed count (it still contributes to totalJobs).
      const failedJobs = counters.FAILED_UNKNOWN || 0;

      // Percentage rounded to two decimals; 0 when there are no jobs.
      const failureRate =
        totalJobs > 0
          ? Math.round((failedJobs / totalJobs) * 100 * 100) / 100
          : 0;

      const details = {
        counters,
        totalJobs,
        failedJobs,
        failureRate,
      };

      if (totalJobs === 0 || failureRate < METRICS_FAILURE_RATE_THRESHOLD) {
        return indicator.up({ details });
      }

      return indicator.down({
        error: errorMessages.highFailureRate,
        details,
      });
    } catch (error) {
      // Read `message` defensively: a rejection value of null/undefined must
      // produce a "down" result, not a TypeError escaping this handler.
      const thrownMessage =
        typeof error === 'object' && error !== null && 'message' in error
          ? (error as { message: unknown }).message
          : undefined;

      const errorMessage =
        thrownMessage === errorMessages.timeout
          ? errorMessages.timeout
          : errorMessages.checkFailed;

      return indicator.down({
        error: errorMessage,
        details: {},
      });
    }
  }

  /** Message sync sub-check, reported under the `messageSync` key. */
  private async checkMessageSyncHealth(): Promise<HealthIndicatorResult> {
    return this.checkSyncHealth(
      'messageSync',
      () => this.healthCacheService.getMessageChannelSyncJobByStatusCounter(),
      {
        timeout: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
        checkFailed: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_CHECK_FAILED,
        highFailureRate: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
      },
    );
  }

  /** Calendar sync sub-check, reported under the `calendarSync` key. */
  private async checkCalendarSyncHealth(): Promise<HealthIndicatorResult> {
    return this.checkSyncHealth(
      'calendarSync',
      () => this.healthCacheService.getCalendarChannelSyncJobByStatusCounter(),
      {
        timeout: HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_TIMEOUT,
        checkFailed: HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_CHECK_FAILED,
        highFailureRate: HEALTH_ERROR_MESSAGES.CALENDAR_SYNC_HIGH_FAILURE_RATE,
      },
    );
  }

  /**
   * Runs both sub-checks concurrently and merges them into one result under
   * the `connectedAccount` key.
   *
   * @returns "up" with both sub-results as details when neither is down;
   *   otherwise "down" with the failing sub-check's error, or both errors
   *   joined by " and " when both are down.
   */
  async isHealthy(): Promise<HealthIndicatorResult> {
    const indicator = this.healthIndicatorService.check('connectedAccount');

    const [messageResult, calendarResult] = await Promise.all([
      this.checkMessageSyncHealth(),
      this.checkCalendarSyncHealth(),
    ]);

    const details = {
      messageSync: messageResult.messageSync,
      calendarSync: calendarResult.calendarSync,
    };

    // Message error first, calendar second, matching the reported order.
    const downErrors = [details.messageSync, details.calendarSync]
      .filter((subResult) => subResult.status === 'down')
      .map((subResult) => subResult.error);

    if (downErrors.length === 0) {
      return indicator.up({ details });
    }

    const error: string =
      downErrors.length === 2
        ? `${downErrors[0]} and ${downErrors[1]}`
        : downErrors[0];

    return indicator.down({ error, details });
  }
}
|
||||
@ -1,63 +0,0 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import {
|
||||
HealthIndicatorResult,
|
||||
HealthIndicatorService,
|
||||
} from '@nestjs/terminus';
|
||||
|
||||
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
|
||||
import { HealthCacheService } from 'src/engine/core-modules/health/health-cache.service';
|
||||
import { withHealthCheckTimeout } from 'src/engine/core-modules/health/utils/health-check-timeout.util';
|
||||
|
||||
/**
 * Health indicator for message sync jobs, based on the cached per-status
 * job counters.
 */
@Injectable()
export class MessageSyncHealthIndicator {
  constructor(
    private readonly healthIndicatorService: HealthIndicatorService,
    private readonly healthCacheService: HealthCacheService,
  ) {}

  /**
   * Reports `up` when no jobs were counted or when fewer than 20% of them
   * failed, and `down` otherwise. A failure while fetching the counters
   * (timeout included) is also reported as `down`.
   *
   * @returns the `messageSync` health indicator result.
   */
  async isHealthy(): Promise<HealthIndicatorResult> {
    const indicator = this.healthIndicatorService.check('messageSync');

    try {
      const counters = await withHealthCheckTimeout(
        this.healthCacheService.getMessageChannelSyncJobByStatusCounter(),
        HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT,
      );

      // Sum every status counter, treating missing ones as zero.
      let totalJobs = 0;

      for (const count of Object.values(counters)) {
        totalJobs += count || 0;
      }

      const failedJobs = counters.FAILED_UNKNOWN || 0;
      // + (counters.FAILED_INSUFFICIENT_PERMISSIONS || 0)

      // Percentage of failed jobs, rounded to two decimal places.
      let failureRate = 0;

      if (totalJobs > 0) {
        failureRate = Math.round((failedJobs / totalJobs) * 100 * 100) / 100;
      }

      const details = { counters, totalJobs, failedJobs, failureRate };
      const healthy = totalJobs === 0 || failureRate < 20;

      return healthy
        ? indicator.up({ details })
        : indicator.down({
            error: HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_HIGH_FAILURE_RATE,
            details,
          });
    } catch (error) {
      // Distinguish a timeout from any other failure of the check.
      const timedOut =
        error.message === HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT;

      return indicator.down(
        timedOut
          ? HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_TIMEOUT
          : HEALTH_ERROR_MESSAGES.MESSAGE_SYNC_CHECK_FAILED,
      );
    }
  }
}
|
||||
@ -71,7 +71,7 @@ export class WorkerHealthIndicator {
|
||||
]);
|
||||
|
||||
queueStatuses.push({
|
||||
name: queueName,
|
||||
queueName: queueName,
|
||||
workers: workers.length,
|
||||
metrics: {
|
||||
failed: failedCount,
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
import { Field, ObjectType } from '@nestjs/graphql';
|
||||
|
||||
@ObjectType()
|
||||
export class MessageChannelSyncJobByStatusCounter {
|
||||
export class AccountSyncJobByStatusCounter {
|
||||
@Field(() => Number, { nullable: true })
|
||||
NOT_SYNCED?: number;
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
/**
 * String keys identifying the health counters.
 * NOTE(review): presumably used as cache keys by the health cache service —
 * confirm against its usage.
 */
export enum HealthCounterCacheKeys {
  /** Message channel sync jobs, counted by status. */
  MessageChannelSyncJobByStatus = 'message-channel-sync-job-by-status',

  /** Invalid captcha counter. */
  InvalidCaptcha = 'invalid-captcha',

  /** Calendar event sync jobs, counted by status. */
  CalendarEventSyncJobByStatus = 'calendar-event-sync-job-by-status',
}
|
||||
|
||||
@ -5,7 +5,7 @@ import { WorkerQueueMetrics } from 'src/engine/core-modules/health/types/worker-
|
||||
@ObjectType()
|
||||
export class WorkerQueueHealth {
|
||||
@Field(() => String)
|
||||
name: string;
|
||||
queueName: string;
|
||||
|
||||
@Field(() => Number)
|
||||
workers: number;
|
||||
|
||||
Reference in New Issue
Block a user