Health status worker metrics improvements (#10442)
Co-authored-by: Félix Malfait <felix.malfait@gmail.com>
This commit is contained in:
@ -1,15 +1,24 @@
|
||||
import { Test, TestingModule } from '@nestjs/testing';
|
||||
|
||||
import { Queue } from 'bullmq';
|
||||
import { Redis } from 'ioredis';
|
||||
|
||||
import { AdminPanelHealthService } from 'src/engine/core-modules/admin-panel/admin-panel-health.service';
|
||||
import { HEALTH_INDICATORS } from 'src/engine/core-modules/admin-panel/constants/health-indicators.constants';
|
||||
import { SystemHealth } from 'src/engine/core-modules/admin-panel/dtos/system-health.dto';
|
||||
import { AdminPanelHealthServiceStatus } from 'src/engine/core-modules/admin-panel/enums/admin-panel-health-service-status.enum';
|
||||
import { QueueMetricsTimeRange } from 'src/engine/core-modules/admin-panel/enums/queue-metrics-time-range.enum';
|
||||
import { EnvironmentService } from 'src/engine/core-modules/environment/environment.service';
|
||||
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
|
||||
import { HealthIndicatorId } from 'src/engine/core-modules/health/enums/health-indicator-id.enum';
|
||||
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
|
||||
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
|
||||
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
|
||||
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
|
||||
import { MessageQueue } from 'src/engine/core-modules/message-queue/message-queue.constants';
|
||||
import { RedisClientService } from 'src/engine/core-modules/redis-client/redis-client.service';
|
||||
|
||||
jest.mock('bullmq');
|
||||
|
||||
describe('AdminPanelHealthService', () => {
|
||||
let service: AdminPanelHealthService;
|
||||
@ -17,12 +26,25 @@ describe('AdminPanelHealthService', () => {
|
||||
let redisHealth: jest.Mocked<RedisHealthIndicator>;
|
||||
let workerHealth: jest.Mocked<WorkerHealthIndicator>;
|
||||
let connectedAccountHealth: jest.Mocked<ConnectedAccountHealth>;
|
||||
let redisClient: jest.Mocked<RedisClientService>;
|
||||
let environmentService: jest.Mocked<EnvironmentService>;
|
||||
let loggerSpy: jest.SpyInstance;
|
||||
|
||||
beforeEach(async () => {
|
||||
databaseHealth = { isHealthy: jest.fn() } as any;
|
||||
redisHealth = { isHealthy: jest.fn() } as any;
|
||||
workerHealth = { isHealthy: jest.fn() } as any;
|
||||
workerHealth = { isHealthy: jest.fn(), getQueueDetails: jest.fn() } as any;
|
||||
connectedAccountHealth = { isHealthy: jest.fn() } as any;
|
||||
redisClient = {
|
||||
getClient: jest.fn().mockReturnValue({} as Redis),
|
||||
} as any;
|
||||
environmentService = { get: jest.fn() } as any;
|
||||
|
||||
(Queue as unknown as jest.Mock) = jest.fn().mockImplementation(() => ({
|
||||
getMetrics: jest.fn(),
|
||||
getWorkers: jest.fn(),
|
||||
close: jest.fn(),
|
||||
}));
|
||||
|
||||
const module: TestingModule = await Test.createTestingModule({
|
||||
providers: [
|
||||
@ -31,10 +53,21 @@ describe('AdminPanelHealthService', () => {
|
||||
{ provide: RedisHealthIndicator, useValue: redisHealth },
|
||||
{ provide: WorkerHealthIndicator, useValue: workerHealth },
|
||||
{ provide: ConnectedAccountHealth, useValue: connectedAccountHealth },
|
||||
{ provide: RedisClientService, useValue: redisClient },
|
||||
{ provide: EnvironmentService, useValue: environmentService },
|
||||
],
|
||||
}).compile();
|
||||
|
||||
service = module.get<AdminPanelHealthService>(AdminPanelHealthService);
|
||||
|
||||
loggerSpy = jest
|
||||
.spyOn(service['logger'], 'error')
|
||||
.mockImplementation(() => {});
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
jest.clearAllMocks();
|
||||
loggerSpy.mockRestore();
|
||||
});
|
||||
|
||||
it('should be defined', () => {
|
||||
@ -62,8 +95,9 @@ describe('AdminPanelHealthService', () => {
|
||||
delayed: 4,
|
||||
failed: 3,
|
||||
waiting: 0,
|
||||
prioritized: 0,
|
||||
failureRate: 0.3,
|
||||
},
|
||||
status: 'up',
|
||||
},
|
||||
],
|
||||
},
|
||||
@ -209,26 +243,12 @@ describe('AdminPanelHealthService', () => {
|
||||
{
|
||||
queueName: 'queue1',
|
||||
workers: 2,
|
||||
metrics: {
|
||||
active: 1,
|
||||
completed: 10,
|
||||
delayed: 0,
|
||||
failed: 2,
|
||||
waiting: 5,
|
||||
prioritized: 1,
|
||||
},
|
||||
status: 'up',
|
||||
},
|
||||
{
|
||||
queueName: 'queue2',
|
||||
workers: 0,
|
||||
metrics: {
|
||||
active: 0,
|
||||
completed: 5,
|
||||
delayed: 0,
|
||||
failed: 1,
|
||||
waiting: 2,
|
||||
prioritized: 0,
|
||||
},
|
||||
status: 'up',
|
||||
},
|
||||
];
|
||||
|
||||
@ -248,8 +268,8 @@ describe('AdminPanelHealthService', () => {
|
||||
status: AdminPanelHealthServiceStatus.OPERATIONAL,
|
||||
details: undefined,
|
||||
queues: mockQueues.map((queue) => ({
|
||||
...queue,
|
||||
id: `worker-${queue.queueName}`,
|
||||
queueName: queue.queueName,
|
||||
status:
|
||||
queue.workers > 0
|
||||
? AdminPanelHealthServiceStatus.OPERATIONAL
|
||||
@ -281,4 +301,305 @@ describe('AdminPanelHealthService', () => {
|
||||
).rejects.toThrow('Health indicator not found: invalid');
|
||||
});
|
||||
});
|
||||
|
||||
describe('getQueueMetrics', () => {
|
||||
const mockQueue = {
|
||||
getMetrics: jest.fn(),
|
||||
getWorkers: jest.fn(),
|
||||
close: jest.fn(),
|
||||
};
|
||||
|
||||
beforeEach(() => {
|
||||
jest.clearAllMocks();
|
||||
redisClient.getClient.mockReturnValue({} as Redis);
|
||||
(Queue as unknown as jest.Mock).mockImplementation(() => mockQueue);
|
||||
});
|
||||
|
||||
it('should return metrics data for a queue with correct data transformation', async () => {
|
||||
const mockCompletedData = Array(240)
|
||||
.fill(0)
|
||||
.map((_, i) => i);
|
||||
const mockFailedData = Array(240)
|
||||
.fill(0)
|
||||
.map((_, i) => i * 0.1);
|
||||
|
||||
workerHealth.getQueueDetails.mockResolvedValue({
|
||||
queueName: 'test-queue',
|
||||
workers: 1,
|
||||
status: 'up',
|
||||
metrics: {
|
||||
active: 1,
|
||||
completed: 30,
|
||||
failed: 3,
|
||||
waiting: 0,
|
||||
delayed: 0,
|
||||
failureRate: 9.1,
|
||||
completedData: mockCompletedData,
|
||||
failedData: mockFailedData,
|
||||
},
|
||||
});
|
||||
|
||||
const result = await service.getQueueMetrics(
|
||||
MessageQueue.messagingQueue,
|
||||
QueueMetricsTimeRange.FourHours,
|
||||
);
|
||||
|
||||
expect(result).toMatchObject({
|
||||
queueName: MessageQueue.messagingQueue,
|
||||
timeRange: QueueMetricsTimeRange.FourHours,
|
||||
workers: 1,
|
||||
details: expect.any(Object),
|
||||
data: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
id: 'Completed Jobs',
|
||||
data: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
x: expect.any(Number),
|
||||
y: expect.any(Number),
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
expect.objectContaining({
|
||||
id: 'Failed Jobs',
|
||||
data: expect.arrayContaining([
|
||||
expect.objectContaining({
|
||||
x: expect.any(Number),
|
||||
y: expect.any(Number),
|
||||
}),
|
||||
]),
|
||||
}),
|
||||
]),
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle empty metrics data', async () => {
|
||||
workerHealth.getQueueDetails.mockResolvedValue(null);
|
||||
|
||||
const result = await service.getQueueMetrics(
|
||||
MessageQueue.messagingQueue,
|
||||
QueueMetricsTimeRange.FourHours,
|
||||
);
|
||||
|
||||
expect(result.data).toHaveLength(2);
|
||||
expect(result.data[0].data).toHaveLength(240);
|
||||
expect(result.data[1].data).toHaveLength(240);
|
||||
});
|
||||
|
||||
it('should handle metrics service errors', async () => {
|
||||
workerHealth.getQueueDetails.mockRejectedValue(
|
||||
new Error('Metrics error'),
|
||||
);
|
||||
|
||||
await expect(
|
||||
service.getQueueMetrics(
|
||||
MessageQueue.messagingQueue,
|
||||
QueueMetricsTimeRange.FourHours,
|
||||
),
|
||||
).rejects.toThrow('Metrics error');
|
||||
|
||||
expect(loggerSpy).toHaveBeenCalledWith(
|
||||
'Error getting metrics for messaging-queue: Metrics error',
|
||||
);
|
||||
});
|
||||
|
||||
describe('backfilling behavior', () => {
|
||||
it('should handle partial data with correct historical backfilling', async () => {
|
||||
// Test with 40 recent points for 4-hour range (needs 240 points)
|
||||
const partialData = Array(40)
|
||||
.fill(0)
|
||||
.map((_, i) => i + 1);
|
||||
|
||||
workerHealth.getQueueDetails.mockResolvedValue({
|
||||
queueName: 'test-queue',
|
||||
workers: 1,
|
||||
status: 'up',
|
||||
metrics: {
|
||||
failed: 0,
|
||||
completed: 0,
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
delayed: 0,
|
||||
failureRate: 0,
|
||||
completedData: partialData,
|
||||
failedData: partialData,
|
||||
},
|
||||
});
|
||||
|
||||
const result = await service.getQueueMetrics(
|
||||
MessageQueue.messagingQueue,
|
||||
QueueMetricsTimeRange.FourHours,
|
||||
);
|
||||
|
||||
// Should have 240 total points
|
||||
expect(result.data[0].data).toHaveLength(240);
|
||||
|
||||
// First 200 points should be zero (historical backfill)
|
||||
const historicalPoints = result.data[0].data.slice(0, 200);
|
||||
|
||||
expect(historicalPoints.every((point) => point.y === 0)).toBe(true);
|
||||
|
||||
// Last 40 points should be actual data
|
||||
const actualDataPoints = result.data[0].data.slice(200);
|
||||
|
||||
expect(actualDataPoints.every((point) => point.y > 0)).toBe(true);
|
||||
|
||||
// Verify chronological order (increasing values)
|
||||
const nonZeroValues = actualDataPoints.map((point) => point.y);
|
||||
|
||||
for (let i = 1; i < nonZeroValues.length; i++) {
|
||||
expect(nonZeroValues[i]).toBeGreaterThan(nonZeroValues[i - 1]);
|
||||
}
|
||||
});
|
||||
|
||||
it('should handle completely empty data with full backfilling', async () => {
|
||||
workerHealth.getQueueDetails.mockResolvedValue({
|
||||
queueName: 'test-queue',
|
||||
workers: 1,
|
||||
status: 'up',
|
||||
metrics: {
|
||||
failed: 0,
|
||||
completed: 0,
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
delayed: 0,
|
||||
failureRate: 0,
|
||||
completedData: [],
|
||||
failedData: [],
|
||||
},
|
||||
});
|
||||
|
||||
const result = await service.getQueueMetrics(
|
||||
MessageQueue.messagingQueue,
|
||||
QueueMetricsTimeRange.OneHour,
|
||||
);
|
||||
|
||||
// Should have 60 points for one hour
|
||||
expect(result.data[0].data).toHaveLength(60);
|
||||
// All points should be zero
|
||||
expect(result.data[0].data.every((point) => point.y === 0)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
describe('sampling behavior', () => {
|
||||
it('should correctly sample data for different time ranges', async () => {
|
||||
const testCases = [
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.OneHour,
|
||||
expectedPoints: 60,
|
||||
samplingFactor: 1,
|
||||
},
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.FourHours,
|
||||
expectedPoints: 240,
|
||||
samplingFactor: 1,
|
||||
},
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.OneDay,
|
||||
expectedPoints: 240,
|
||||
samplingFactor: 6,
|
||||
},
|
||||
];
|
||||
|
||||
for (const testCase of testCases) {
|
||||
// Create test data with non-zero values
|
||||
const testData = Array(testCase.expectedPoints * 2)
|
||||
.fill(0)
|
||||
.map((_, i) => i + 1); // Start from 1 to avoid zero values
|
||||
|
||||
workerHealth.getQueueDetails.mockResolvedValue({
|
||||
queueName: 'test-queue',
|
||||
workers: 1,
|
||||
status: 'up',
|
||||
metrics: {
|
||||
failed: 0,
|
||||
completed: 0,
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
delayed: 0,
|
||||
failureRate: 0,
|
||||
completedData: testData,
|
||||
failedData: testData,
|
||||
},
|
||||
});
|
||||
|
||||
const result = await service.getQueueMetrics(
|
||||
MessageQueue.messagingQueue,
|
||||
testCase.timeRange,
|
||||
);
|
||||
|
||||
expect(result.data[0].data).toHaveLength(testCase.expectedPoints);
|
||||
|
||||
if (testCase.samplingFactor > 1) {
|
||||
const sampledData = result.data[0].data;
|
||||
|
||||
for (let i = 0; i < sampledData.length; i++) {
|
||||
const start = i * testCase.samplingFactor;
|
||||
const end = start + testCase.samplingFactor;
|
||||
const originalDataSlice = testData.slice(start, end);
|
||||
|
||||
if (originalDataSlice.length > 0) {
|
||||
// Add this check
|
||||
const maxInSlice = Math.max(...originalDataSlice);
|
||||
|
||||
expect(sampledData[i].y).toBeLessThanOrEqual(maxInSlice);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe('getPointsConfiguration', () => {
|
||||
const testCases = [
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.OneHour,
|
||||
expected: {
|
||||
pointsNeeded: 60,
|
||||
samplingFactor: 1,
|
||||
targetVisualizationPoints: 240,
|
||||
},
|
||||
},
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.FourHours,
|
||||
expected: {
|
||||
pointsNeeded: 240,
|
||||
samplingFactor: 1,
|
||||
targetVisualizationPoints: 240,
|
||||
},
|
||||
},
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.TwelveHours,
|
||||
expected: {
|
||||
pointsNeeded: 720,
|
||||
samplingFactor: 3,
|
||||
targetVisualizationPoints: 240,
|
||||
},
|
||||
},
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.OneDay,
|
||||
expected: {
|
||||
pointsNeeded: 1440,
|
||||
samplingFactor: 6,
|
||||
targetVisualizationPoints: 240,
|
||||
},
|
||||
},
|
||||
{
|
||||
timeRange: QueueMetricsTimeRange.SevenDays,
|
||||
expected: {
|
||||
pointsNeeded: 10080,
|
||||
samplingFactor: 42,
|
||||
targetVisualizationPoints: 240,
|
||||
},
|
||||
},
|
||||
];
|
||||
|
||||
testCases.forEach(({ timeRange, expected }) => {
|
||||
it(`should return correct parameters for ${timeRange}`, () => {
|
||||
const result = service['getPointsConfiguration'](timeRange as any);
|
||||
|
||||
expect(result).toEqual(expected);
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,23 +1,33 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { HealthIndicatorResult, HealthIndicatorStatus } from '@nestjs/terminus';
|
||||
|
||||
import { Queue } from 'bullmq';
|
||||
|
||||
import { HEALTH_INDICATORS } from 'src/engine/core-modules/admin-panel/constants/health-indicators.constants';
|
||||
import { AdminPanelHealthServiceData } from 'src/engine/core-modules/admin-panel/dtos/admin-panel-health-service-data.dto';
|
||||
import { QueueMetricsData } from 'src/engine/core-modules/admin-panel/dtos/queue-metrics-data.dto';
|
||||
import { SystemHealth } from 'src/engine/core-modules/admin-panel/dtos/system-health.dto';
|
||||
import { AdminPanelHealthServiceStatus } from 'src/engine/core-modules/admin-panel/enums/admin-panel-health-service-status.enum';
|
||||
import { QueueMetricsTimeRange } from 'src/engine/core-modules/admin-panel/enums/queue-metrics-time-range.enum';
|
||||
import { HealthIndicatorId } from 'src/engine/core-modules/health/enums/health-indicator-id.enum';
|
||||
import { ConnectedAccountHealth } from 'src/engine/core-modules/health/indicators/connected-account.health';
|
||||
import { DatabaseHealthIndicator } from 'src/engine/core-modules/health/indicators/database.health';
|
||||
import { RedisHealthIndicator } from 'src/engine/core-modules/health/indicators/redis.health';
|
||||
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
|
||||
import { WorkerQueueHealth } from 'src/engine/core-modules/health/types/worker-queue-health.type';
|
||||
import { MessageQueue } from 'src/engine/core-modules/message-queue/message-queue.constants';
|
||||
import { RedisClientService } from 'src/engine/core-modules/redis-client/redis-client.service';
|
||||
|
||||
@Injectable()
|
||||
export class AdminPanelHealthService {
|
||||
private readonly logger = new Logger(AdminPanelHealthService.name);
|
||||
|
||||
constructor(
|
||||
private readonly databaseHealth: DatabaseHealthIndicator,
|
||||
private readonly redisHealth: RedisHealthIndicator,
|
||||
private readonly workerHealth: WorkerHealthIndicator,
|
||||
private readonly connectedAccountHealth: ConnectedAccountHealth,
|
||||
private readonly redisClient: RedisClientService,
|
||||
) {}
|
||||
|
||||
private readonly healthIndicators = {
|
||||
@ -93,8 +103,8 @@ export class AdminPanelHealthService {
|
||||
return {
|
||||
...indicatorStatus,
|
||||
queues: (indicatorStatus?.queues ?? []).map((queue) => ({
|
||||
...queue,
|
||||
id: `${indicatorId}-${queue.queueName}`,
|
||||
queueName: queue.queueName,
|
||||
status:
|
||||
queue.workers > 0
|
||||
? AdminPanelHealthServiceStatus.OPERATIONAL
|
||||
@ -144,4 +154,166 @@ export class AdminPanelHealthService {
|
||||
],
|
||||
};
|
||||
}
|
||||
|
||||
async getQueueMetrics(
|
||||
queueName: MessageQueue,
|
||||
timeRange: QueueMetricsTimeRange = QueueMetricsTimeRange.OneDay,
|
||||
): Promise<QueueMetricsData> {
|
||||
const redis = this.redisClient.getClient();
|
||||
const queue = new Queue(queueName, { connection: redis });
|
||||
|
||||
try {
|
||||
const { pointsNeeded, samplingFactor } =
|
||||
this.getPointsConfiguration(timeRange);
|
||||
|
||||
const queueDetails = await this.workerHealth.getQueueDetails(queueName, {
|
||||
pointsNeeded,
|
||||
});
|
||||
|
||||
const completedMetricsArray = queueDetails?.metrics?.completedData;
|
||||
const failedMetricsArray = queueDetails?.metrics?.failedData;
|
||||
|
||||
const completedMetrics = this.extractMetricsData(
|
||||
completedMetricsArray,
|
||||
pointsNeeded,
|
||||
samplingFactor,
|
||||
);
|
||||
|
||||
const failedMetrics = this.extractMetricsData(
|
||||
failedMetricsArray,
|
||||
pointsNeeded,
|
||||
samplingFactor,
|
||||
);
|
||||
|
||||
return this.transformMetricsForGraph(
|
||||
completedMetrics,
|
||||
failedMetrics,
|
||||
timeRange,
|
||||
queueName,
|
||||
queueDetails,
|
||||
);
|
||||
} catch (error) {
|
||||
this.logger.error(
|
||||
`Error getting metrics for ${queueName}: ${error.message}`,
|
||||
);
|
||||
throw error;
|
||||
} finally {
|
||||
await queue.close();
|
||||
}
|
||||
}
|
||||
|
||||
private getPointsConfiguration(timeRange: QueueMetricsTimeRange): {
|
||||
pointsNeeded: number;
|
||||
samplingFactor: number;
|
||||
targetVisualizationPoints: number;
|
||||
} {
|
||||
const targetVisualizationPoints = 240;
|
||||
|
||||
let pointsNeeded: number;
|
||||
|
||||
switch (timeRange) {
|
||||
case QueueMetricsTimeRange.OneHour:
|
||||
pointsNeeded = 60; // 60 points (1 hour)
|
||||
break;
|
||||
case QueueMetricsTimeRange.FourHours:
|
||||
pointsNeeded = 4 * 60; // 240 points (4 hours)
|
||||
break;
|
||||
case QueueMetricsTimeRange.TwelveHours:
|
||||
pointsNeeded = 12 * 60; // 720 points (12 hours)
|
||||
break;
|
||||
case QueueMetricsTimeRange.OneDay:
|
||||
pointsNeeded = 24 * 60; // 1440 points (24 hours)
|
||||
break;
|
||||
case QueueMetricsTimeRange.SevenDays:
|
||||
pointsNeeded = 7 * 24 * 60; // 10080 points (7 days)
|
||||
break;
|
||||
|
||||
default:
|
||||
pointsNeeded = 24 * 60; // Default to 1 day
|
||||
}
|
||||
|
||||
const samplingFactor =
|
||||
pointsNeeded <= targetVisualizationPoints
|
||||
? 1
|
||||
: Math.ceil(pointsNeeded / targetVisualizationPoints);
|
||||
|
||||
return {
|
||||
pointsNeeded,
|
||||
samplingFactor,
|
||||
targetVisualizationPoints,
|
||||
};
|
||||
}
|
||||
|
||||
private extractMetricsData(
|
||||
metrics: number[] | undefined,
|
||||
pointsNeeded: number,
|
||||
samplingFactor = 1,
|
||||
): number[] {
|
||||
if (!metrics || !Array.isArray(metrics)) {
|
||||
return Array(Math.ceil(pointsNeeded / samplingFactor)).fill(0);
|
||||
}
|
||||
|
||||
try {
|
||||
const targetPoints = Math.ceil(pointsNeeded / samplingFactor);
|
||||
|
||||
const relevantData = metrics.slice(-pointsNeeded);
|
||||
const result: number[] = [];
|
||||
|
||||
const backfillCount = Math.max(
|
||||
0,
|
||||
targetPoints - Math.ceil(relevantData.length / samplingFactor),
|
||||
);
|
||||
|
||||
result.push(...Array(backfillCount).fill(0));
|
||||
|
||||
for (let i = 0; i < relevantData.length; i += samplingFactor) {
|
||||
const chunk = relevantData.slice(i, i + samplingFactor);
|
||||
|
||||
result.push(Math.max(...chunk));
|
||||
}
|
||||
|
||||
return result.slice(0, targetPoints);
|
||||
} catch (error) {
|
||||
this.logger.error(`Error extracting metrics data: ${error.message}`);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
private transformMetricsForGraph(
|
||||
completedMetrics: number[],
|
||||
failedMetrics: number[],
|
||||
timeRange: QueueMetricsTimeRange,
|
||||
queueName: MessageQueue,
|
||||
queueDetails: WorkerQueueHealth | null,
|
||||
): QueueMetricsData {
|
||||
try {
|
||||
return {
|
||||
queueName,
|
||||
timeRange,
|
||||
details: queueDetails?.metrics ?? null,
|
||||
workers: queueDetails?.workers ?? 0,
|
||||
data: [
|
||||
{
|
||||
id: 'Completed Jobs',
|
||||
data: completedMetrics.map((count, index) => ({
|
||||
x: index,
|
||||
y: count,
|
||||
})),
|
||||
},
|
||||
{
|
||||
id: 'Failed Jobs',
|
||||
data: failedMetrics.map((count, index) => ({
|
||||
x: index,
|
||||
y: count,
|
||||
})),
|
||||
},
|
||||
],
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.error(
|
||||
`Error transforming metrics for graph: ${error.message}`,
|
||||
);
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -10,20 +10,24 @@ import { SystemHealth } from 'src/engine/core-modules/admin-panel/dtos/system-he
|
||||
import { UpdateWorkspaceFeatureFlagInput } from 'src/engine/core-modules/admin-panel/dtos/update-workspace-feature-flag.input';
|
||||
import { UserLookup } from 'src/engine/core-modules/admin-panel/dtos/user-lookup.entity';
|
||||
import { UserLookupInput } from 'src/engine/core-modules/admin-panel/dtos/user-lookup.input';
|
||||
import { QueueMetricsTimeRange } from 'src/engine/core-modules/admin-panel/enums/queue-metrics-time-range.enum';
|
||||
import { AuthGraphqlApiExceptionFilter } from 'src/engine/core-modules/auth/filters/auth-graphql-api-exception.filter';
|
||||
import { HealthIndicatorId } from 'src/engine/core-modules/health/enums/health-indicator-id.enum';
|
||||
import { WorkerHealthIndicator } from 'src/engine/core-modules/health/indicators/worker.health';
|
||||
import { MessageQueue } from 'src/engine/core-modules/message-queue/message-queue.constants';
|
||||
import { ImpersonateGuard } from 'src/engine/guards/impersonate-guard';
|
||||
import { UserAuthGuard } from 'src/engine/guards/user-auth.guard';
|
||||
import { WorkspaceAuthGuard } from 'src/engine/guards/workspace-auth.guard';
|
||||
|
||||
import { AdminPanelHealthServiceData } from './dtos/admin-panel-health-service-data.dto';
|
||||
|
||||
import { QueueMetricsData } from './dtos/queue-metrics-data.dto';
|
||||
@Resolver()
|
||||
@UseFilters(AuthGraphqlApiExceptionFilter)
|
||||
export class AdminPanelResolver {
|
||||
constructor(
|
||||
private adminService: AdminPanelService,
|
||||
private adminPanelHealthService: AdminPanelHealthService,
|
||||
private workerHealthIndicator: WorkerHealthIndicator,
|
||||
) {}
|
||||
|
||||
@UseGuards(WorkspaceAuthGuard, UserAuthGuard, ImpersonateGuard)
|
||||
@ -68,6 +72,7 @@ export class AdminPanelResolver {
|
||||
return this.adminPanelHealthService.getSystemHealthStatus();
|
||||
}
|
||||
|
||||
@UseGuards(WorkspaceAuthGuard, UserAuthGuard, ImpersonateGuard)
|
||||
@Query(() => AdminPanelHealthServiceData)
|
||||
async getIndicatorHealthStatus(
|
||||
@Args('indicatorId', {
|
||||
@ -77,4 +82,22 @@ export class AdminPanelResolver {
|
||||
): Promise<AdminPanelHealthServiceData> {
|
||||
return this.adminPanelHealthService.getIndicatorHealthStatus(indicatorId);
|
||||
}
|
||||
|
||||
@UseGuards(WorkspaceAuthGuard, UserAuthGuard, ImpersonateGuard)
|
||||
@Query(() => QueueMetricsData)
|
||||
async getQueueMetrics(
|
||||
@Args('queueName', { type: () => String })
|
||||
queueName: string,
|
||||
@Args('timeRange', {
|
||||
nullable: true,
|
||||
defaultValue: QueueMetricsTimeRange.OneDay,
|
||||
type: () => QueueMetricsTimeRange,
|
||||
})
|
||||
timeRange: QueueMetricsTimeRange = QueueMetricsTimeRange.OneHour,
|
||||
): Promise<QueueMetricsData> {
|
||||
return await this.adminPanelHealthService.getQueueMetrics(
|
||||
queueName as MessageQueue,
|
||||
timeRange,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1,13 +1,15 @@
|
||||
import { Field, ObjectType } from '@nestjs/graphql';
|
||||
|
||||
import { AdminPanelHealthServiceStatus } from 'src/engine/core-modules/admin-panel/enums/admin-panel-health-service-status.enum';
|
||||
import { WorkerQueueHealth } from 'src/engine/core-modules/health/types/worker-queue-health.type';
|
||||
|
||||
@ObjectType()
|
||||
export class AdminPanelWorkerQueueHealth extends WorkerQueueHealth {
|
||||
export class AdminPanelWorkerQueueHealth {
|
||||
@Field(() => String)
|
||||
id: string;
|
||||
|
||||
@Field(() => String)
|
||||
queueName: string;
|
||||
|
||||
@Field(() => AdminPanelHealthServiceStatus)
|
||||
status: AdminPanelHealthServiceStatus;
|
||||
}
|
||||
|
||||
@ -0,0 +1,10 @@
|
||||
import { Field, ObjectType } from '@nestjs/graphql';
|
||||
|
||||
@ObjectType()
|
||||
export class QueueMetricsDataPoint {
|
||||
@Field(() => Number)
|
||||
x: number;
|
||||
|
||||
@Field(() => Number)
|
||||
y: number;
|
||||
}
|
||||
@ -0,0 +1,23 @@
|
||||
import { Field, ObjectType } from '@nestjs/graphql';
|
||||
|
||||
import { QueueMetricsSeries } from 'src/engine/core-modules/admin-panel/dtos/queue-metrics-series.dto';
|
||||
import { QueueMetricsTimeRange } from 'src/engine/core-modules/admin-panel/enums/queue-metrics-time-range.enum';
|
||||
import { WorkerQueueMetrics } from 'src/engine/core-modules/health/types/worker-queue-metrics.type';
|
||||
|
||||
@ObjectType()
|
||||
export class QueueMetricsData {
|
||||
@Field(() => String)
|
||||
queueName: string;
|
||||
|
||||
@Field(() => Number)
|
||||
workers: number;
|
||||
|
||||
@Field(() => QueueMetricsTimeRange)
|
||||
timeRange: QueueMetricsTimeRange;
|
||||
|
||||
@Field(() => WorkerQueueMetrics, { nullable: true })
|
||||
details: WorkerQueueMetrics | null;
|
||||
|
||||
@Field(() => [QueueMetricsSeries])
|
||||
data: QueueMetricsSeries[];
|
||||
}
|
||||
@ -0,0 +1,12 @@
|
||||
import { Field, ObjectType } from '@nestjs/graphql';
|
||||
|
||||
import { QueueMetricsDataPoint } from 'src/engine/core-modules/admin-panel/dtos/queue-metrics-data-point.dto';
|
||||
|
||||
@ObjectType()
|
||||
export class QueueMetricsSeries {
|
||||
@Field()
|
||||
id: string;
|
||||
|
||||
@Field(() => [QueueMetricsDataPoint])
|
||||
data: QueueMetricsDataPoint[];
|
||||
}
|
||||
@ -0,0 +1,13 @@
|
||||
import { registerEnumType } from '@nestjs/graphql';
|
||||
|
||||
export enum QueueMetricsTimeRange {
|
||||
SevenDays = '7D',
|
||||
OneDay = '1D',
|
||||
TwelveHours = '12H',
|
||||
FourHours = '4H',
|
||||
OneHour = '1H',
|
||||
}
|
||||
|
||||
registerEnumType(QueueMetricsTimeRange, {
|
||||
name: 'QueueMetricsTimeRange',
|
||||
});
|
||||
@ -12,12 +12,10 @@ import { RedisClientService } from 'src/engine/core-modules/redis-client/redis-c
|
||||
const mockQueueInstance = {
|
||||
getWorkers: jest.fn().mockResolvedValue([]),
|
||||
close: jest.fn().mockResolvedValue(undefined),
|
||||
getFailedCount: jest.fn().mockResolvedValue(0),
|
||||
getCompletedCount: jest.fn().mockResolvedValue(0),
|
||||
getMetrics: jest.fn().mockResolvedValue({ count: 0, data: [] }),
|
||||
getWaitingCount: jest.fn().mockResolvedValue(0),
|
||||
getActiveCount: jest.fn().mockResolvedValue(0),
|
||||
getDelayedCount: jest.fn().mockResolvedValue(0),
|
||||
getPrioritizedCount: jest.fn().mockResolvedValue(0),
|
||||
};
|
||||
|
||||
jest.mock('bullmq', () => ({
|
||||
@ -28,6 +26,7 @@ describe('WorkerHealthIndicator', () => {
|
||||
let service: WorkerHealthIndicator;
|
||||
let mockRedis: jest.Mocked<Pick<Redis, 'ping'>>;
|
||||
let healthIndicatorService: jest.Mocked<HealthIndicatorService>;
|
||||
let loggerSpy: jest.SpyInstance;
|
||||
|
||||
beforeEach(async () => {
|
||||
mockRedis = {
|
||||
@ -64,11 +63,23 @@ describe('WorkerHealthIndicator', () => {
|
||||
}).compile();
|
||||
|
||||
service = module.get<WorkerHealthIndicator>(WorkerHealthIndicator);
|
||||
|
||||
loggerSpy = jest
|
||||
.spyOn(service['logger'], 'error')
|
||||
.mockImplementation(() => {});
|
||||
jest.useFakeTimers();
|
||||
|
||||
// Reset mocks to their default success state before each test
|
||||
mockQueueInstance.getWorkers.mockResolvedValue([]);
|
||||
mockQueueInstance.getMetrics.mockResolvedValue({ count: 0, data: [] });
|
||||
mockQueueInstance.getWaitingCount.mockResolvedValue(0);
|
||||
mockQueueInstance.getActiveCount.mockResolvedValue(0);
|
||||
mockQueueInstance.getDelayedCount.mockResolvedValue(0);
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
jest.useRealTimers();
|
||||
jest.clearAllMocks();
|
||||
});
|
||||
|
||||
it('should be defined', () => {
|
||||
@ -133,4 +144,204 @@ describe('WorkerHealthIndicator', () => {
|
||||
Object.keys(MessageQueue).length,
|
||||
);
|
||||
});
|
||||
|
||||
it('should return down status when failure rate exceeds threshold', async () => {
|
||||
mockQueueInstance.getWorkers.mockResolvedValue([{ id: 'worker1' }]);
|
||||
mockQueueInstance.getMetrics.mockImplementation((type) => {
|
||||
if (type === 'failed') {
|
||||
return Promise.resolve({ count: 600 });
|
||||
}
|
||||
if (type === 'completed') {
|
||||
return Promise.resolve({ count: 400 });
|
||||
}
|
||||
|
||||
return Promise.resolve({ count: 0 });
|
||||
});
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.worker.status).toBe('up');
|
||||
expect('queues' in result.worker).toBe(true);
|
||||
if ('queues' in result.worker) {
|
||||
expect(result.worker.queues[0].status).toBe('down');
|
||||
expect(result.worker.queues[0].metrics).toEqual({
|
||||
failed: 600,
|
||||
completed: 400,
|
||||
waiting: 0,
|
||||
active: 0,
|
||||
delayed: 0,
|
||||
failureRate: 60,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
it('should return complete metrics for active workers', async () => {
|
||||
mockQueueInstance.getWorkers.mockResolvedValue([{ id: 'worker1' }]);
|
||||
mockQueueInstance.getMetrics.mockImplementation((type) => {
|
||||
if (type === 'failed') {
|
||||
return Promise.resolve({ count: 10 });
|
||||
}
|
||||
if (type === 'completed') {
|
||||
return Promise.resolve({ count: 90 });
|
||||
}
|
||||
|
||||
return Promise.resolve({ count: 0 });
|
||||
});
|
||||
mockQueueInstance.getWaitingCount.mockResolvedValue(5);
|
||||
mockQueueInstance.getActiveCount.mockResolvedValue(2);
|
||||
mockQueueInstance.getDelayedCount.mockResolvedValue(1);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.worker.status).toBe('up');
|
||||
expect('queues' in result.worker).toBe(true);
|
||||
if ('queues' in result.worker) {
|
||||
expect(result.worker.queues[0].metrics).toEqual({
|
||||
failed: 10,
|
||||
completed: 90,
|
||||
waiting: 5,
|
||||
active: 2,
|
||||
delayed: 1,
|
||||
failureRate: 10,
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
it('should handle queue errors gracefully', async () => {
|
||||
mockQueueInstance.getWorkers.mockRejectedValue(new Error('Queue error'));
|
||||
mockQueueInstance.getMetrics.mockRejectedValue(new Error('Queue error'));
|
||||
mockQueueInstance.getWaitingCount.mockRejectedValue(
|
||||
new Error('Queue error'),
|
||||
);
|
||||
mockQueueInstance.getActiveCount.mockRejectedValue(
|
||||
new Error('Queue error'),
|
||||
);
|
||||
mockQueueInstance.getDelayedCount.mockRejectedValue(
|
||||
new Error('Queue error'),
|
||||
);
|
||||
|
||||
const result = await service.isHealthy();
|
||||
|
||||
expect(result.worker.status).toBe('down');
|
||||
expect('error' in result.worker).toBe(true);
|
||||
if ('error' in result.worker) {
|
||||
expect(result.worker.error).toBe(HEALTH_ERROR_MESSAGES.NO_ACTIVE_WORKERS);
|
||||
}
|
||||
|
||||
expect(loggerSpy).toHaveBeenCalled();
|
||||
Object.values(MessageQueue).forEach((queueName) => {
|
||||
expect(loggerSpy).toHaveBeenCalledWith(
|
||||
`Error getting queue details for ${queueName}: Queue error`,
|
||||
);
|
||||
expect(loggerSpy).toHaveBeenCalledWith(
|
||||
`Error checking worker for queue ${queueName}: Queue error`,
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
describe('getQueueDetails', () => {
|
||||
beforeEach(() => {
|
||||
// Reset mocks to clean state before each test in this describe block
|
||||
mockQueueInstance.getWorkers.mockResolvedValue([{ id: 'worker1' }]);
|
||||
mockQueueInstance.getMetrics.mockResolvedValue({ count: 0, data: [] });
|
||||
});
|
||||
|
||||
it('should return metrics with time series data when pointsNeeded is provided', async () => {
|
||||
const pointsNeeded = 60;
|
||||
|
||||
mockQueueInstance.getMetrics.mockImplementation((type) => {
|
||||
if (type === 'failed') {
|
||||
return Promise.resolve({
|
||||
count: 10,
|
||||
data: Array(pointsNeeded).fill(10 / pointsNeeded),
|
||||
});
|
||||
}
|
||||
if (type === 'completed') {
|
||||
return Promise.resolve({
|
||||
count: 90,
|
||||
data: Array(pointsNeeded).fill(90 / pointsNeeded),
|
||||
});
|
||||
}
|
||||
|
||||
return Promise.resolve({ count: 0, data: [] });
|
||||
});
|
||||
|
||||
const result = await service.getQueueDetails(
|
||||
MessageQueue.messagingQueue,
|
||||
{
|
||||
pointsNeeded,
|
||||
},
|
||||
);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result?.metrics).toMatchObject({
|
||||
failed: 10,
|
||||
completed: 90,
|
||||
failedData: expect.any(Array),
|
||||
completedData: expect.any(Array),
|
||||
});
|
||||
expect(result?.metrics.failedData).toHaveLength(pointsNeeded);
|
||||
expect(result?.metrics.completedData).toHaveLength(pointsNeeded);
|
||||
});
|
||||
|
||||
it('should handle invalid metrics data gracefully', async () => {
|
||||
const invalidData = ['invalid', null, undefined, '1', 2];
|
||||
|
||||
mockQueueInstance.getMetrics.mockResolvedValue({
|
||||
count: 0,
|
||||
data: invalidData,
|
||||
});
|
||||
|
||||
const result = await service.getQueueDetails(
|
||||
MessageQueue.messagingQueue,
|
||||
{
|
||||
pointsNeeded: 5,
|
||||
},
|
||||
);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result?.metrics.failedData).toEqual([NaN, 0, NaN, 1, 2]);
|
||||
expect(result?.metrics.completedData).toEqual([NaN, 0, NaN, 1, 2]);
|
||||
});
|
||||
|
||||
it('should calculate correct failure rate with time series data', async () => {
|
||||
mockQueueInstance.getMetrics.mockImplementation((type) => {
|
||||
if (type === 'failed') {
|
||||
return Promise.resolve({ count: 600, data: Array(10).fill(60) });
|
||||
}
|
||||
if (type === 'completed') {
|
||||
return Promise.resolve({ count: 400, data: Array(10).fill(40) });
|
||||
}
|
||||
|
||||
return Promise.resolve({ count: 0, data: [] });
|
||||
});
|
||||
|
||||
const result = await service.getQueueDetails(
|
||||
MessageQueue.messagingQueue,
|
||||
{
|
||||
pointsNeeded: 10,
|
||||
},
|
||||
);
|
||||
|
||||
expect(result).toBeDefined();
|
||||
expect(result?.metrics).toMatchObject({
|
||||
failed: 600,
|
||||
completed: 400,
|
||||
failureRate: 60,
|
||||
});
|
||||
});
|
||||
|
||||
it('should handle queue errors gracefully', async () => {
|
||||
mockQueueInstance.getWorkers.mockRejectedValue(new Error('Queue error'));
|
||||
mockQueueInstance.getMetrics.mockRejectedValue(new Error('Queue error'));
|
||||
|
||||
await expect(
|
||||
service.getQueueDetails(MessageQueue.messagingQueue),
|
||||
).rejects.toThrow('Queue error');
|
||||
|
||||
expect(loggerSpy).toHaveBeenCalledWith(
|
||||
`Error getting queue details for ${MessageQueue.messagingQueue}: Queue error`,
|
||||
);
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
@ -1,9 +1,13 @@
|
||||
import { Injectable } from '@nestjs/common';
|
||||
import { HealthIndicatorService } from '@nestjs/terminus';
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import {
|
||||
HealthIndicatorResult,
|
||||
HealthIndicatorService,
|
||||
} from '@nestjs/terminus';
|
||||
|
||||
import { Queue } from 'bullmq';
|
||||
|
||||
import { HEALTH_ERROR_MESSAGES } from 'src/engine/core-modules/health/constants/health-error-messages.constants';
|
||||
import { METRICS_FAILURE_RATE_THRESHOLD } from 'src/engine/core-modules/health/constants/metrics-failure-rate-threshold.const';
|
||||
import { WorkerQueueHealth } from 'src/engine/core-modules/health/types/worker-queue-health.type';
|
||||
import { withHealthCheckTimeout } from 'src/engine/core-modules/health/utils/health-check-timeout.util';
|
||||
import { MessageQueue } from 'src/engine/core-modules/message-queue/message-queue.constants';
|
||||
@ -11,12 +15,14 @@ import { RedisClientService } from 'src/engine/core-modules/redis-client/redis-c
|
||||
|
||||
@Injectable()
|
||||
export class WorkerHealthIndicator {
|
||||
private readonly logger = new Logger(WorkerHealthIndicator.name);
|
||||
|
||||
constructor(
|
||||
private readonly redisClient: RedisClientService,
|
||||
private readonly healthIndicatorService: HealthIndicatorService,
|
||||
) {}
|
||||
|
||||
async isHealthy() {
|
||||
async isHealthy(): Promise<HealthIndicatorResult> {
|
||||
const indicator = this.healthIndicatorService.check('worker');
|
||||
|
||||
try {
|
||||
@ -42,51 +48,106 @@ export class WorkerHealthIndicator {
|
||||
}
|
||||
}
|
||||
|
||||
private async checkWorkers() {
|
||||
async getQueueDetails(
|
||||
queueName: MessageQueue,
|
||||
options?: {
|
||||
pointsNeeded?: number;
|
||||
},
|
||||
): Promise<WorkerQueueHealth | null> {
|
||||
const redis = this.redisClient.getClient();
|
||||
const queue = new Queue(queueName, { connection: redis });
|
||||
|
||||
try {
|
||||
const workers = await queue.getWorkers();
|
||||
|
||||
if (workers.length > 0) {
|
||||
const metricsParams = options?.pointsNeeded
|
||||
? [0, options.pointsNeeded - 1]
|
||||
: [];
|
||||
|
||||
const [
|
||||
failedMetrics,
|
||||
completedMetrics,
|
||||
waitingCount,
|
||||
activeCount,
|
||||
delayedCount,
|
||||
] = await Promise.all([
|
||||
queue.getMetrics('failed', ...metricsParams),
|
||||
queue.getMetrics('completed', ...metricsParams),
|
||||
queue.getWaitingCount(),
|
||||
queue.getActiveCount(),
|
||||
queue.getDelayedCount(),
|
||||
]);
|
||||
|
||||
const failedCount = options?.pointsNeeded
|
||||
? this.calculateMetricsSum(failedMetrics.data)
|
||||
: failedMetrics.count;
|
||||
|
||||
const completedCount = options?.pointsNeeded
|
||||
? this.calculateMetricsSum(completedMetrics.data)
|
||||
: completedMetrics.count;
|
||||
|
||||
const totalJobs = failedCount + completedCount;
|
||||
const failureRate =
|
||||
totalJobs > 0
|
||||
? Number(((failedCount / totalJobs) * 100).toFixed(1))
|
||||
: 0;
|
||||
|
||||
return {
|
||||
queueName,
|
||||
workers: workers.length,
|
||||
status: failureRate > METRICS_FAILURE_RATE_THRESHOLD ? 'down' : 'up',
|
||||
metrics: {
|
||||
failed: failedCount,
|
||||
completed: completedCount,
|
||||
waiting: waitingCount,
|
||||
active: activeCount,
|
||||
delayed: delayedCount,
|
||||
failureRate,
|
||||
...(options?.pointsNeeded && {
|
||||
failedData: failedMetrics.data.map(Number),
|
||||
completedData: completedMetrics.data.map(Number),
|
||||
}),
|
||||
},
|
||||
};
|
||||
}
|
||||
|
||||
return null;
|
||||
} catch (error) {
|
||||
this.logger.error(
|
||||
`Error getting queue details for ${queueName}: ${error.message}`,
|
||||
);
|
||||
throw error;
|
||||
} finally {
|
||||
await queue.close();
|
||||
}
|
||||
}
|
||||
|
||||
private calculateMetricsSum(data: string[] | number[]): number {
|
||||
const sum = data.reduce((sum: number, value: string | number) => {
|
||||
const numericValue = Number(value);
|
||||
|
||||
return sum + (isNaN(numericValue) ? 0 : numericValue);
|
||||
}, 0);
|
||||
|
||||
return Math.round(Number(sum));
|
||||
}
|
||||
|
||||
private async checkWorkers() {
|
||||
const queues = Object.values(MessageQueue);
|
||||
const queueStatuses: WorkerQueueHealth[] = [];
|
||||
|
||||
for (const queueName of queues) {
|
||||
const queue = new Queue(queueName, { connection: redis });
|
||||
|
||||
try {
|
||||
const workers = await queue.getWorkers();
|
||||
const queueDetails = await this.getQueueDetails(queueName);
|
||||
|
||||
if (workers.length > 0) {
|
||||
const [
|
||||
failedCount,
|
||||
completedCount,
|
||||
waitingCount,
|
||||
activeCount,
|
||||
delayedCount,
|
||||
prioritizedCount,
|
||||
] = await Promise.all([
|
||||
queue.getFailedCount(),
|
||||
queue.getCompletedCount(),
|
||||
queue.getWaitingCount(),
|
||||
queue.getActiveCount(),
|
||||
queue.getDelayedCount(),
|
||||
queue.getPrioritizedCount(),
|
||||
]);
|
||||
|
||||
queueStatuses.push({
|
||||
queueName: queueName,
|
||||
workers: workers.length,
|
||||
metrics: {
|
||||
failed: failedCount,
|
||||
completed: completedCount,
|
||||
waiting: waitingCount,
|
||||
active: activeCount,
|
||||
delayed: delayedCount,
|
||||
prioritized: prioritizedCount,
|
||||
},
|
||||
});
|
||||
if (queueDetails) {
|
||||
queueStatuses.push(queueDetails);
|
||||
}
|
||||
|
||||
await queue.close();
|
||||
} catch (error) {
|
||||
await queue.close();
|
||||
this.logger.error(
|
||||
`Error checking worker for queue ${queueName}: ${error.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -7,6 +7,9 @@ export class WorkerQueueHealth {
|
||||
@Field(() => String)
|
||||
queueName: string;
|
||||
|
||||
@Field(() => String)
|
||||
status: string;
|
||||
|
||||
@Field(() => Number)
|
||||
workers: number;
|
||||
|
||||
|
||||
@ -18,5 +18,11 @@ export class WorkerQueueMetrics {
|
||||
delayed: number;
|
||||
|
||||
@Field(() => Number)
|
||||
prioritized: number;
|
||||
failureRate: number;
|
||||
|
||||
@Field(() => [Number], { nullable: true })
|
||||
failedData?: number[];
|
||||
|
||||
@Field(() => [Number], { nullable: true })
|
||||
completedData?: number[];
|
||||
}
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
import { OnModuleDestroy } from '@nestjs/common';
|
||||
|
||||
import { JobsOptions, Queue, QueueOptions, Worker } from 'bullmq';
|
||||
import { JobsOptions, MetricsTime, Queue, QueueOptions, Worker } from 'bullmq';
|
||||
import { isDefined } from 'twenty-shared';
|
||||
import { v4 } from 'uuid';
|
||||
|
||||
@ -50,12 +50,16 @@ export class BullMQDriver implements MessageQueueDriver, OnModuleDestroy {
|
||||
handler: (job: MessageQueueJob<T>) => Promise<void>,
|
||||
options?: MessageQueueWorkerOptions,
|
||||
) {
|
||||
const workerOptions = isDefined(options?.concurrency)
|
||||
? {
|
||||
...this.options,
|
||||
concurrency: options.concurrency,
|
||||
}
|
||||
: this.options;
|
||||
const workerOptions = {
|
||||
...this.options,
|
||||
...(isDefined(options?.concurrency)
|
||||
? { concurrency: options.concurrency }
|
||||
: {}),
|
||||
metrics: {
|
||||
maxDataPoints: MetricsTime.ONE_WEEK,
|
||||
collectInterval: 60000,
|
||||
},
|
||||
};
|
||||
|
||||
this.workerMap[queueName] = new Worker(
|
||||
queueName,
|
||||
|
||||
Reference in New Issue
Block a user