Set up metrics collection with OpenTelemetry (#11236)

Done:
- move metrics and health cache services from health module to metrics
module
- refactor metrics counter from specific method to set up from enum keys
- add OpenTelemetry (Otel) instrumentation for metrics
- set up Otel SDK to send metrics to Otel collector

To do later:
- implement Otel instrumentation for traces + plug Sentry on top
This commit is contained in:
Etienne
2025-03-28 08:45:24 +01:00
committed by GitHub
parent e9e33c4d29
commit 391392dd87
32 changed files with 575 additions and 297 deletions

View File

@ -0,0 +1,31 @@
import { MetricsKeys } from 'src/engine/core-modules/metrics/types/metrics-keys.type';
/**
 * Message-channel sync job statuses, each paired with the `MetricsKeys`
 * member under which that status is counted.
 *
 * Every entry has the `{ name, cacheKey }` shape: `name` is the status label
 * and `cacheKey` is the metrics key to read the count from.
 * NOTE(review): presumably consumed by a metrics-grouping helper that returns
 * one count per `name` — confirm against callers.
 */
export const MESSAGE_SYNC_METRICS_BY_STATUS = [
{
name: 'ACTIVE',
cacheKey: MetricsKeys.MessageChannelSyncJobActive,
},
{
name: 'FAILED_UNKNOWN',
cacheKey: MetricsKeys.MessageChannelSyncJobFailedUnknown,
},
{
name: 'FAILED_INSUFFICIENT_PERMISSIONS',
cacheKey: MetricsKeys.MessageChannelSyncJobFailedInsufficientPermissions,
},
];
/**
 * Calendar-event sync job statuses, each paired with the `MetricsKeys`
 * member under which that status is counted.
 *
 * Every entry has the `{ name, cacheKey }` shape: `name` is the status label
 * and `cacheKey` is the metrics key to read the count from.
 * NOTE(review): presumably consumed by a metrics-grouping helper that returns
 * one count per `name` — confirm against callers.
 */
export const CALENDAR_SYNC_METRICS_BY_STATUS = [
{
name: 'ACTIVE',
cacheKey: MetricsKeys.CalendarEventSyncJobActive,
},
{
name: 'FAILED_UNKNOWN',
cacheKey: MetricsKeys.CalendarEventSyncJobFailedUnknown,
},
{
name: 'FAILED_INSUFFICIENT_PERMISSIONS',
cacheKey: MetricsKeys.CalendarEventSyncJobFailedInsufficientPermissions,
},
];

View File

@ -0,0 +1,94 @@
import { Injectable } from '@nestjs/common';
import { InjectCacheStorage } from 'src/engine/core-modules/cache-storage/decorators/cache-storage.decorator';
import { CacheStorageService } from 'src/engine/core-modules/cache-storage/services/cache-storage.service';
import { CacheStorageNamespace } from 'src/engine/core-modules/cache-storage/types/cache-storage-namespace.enum';
import { EnvironmentService } from 'src/engine/core-modules/environment/environment.service';
import { MetricsKeys } from 'src/engine/core-modules/metrics/types/metrics-keys.type';
/** Width of a single cache bucket: each bucket covers a 15-second window. */
const CACHE_BUCKET_DURATION_MS = 15000;

/**
 * Stores metric events in time-bucketed cache sets and counts the members of
 * the most recent buckets.
 *
 * Each bucket is a cache set whose key is `<metric key>:<bucket start in ms>`,
 * where the bucket start is a multiple of `CACHE_BUCKET_DURATION_MS`.
 */
@Injectable()
export class MetricsCacheService {
  private readonly healthMetricsTimeWindowInMinutes: number;
  private readonly healthCacheTtl: number;

  constructor(
    @InjectCacheStorage(CacheStorageNamespace.EngineHealth)
    private readonly cacheStorage: CacheStorageService,
    private readonly environmentService: EnvironmentService,
  ) {
    const windowInMinutes = this.environmentService.get(
      'HEALTH_METRICS_TIME_WINDOW_IN_MINUTES',
    );

    this.healthMetricsTimeWindowInMinutes = windowInMinutes;
    // Buckets are kept for twice the configured window (expressed in ms).
    this.healthCacheTtl = windowInMinutes * 60000 * 2;
  }

  /** Rounds `timestamp` (ms) down to the start of the bucket containing it. */
  private getCacheBucketStartTimestamp(timestamp: number): number {
    const bucketIndex = Math.floor(timestamp / CACHE_BUCKET_DURATION_MS);

    return bucketIndex * CACHE_BUCKET_DURATION_MS;
  }

  /**
   * Builds the cache key of a bucket.
   *
   * @param key - Metric key used as the prefix.
   * @param timestamp - Value used verbatim as the suffix; when omitted, the
   *   start of the bucket containing the current time is used.
   */
  private getCacheKeyWithTimestamp(key: string, timestamp?: number): string {
    const suffix =
      timestamp ?? this.getCacheBucketStartTimestamp(Date.now());

    return `${key}:${suffix}`;
  }

  /**
   * Lists the start timestamps of the `cacheBucketsCount` most recent
   * buckets, newest first, beginning with the bucket that contains `date`.
   */
  private getLastCacheBucketStartTimestampsFromDate(
    cacheBucketsCount: number,
    date: number,
  ): number[] {
    const newestBucketStart = this.getCacheBucketStartTimestamp(date);
    const offsets = Array.from(
      { length: cacheBucketsCount },
      (_, index) => index * CACHE_BUCKET_DURATION_MS,
    );

    return offsets.map((offset) => newestBucketStart - offset);
  }

  /**
   * Adds `items` to the set of the current bucket for `key`, passing the
   * service TTL to the cache storage.
   *
   * @returns Whatever the cache storage's `setAdd` resolves to.
   */
  async updateCounter(key: MetricsKeys, items: string[]) {
    const currentBucketKey = this.getCacheKeyWithTimestamp(key);

    return await this.cacheStorage.setAdd(
      currentBucketKey,
      items,
      this.healthCacheTtl,
    );
  }

  /**
   * Counts the set members stored for `key` over the buckets covering the
   * requested time window.
   *
   * @param key - Metric key to count.
   * @param timeWindowInSeconds - Window length; defaults to the configured
   *   health metrics window. Must be a whole number of buckets.
   * @param date - Timestamp (ms) the window ends at; defaults to now.
   * @returns The value resolved by the cache storage's `countAllSetMembers`.
   * @throws Error when the window is not a multiple of the bucket duration.
   */
  async computeCount({
    key,
    timeWindowInSeconds = this.healthMetricsTimeWindowInMinutes * 60,
    date = Date.now(),
  }: {
    key: MetricsKeys;
    timeWindowInSeconds?: number;
    date?: number;
  }): Promise<number> {
    const timeWindowInMs = timeWindowInSeconds * 1000;
    const isWholeNumberOfBuckets =
      timeWindowInMs % CACHE_BUCKET_DURATION_MS === 0;

    if (!isWholeNumberOfBuckets) {
      throw new Error(
        `Time window must be divisible by ${CACHE_BUCKET_DURATION_MS}`,
      );
    }

    const bucketDurationInSeconds = CACHE_BUCKET_DURATION_MS / 1000;
    const cacheBuckets = timeWindowInSeconds / bucketDurationInSeconds;
    const cacheKeys = this.computeTimeStampedCacheKeys(key, cacheBuckets, date);

    return await this.cacheStorage.countAllSetMembers(cacheKeys);
  }

  /**
   * Returns the cache keys of the `cacheBucketsCount` most recent buckets for
   * `key`, newest first, starting from the bucket that contains `date`.
   */
  computeTimeStampedCacheKeys(
    key: string,
    cacheBucketsCount: number,
    date: number,
  ) {
    const bucketStarts = this.getLastCacheBucketStartTimestampsFromDate(
      cacheBucketsCount,
      date,
    );

    return bucketStarts.map((bucketStart) =>
      this.getCacheKeyWithTimestamp(key, bucketStart),
    );
  }
}

View File

@ -0,0 +1,10 @@
import { Module } from '@nestjs/common';
import { MetricsCacheService } from 'src/engine/core-modules/metrics/metrics-cache.service';
import { MetricsService } from 'src/engine/core-modules/metrics/metrics.service';
/**
 * NestJS module for metrics: registers `MetricsService` and
 * `MetricsCacheService` as providers and exports both so importing modules
 * can inject them.
 */
@Module({
providers: [MetricsService, MetricsCacheService],
exports: [MetricsService, MetricsCacheService],
})
export class MetricsModule {}

View File

@ -0,0 +1,70 @@
import { Injectable } from '@nestjs/common';
import { metrics } from '@opentelemetry/api';
import { MetricsCacheService } from 'src/engine/core-modules/metrics/metrics-cache.service';
import { MetricsKeys } from 'src/engine/core-modules/metrics/types/metrics-keys.type';
/**
 * Records metrics as OpenTelemetry counters and, optionally, in the metrics
 * cache so that recent counts can be read back with `groupMetrics`.
 */
@Injectable()
export class MetricsService {
  constructor(private readonly metricsCacheService: MetricsCacheService) {}

  /**
   * Adds 1 to the counter named `key`.
   *
   * @param key - Metric to increment; also used as the counter name.
   * @param eventId - Identifier written to the metrics cache for this event.
   * @param shouldStoreInCache - When `false`, only the OpenTelemetry counter
   *   is updated. Defaults to `true`.
   * @returns A promise that settles after the cache write (if any) completes.
   * @throws Rejects with the cache error when the cache write fails.
   */
  async incrementCounter({
    key,
    eventId,
    shouldStoreInCache = true,
  }: {
    key: MetricsKeys;
    eventId: string;
    shouldStoreInCache?: boolean;
  }): Promise<void> {
    // A single event is a batch of one: same counter delta, same cache write.
    await this.batchIncrementCounter({
      key,
      eventIds: [eventId],
      shouldStoreInCache,
    });
  }

  /**
   * Adds `eventIds.length` to the counter named `key`.
   *
   * @param key - Metric to increment; also used as the counter name.
   * @param eventIds - Identifiers written to the metrics cache.
   * @param shouldStoreInCache - When `false`, only the OpenTelemetry counter
   *   is updated. Defaults to `true`.
   * @returns A promise that settles after the cache write (if any) completes.
   * @throws Rejects with the cache error when the cache write fails.
   */
  async batchIncrementCounter({
    key,
    eventIds,
    shouldStoreInCache = true,
  }: {
    key: MetricsKeys;
    eventIds: string[];
    shouldStoreInCache?: boolean;
  }): Promise<void> {
    //TODO : Define meter name usage in monitoring
    const meter = metrics.getMeter('twenty-server');
    const counter = meter.createCounter(key);

    counter.add(eventIds.length);

    if (shouldStoreInCache) {
      // Awaited so a failed write rejects this call instead of surfacing as
      // an unhandled promise rejection.
      await this.metricsCacheService.updateCounter(key, eventIds);
    }
  }

  /**
   * Reads the cached count of every listed metric at a single point in time.
   *
   * @param metrics - Pairs of output name and metric key to count.
   * @returns An object mapping each `name` to its count, in input order.
   */
  async groupMetrics(
    metrics: { name: string; cacheKey: MetricsKeys }[],
  ): Promise<Record<string, number>> {
    // One shared timestamp so every metric is counted over the same buckets.
    const date = Date.now();

    // The reads are independent, so they are issued concurrently.
    const entries = await Promise.all(
      metrics.map(
        async ({ name, cacheKey }): Promise<[string, number]> => [
          name,
          await this.metricsCacheService.computeCount({ key: cacheKey, date }),
        ],
      ),
    );

    return Object.fromEntries(entries);
  }
}

View File

@ -0,0 +1,4 @@
/**
 * Identifiers of the supported meter drivers.
 * NOTE(review): presumably selects where recorded metrics are exported
 * (an OpenTelemetry collector vs. the console) — confirm against the SDK
 * set-up code that reads this value.
 */
export enum MeterDriver {
OpenTelemetry = 'opentelemetry',
Console = 'console',
}

View File

@ -0,0 +1,9 @@
/**
 * Names of the metrics tracked by the application.
 *
 * Members are grouped by subject: message-channel sync jobs,
 * calendar-event sync jobs, and invalid captcha attempts.
 * NOTE(review): the string values appear to double as counter names and
 * cache-key prefixes, so renaming one would start a new series — confirm
 * against the code that consumes these keys.
 */
export enum MetricsKeys {
MessageChannelSyncJobActive = 'message-channel-sync-job/active',
MessageChannelSyncJobFailedInsufficientPermissions = 'message-channel-sync-job/failed-insufficient-permissions',
MessageChannelSyncJobFailedUnknown = 'message-channel-sync-job/failed-unknown',
CalendarEventSyncJobActive = 'calendar-event-sync-job/active',
CalendarEventSyncJobFailedInsufficientPermissions = 'calendar-event-sync-job/failed-insufficient-permissions',
CalendarEventSyncJobFailedUnknown = 'calendar-event-sync-job/failed-unknown',
InvalidCaptcha = 'invalid-captcha',
}