Health monitor status for admin panel (#10186)

# Health Monitoring for Self-Hosted Instances

This PR implements basic health monitoring for self-hosted instances in
the admin panel.

## Service Status Checks
We're adding real-time health checks for:
- Redis Connection
- Database Connection
- Worker Status
- Message Sync Status

## Existing Functionality
We already have message sync and captcha counters that store aggregated
metrics in cache within a configurable time window (default: 5 minutes).

## New Endpoints
1. `/healthz` - Basic server health check for Kubernetes pod monitoring
2. `/healthz/{serviceName}` - Individual service health checks (returns
200 if healthy)
3. `/metricsz/{metricName}` - Time-windowed metrics (message sync,
captcha)
4. GraphQL resolver in admin panel for UI consumption

All endpoints use the same underlying service, with different
presentation layers for infrastructure and UI needs.

---------

Co-authored-by: Félix Malfait <felix@twenty.com>
This commit is contained in:
nitin
2025-02-18 20:22:19 +05:30
committed by GitHub
parent 2fca60436b
commit d6655a2c3b
54 changed files with 2307 additions and 95 deletions

View File

@ -25,6 +25,33 @@ export type ActivateWorkspaceInput = {
displayName?: InputMaybe<Scalars['String']>;
};
/**
 * Health report for a single service, as returned by `getIndicatorHealthStatus`
 * and by each member of `SystemHealth`.
 *
 * `status` is always present; `details` and `queues` are optional and nullable.
 */
export type AdminPanelHealthServiceData = {
  __typename?: 'AdminPanelHealthServiceData';
  status: AdminPanelHealthServiceStatus;
  details?: Maybe<Scalars['String']>;
  queues?: Maybe<AdminPanelWorkerQueueHealth[]>;
};
/**
 * Status value carried by the `status` field of `AdminPanelHealthServiceData`
 * and `AdminPanelWorkerQueueHealth`.
 */
export enum AdminPanelHealthServiceStatus {
  OPERATIONAL = 'OPERATIONAL',
  OUTAGE = 'OUTAGE',
}
/**
 * Indicator names accepted as the `indicatorName` argument of the
 * `getIndicatorHealthStatus` query.
 */
export enum AdminPanelIndicatorHealthStatusInputEnum {
  DATABASE = 'DATABASE',
  MESSAGE_SYNC = 'MESSAGE_SYNC',
  REDIS = 'REDIS',
  WORKER = 'WORKER',
}
/**
 * Health entry for one worker queue: its name, status, a numeric `workers`
 * value and the queue's job metrics.
 */
export type AdminPanelWorkerQueueHealth = {
  __typename?: 'AdminPanelWorkerQueueHealth';
  name: Scalars['String'];
  status: AdminPanelHealthServiceStatus;
  workers: Scalars['Float'];
  metrics: WorkerQueueMetrics;
};
export type Analytics = {
__typename?: 'Analytics';
/** Boolean that confirms query was dispatched */
@ -1232,11 +1259,13 @@ export type Query = {
findWorkspaceInvitations: Array<WorkspaceInvitation>;
getAvailablePackages: Scalars['JSON'];
getEnvironmentVariablesGrouped: EnvironmentVariablesOutput;
getIndicatorHealthStatus: AdminPanelHealthServiceData;
getPostgresCredentials?: Maybe<PostgresCredentials>;
getProductPrices: BillingProductPricesOutput;
getPublicWorkspaceDataByDomain: PublicWorkspaceDataOutput;
getRoles: Array<Role>;
getServerlessFunctionSourceCode?: Maybe<Scalars['JSON']>;
getSystemHealthStatus: SystemHealth;
getTimelineCalendarEventsFromCompanyId: TimelineCalendarEventsWithTotal;
getTimelineCalendarEventsFromPersonId: TimelineCalendarEventsWithTotal;
getTimelineThreadsFromCompanyId: TimelineThreadsWithTotal;
@ -1287,6 +1316,11 @@ export type QueryGetAvailablePackagesArgs = {
};
/** Arguments of the `getIndicatorHealthStatus` query: the indicator to report on. */
export type QueryGetIndicatorHealthStatusArgs = {
  indicatorName: AdminPanelIndicatorHealthStatusInputEnum;
};
/**
 * Argument object carrying a single required `product` string.
 * Presumably the arguments of the `getProductPrices` query field (inferred
 * from the generated `Query<Field>Args` naming) — confirm against the schema.
 */
export type QueryGetProductPricesArgs = {
  product: Scalars['String'];
};
@ -1605,6 +1639,14 @@ export type Support = {
supportFrontChatId?: Maybe<Scalars['String']>;
};
/**
 * Result of the `getSystemHealthStatus` query: one
 * `AdminPanelHealthServiceData` report per monitored service.
 */
export type SystemHealth = {
  __typename?: 'SystemHealth';
  database: AdminPanelHealthServiceData;
  redis: AdminPanelHealthServiceData;
  worker: AdminPanelHealthServiceData;
  messageSync: AdminPanelHealthServiceData;
};
export type TimelineCalendarEvent = {
__typename?: 'TimelineCalendarEvent';
conferenceLink: LinksMetadata;
@ -1854,6 +1896,16 @@ export type ValidatePasswordResetToken = {
id: Scalars['String'];
};
/**
 * Numeric metrics attached to a worker queue (see
 * `AdminPanelWorkerQueueHealth.metrics`), one value per job state.
 */
export type WorkerQueueMetrics = {
  __typename?: 'WorkerQueueMetrics';
  failed: Scalars['Float'];
  completed: Scalars['Float'];
  waiting: Scalars['Float'];
  active: Scalars['Float'];
  delayed: Scalars['Float'];
  prioritized: Scalars['Float'];
};
export type WorkflowAction = {
__typename?: 'WorkflowAction';
id: Scalars['UUID'];
@ -2250,6 +2302,18 @@ export type GetEnvironmentVariablesGroupedQueryVariables = Exact<{ [key: string]
/**
 * Result shape of the `getEnvironmentVariablesGrouped` query: a list of
 * groups, each with its own list of environment variables.
 */
export type GetEnvironmentVariablesGroupedQuery = {
  __typename?: 'Query';
  getEnvironmentVariablesGrouped: {
    __typename?: 'EnvironmentVariablesOutput';
    groups: Array<{
      __typename?: 'EnvironmentVariablesGroupData';
      name: EnvironmentVariablesGroup;
      description: string;
      isHiddenOnLoad: boolean;
      variables: Array<{
        __typename?: 'EnvironmentVariable';
        name: string;
        description: string;
        value: string;
        sensitive: boolean;
      }>;
    }>;
  };
};
/** Variables for the `GetIndicatorHealthStatus` operation: a required `indicatorName`. */
export type GetIndicatorHealthStatusQueryVariables = Exact<{
  indicatorName: AdminPanelIndicatorHealthStatusInputEnum;
}>;
/**
 * Result shape of the `GetIndicatorHealthStatus` operation: the indicator's
 * status, optional details, and an optional list of worker queues with
 * their metrics.
 */
export type GetIndicatorHealthStatusQuery = {
  __typename?: 'Query';
  getIndicatorHealthStatus: {
    __typename?: 'AdminPanelHealthServiceData';
    status: AdminPanelHealthServiceStatus;
    details?: string | null;
    queues?: Array<{
      __typename?: 'AdminPanelWorkerQueueHealth';
      name: string;
      status: AdminPanelHealthServiceStatus;
      workers: number;
      metrics: {
        __typename?: 'WorkerQueueMetrics';
        failed: number;
        completed: number;
        waiting: number;
        active: number;
        delayed: number;
        prioritized: number;
      };
    }> | null;
  };
};
/** Variables for the `GetSystemHealthStatus` operation; it takes none. */
export type GetSystemHealthStatusQueryVariables = Exact<{
  [key: string]: never;
}>;
/**
 * Result shape of the `GetSystemHealthStatus` operation. `database`, `redis`
 * and `messageSync` select status and details; `worker` selects status and
 * its queues with metrics.
 */
export type GetSystemHealthStatusQuery = {
  __typename?: 'Query';
  getSystemHealthStatus: {
    __typename?: 'SystemHealth';
    database: {
      __typename?: 'AdminPanelHealthServiceData';
      status: AdminPanelHealthServiceStatus;
      details?: string | null;
    };
    redis: {
      __typename?: 'AdminPanelHealthServiceData';
      status: AdminPanelHealthServiceStatus;
      details?: string | null;
    };
    worker: {
      __typename?: 'AdminPanelHealthServiceData';
      status: AdminPanelHealthServiceStatus;
      queues?: Array<{
        __typename?: 'AdminPanelWorkerQueueHealth';
        name: string;
        workers: number;
        status: AdminPanelHealthServiceStatus;
        metrics: {
          __typename?: 'WorkerQueueMetrics';
          failed: number;
          completed: number;
          waiting: number;
          active: number;
          delayed: number;
          prioritized: number;
        };
      }> | null;
    };
    messageSync: {
      __typename?: 'AdminPanelHealthServiceData';
      status: AdminPanelHealthServiceStatus;
      details?: string | null;
    };
  };
};
/** Variables for the `UpdateLabPublicFeatureFlag` mutation: a required `input`. */
export type UpdateLabPublicFeatureFlagMutationVariables = Exact<{
  input: UpdateLabPublicFeatureFlagInput;
}>;
@ -3960,6 +4024,116 @@ export function useGetEnvironmentVariablesGroupedLazyQuery(baseOptions?: Apollo.
/** Return type of `useGetEnvironmentVariablesGroupedQuery`. */
export type GetEnvironmentVariablesGroupedQueryHookResult = ReturnType<
  typeof useGetEnvironmentVariablesGroupedQuery
>;
/** Return type of `useGetEnvironmentVariablesGroupedLazyQuery`. */
export type GetEnvironmentVariablesGroupedLazyQueryHookResult = ReturnType<
  typeof useGetEnvironmentVariablesGroupedLazyQuery
>;
/** Apollo `QueryResult` specialised to the GetEnvironmentVariablesGrouped data and variables types. */
export type GetEnvironmentVariablesGroupedQueryResult = Apollo.QueryResult<
  GetEnvironmentVariablesGroupedQuery,
  GetEnvironmentVariablesGroupedQueryVariables
>;
/**
 * GraphQL document for the `GetIndicatorHealthStatus` query.
 *
 * Takes a required `$indicatorName` and selects the indicator's `status`,
 * `details`, and for each entry in `queues` its `name`, `status`, `workers`
 * and `metrics` counters. Passed to Apollo by the
 * `useGetIndicatorHealthStatus*` hooks.
 */
export const GetIndicatorHealthStatusDocument = gql`
query GetIndicatorHealthStatus($indicatorName: AdminPanelIndicatorHealthStatusInputEnum!) {
getIndicatorHealthStatus(indicatorName: $indicatorName) {
status
details
queues {
name
status
workers
metrics {
failed
completed
waiting
active
delayed
prioritized
}
}
}
}
`;
/**
 * __useGetIndicatorHealthStatusQuery__
 *
 * React hook that runs the `GetIndicatorHealthStatus` query through `Apollo.useQuery`.
 * The returned Apollo Client object exposes `loading`, `error` and `data` for rendering.
 *
 * `baseOptions` is mandatory here; it is spread on top of `defaultOptions`,
 * so caller-supplied keys take precedence.
 *
 * @param baseOptions options that will be passed into the query, supported options are listed on: https://www.apollographql.com/docs/react/api/react-hooks/#options;
 *
 * @example
 * const { data, loading, error } = useGetIndicatorHealthStatusQuery({
 *   variables: {
 *      indicatorName: // value for 'indicatorName'
 *   },
 * });
 */
export function useGetIndicatorHealthStatusQuery(
  baseOptions: Apollo.QueryHookOptions<GetIndicatorHealthStatusQuery, GetIndicatorHealthStatusQueryVariables>,
) {
  const mergedOptions = { ...defaultOptions, ...baseOptions };
  return Apollo.useQuery<GetIndicatorHealthStatusQuery, GetIndicatorHealthStatusQueryVariables>(
    GetIndicatorHealthStatusDocument,
    mergedOptions,
  );
}
/**
 * Lazy counterpart of `useGetIndicatorHealthStatusQuery`: forwards the
 * `GetIndicatorHealthStatus` document to `Apollo.useLazyQuery`.
 *
 * @param baseOptions optional Apollo lazy-query options; spread on top of
 * `defaultOptions`, so caller-supplied keys take precedence.
 */
export function useGetIndicatorHealthStatusLazyQuery(
  baseOptions?: Apollo.LazyQueryHookOptions<GetIndicatorHealthStatusQuery, GetIndicatorHealthStatusQueryVariables>,
) {
  const mergedOptions = { ...defaultOptions, ...baseOptions };
  return Apollo.useLazyQuery<GetIndicatorHealthStatusQuery, GetIndicatorHealthStatusQueryVariables>(
    GetIndicatorHealthStatusDocument,
    mergedOptions,
  );
}
/** Return type of `useGetIndicatorHealthStatusQuery`. */
export type GetIndicatorHealthStatusQueryHookResult = ReturnType<
  typeof useGetIndicatorHealthStatusQuery
>;
/** Return type of `useGetIndicatorHealthStatusLazyQuery`. */
export type GetIndicatorHealthStatusLazyQueryHookResult = ReturnType<
  typeof useGetIndicatorHealthStatusLazyQuery
>;
/** Apollo `QueryResult` specialised to the GetIndicatorHealthStatus data and variables types. */
export type GetIndicatorHealthStatusQueryResult = Apollo.QueryResult<
  GetIndicatorHealthStatusQuery,
  GetIndicatorHealthStatusQueryVariables
>;
/**
 * GraphQL document for the `GetSystemHealthStatus` query.
 *
 * Takes no variables. Selects `status` and `details` for `database`, `redis`
 * and `messageSync`; for `worker` it selects `status` plus each queue's
 * `name`, `workers`, `status` and `metrics` counters (no `details`).
 * Passed to Apollo by the `useGetSystemHealthStatus*` hooks.
 */
export const GetSystemHealthStatusDocument = gql`
query GetSystemHealthStatus {
getSystemHealthStatus {
database {
status
details
}
redis {
status
details
}
worker {
status
queues {
name
workers
status
metrics {
failed
completed
waiting
active
delayed
prioritized
}
}
}
messageSync {
status
details
}
}
}
`;
/**
 * __useGetSystemHealthStatusQuery__
 *
 * React hook that runs the `GetSystemHealthStatus` query through `Apollo.useQuery`.
 * The returned Apollo Client object exposes `loading`, `error` and `data` for rendering.
 *
 * `baseOptions` may be omitted; when given it is spread on top of
 * `defaultOptions`, so caller-supplied keys take precedence.
 *
 * @param baseOptions options that will be passed into the query, supported options are listed on: https://www.apollographql.com/docs/react/api/react-hooks/#options;
 *
 * @example
 * const { data, loading, error } = useGetSystemHealthStatusQuery({
 *   variables: {
 *   },
 * });
 */
export function useGetSystemHealthStatusQuery(
  baseOptions?: Apollo.QueryHookOptions<GetSystemHealthStatusQuery, GetSystemHealthStatusQueryVariables>,
) {
  const mergedOptions = { ...defaultOptions, ...baseOptions };
  return Apollo.useQuery<GetSystemHealthStatusQuery, GetSystemHealthStatusQueryVariables>(
    GetSystemHealthStatusDocument,
    mergedOptions,
  );
}
/**
 * Lazy counterpart of `useGetSystemHealthStatusQuery`: forwards the
 * `GetSystemHealthStatus` document to `Apollo.useLazyQuery`.
 *
 * @param baseOptions optional Apollo lazy-query options; spread on top of
 * `defaultOptions`, so caller-supplied keys take precedence.
 */
export function useGetSystemHealthStatusLazyQuery(
  baseOptions?: Apollo.LazyQueryHookOptions<GetSystemHealthStatusQuery, GetSystemHealthStatusQueryVariables>,
) {
  const mergedOptions = { ...defaultOptions, ...baseOptions };
  return Apollo.useLazyQuery<GetSystemHealthStatusQuery, GetSystemHealthStatusQueryVariables>(
    GetSystemHealthStatusDocument,
    mergedOptions,
  );
}
/** Return type of `useGetSystemHealthStatusQuery`. */
export type GetSystemHealthStatusQueryHookResult = ReturnType<
  typeof useGetSystemHealthStatusQuery
>;
/** Return type of `useGetSystemHealthStatusLazyQuery`. */
export type GetSystemHealthStatusLazyQueryHookResult = ReturnType<
  typeof useGetSystemHealthStatusLazyQuery
>;
/** Apollo `QueryResult` specialised to the GetSystemHealthStatus data and variables types. */
export type GetSystemHealthStatusQueryResult = Apollo.QueryResult<
  GetSystemHealthStatusQuery,
  GetSystemHealthStatusQueryVariables
>;
export const UpdateLabPublicFeatureFlagDocument = gql`
mutation UpdateLabPublicFeatureFlag($input: UpdateLabPublicFeatureFlagInput!) {
updateLabPublicFeatureFlag(input: $input) {