[messaing] improve messaging import (#4650)

* [messaging] improve full-sync fetching strategy

* fix

* rebase

* fix

* fix

* fix rebase

* fix

* fix

* fix

* fix

* fix

* remove deletion

* fix setPop with memory storage

* fix pgBoss and remove unnecessary job

* fix throw

* fix

* add timeout to ongoing sync
This commit is contained in:
Weiko
2024-03-27 12:44:03 +01:00
committed by GitHub
parent 5c0b65eecb
commit 5c40e3608b
48 changed files with 1728 additions and 168 deletions

View File

@ -0,0 +1,31 @@
import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { FeatureFlagEntity } from 'src/engine/core-modules/feature-flag/feature-flag.entity';
import { ObjectMetadataRepositoryModule } from 'src/engine/object-metadata-repository/object-metadata-repository.module';
import { WorkspaceDataSourceModule } from 'src/engine/workspace-datasource/workspace-datasource.module';
import { BlocklistObjectMetadata } from 'src/modules/connected-account/standard-objects/blocklist.object-metadata';
import { ConnectedAccountObjectMetadata } from 'src/modules/connected-account/standard-objects/connected-account.object-metadata';
import { FetchMessagesByBatchesModule } from 'src/modules/messaging/services/fetch-messages-by-batches/fetch-messages-by-batches.module';
import { GmailFullSyncV2Service } from 'src/modules/messaging/services/gmail-full-sync-v2/gmail-full-sync.v2.service';
import { MessagingProvidersModule } from 'src/modules/messaging/services/providers/messaging-providers.module';
import { MessageChannelMessageAssociationObjectMetadata } from 'src/modules/messaging/standard-objects/message-channel-message-association.object-metadata';
import { MessageChannelObjectMetadata } from 'src/modules/messaging/standard-objects/message-channel.object-metadata';
@Module({
imports: [
MessagingProvidersModule,
FetchMessagesByBatchesModule,
ObjectMetadataRepositoryModule.forFeature([
ConnectedAccountObjectMetadata,
MessageChannelObjectMetadata,
MessageChannelMessageAssociationObjectMetadata,
BlocklistObjectMetadata,
]),
TypeOrmModule.forFeature([FeatureFlagEntity], 'core'),
WorkspaceDataSourceModule,
],
providers: [GmailFullSyncV2Service],
exports: [GmailFullSyncV2Service],
})
export class GmailFullSynV2Module {}

View File

@ -0,0 +1,302 @@
import { Injectable, Logger } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { EntityManager, Repository } from 'typeorm';
import { gmail_v1 } from 'googleapis';
import {
FeatureFlagEntity,
FeatureFlagKeys,
} from 'src/engine/core-modules/feature-flag/feature-flag.entity';
import { CacheStorageService } from 'src/engine/integrations/cache-storage/cache-storage.service';
import { InjectCacheStorage } from 'src/engine/integrations/cache-storage/decorators/cache-storage.decorator';
import { CacheStorageNamespace } from 'src/engine/integrations/cache-storage/types/cache-storage-namespace.enum';
import { InjectObjectMetadataRepository } from 'src/engine/object-metadata-repository/object-metadata-repository.decorator';
import { BlocklistRepository } from 'src/modules/connected-account/repositories/blocklist.repository';
import { ConnectedAccountRepository } from 'src/modules/connected-account/repositories/connected-account.repository';
import { BlocklistObjectMetadata } from 'src/modules/connected-account/standard-objects/blocklist.object-metadata';
import { ConnectedAccountObjectMetadata } from 'src/modules/connected-account/standard-objects/connected-account.object-metadata';
import { GMAIL_USERS_MESSAGES_LIST_MAX_RESULT } from 'src/modules/messaging/constants/gmail-users-messages-list-max-result.constant';
import { MessageChannelMessageAssociationRepository } from 'src/modules/messaging/repositories/message-channel-message-association.repository';
import { MessageChannelRepository } from 'src/modules/messaging/repositories/message-channel.repository';
import { GmailClientProvider } from 'src/modules/messaging/services/providers/gmail/gmail-client.provider';
import { MessageChannelMessageAssociationObjectMetadata } from 'src/modules/messaging/standard-objects/message-channel-message-association.object-metadata';
import {
MessageChannelObjectMetadata,
MessageChannelSyncStatus,
} from 'src/modules/messaging/standard-objects/message-channel.object-metadata';
import { gmailSearchFilterExcludeEmails } from 'src/modules/messaging/utils/gmail-search-filter.util';
import { WorkspaceDataSourceService } from 'src/engine/workspace-datasource/workspace-datasource.service';
@Injectable()
export class GmailFullSyncV2Service {
private readonly logger = new Logger(GmailFullSyncV2Service.name);
constructor(
private readonly gmailClientProvider: GmailClientProvider,
@InjectObjectMetadataRepository(ConnectedAccountObjectMetadata)
private readonly connectedAccountRepository: ConnectedAccountRepository,
@InjectObjectMetadataRepository(MessageChannelObjectMetadata)
private readonly messageChannelRepository: MessageChannelRepository,
@InjectObjectMetadataRepository(BlocklistObjectMetadata)
private readonly blocklistRepository: BlocklistRepository,
@InjectRepository(FeatureFlagEntity, 'core')
private readonly featureFlagRepository: Repository<FeatureFlagEntity>,
@InjectCacheStorage(CacheStorageNamespace.Messaging)
private readonly cacheStorage: CacheStorageService,
@InjectObjectMetadataRepository(
MessageChannelMessageAssociationObjectMetadata,
)
private readonly messageChannelMessageAssociationRepository: MessageChannelMessageAssociationRepository,
private readonly workspaceDataSourceService: WorkspaceDataSourceService,
) {}
public async fetchConnectedAccountThreads(
workspaceId: string,
connectedAccountId: string,
) {
const connectedAccount = await this.connectedAccountRepository.getById(
connectedAccountId,
workspaceId,
);
if (!connectedAccount) {
this.logger.error(
`Connected account ${connectedAccountId} not found in workspace ${workspaceId}`,
);
return;
}
const refreshToken = connectedAccount.refreshToken;
if (!refreshToken) {
throw new Error(
`No refresh token found for connected account ${connectedAccountId} in workspace ${workspaceId}`,
);
}
const gmailMessageChannel =
await this.messageChannelRepository.getFirstByConnectedAccountId(
connectedAccountId,
workspaceId,
);
if (!gmailMessageChannel) {
this.logger.error(
`No message channel found for connected account ${connectedAccountId} in workspace ${workspaceId}`,
);
return;
}
if (gmailMessageChannel.syncStatus === MessageChannelSyncStatus.ONGOING) {
this.logger.log(
`Messaging import for workspace ${workspaceId} and account ${connectedAccountId} is locked, import will be retried later.`,
);
return;
}
await this.messageChannelRepository.updateSyncStatus(
gmailMessageChannel.id,
MessageChannelSyncStatus.ONGOING,
workspaceId,
);
const workspaceDataSource =
await this.workspaceDataSourceService.connectToWorkspaceDataSource(
workspaceId,
);
await workspaceDataSource
?.transaction(async (transactionManager) => {
const gmailClient: gmail_v1.Gmail =
await this.gmailClientProvider.getGmailClient(refreshToken);
const blocklistedEmails = await this.fetchBlocklistEmails(
connectedAccount.accountOwnerId,
workspaceId,
);
await this.fetchAllMessageIdsFromGmailAndStoreInCache(
gmailClient,
gmailMessageChannel.id,
blocklistedEmails,
workspaceId,
transactionManager,
);
await this.messageChannelRepository.updateSyncStatus(
gmailMessageChannel.id,
MessageChannelSyncStatus.PENDING,
workspaceId,
transactionManager,
);
})
.catch(async (error) => {
await this.messageChannelRepository.updateSyncStatus(
gmailMessageChannel.id,
MessageChannelSyncStatus.FAILED,
workspaceId,
);
throw new Error(
`Error fetching messages for ${connectedAccountId} in workspace ${workspaceId}: ${error.message}`,
);
});
}
public async fetchAllMessageIdsFromGmailAndStoreInCache(
gmailClient: gmail_v1.Gmail,
messageChannelId: string,
blocklistedEmails: string[],
workspaceId: string,
transactionManager?: EntityManager,
) {
let pageToken: string | undefined;
let hasMoreMessages = true;
let messageIdsToFetch = 0;
let firstMessageExternalId;
while (hasMoreMessages) {
const response = await gmailClient.users.messages.list({
userId: 'me',
maxResults: GMAIL_USERS_MESSAGES_LIST_MAX_RESULT,
pageToken,
q: gmailSearchFilterExcludeEmails(blocklistedEmails),
});
if (response.data?.messages) {
const messageExternalIds = response.data.messages
.filter((message): message is { id: string } => message.id != null)
.map((message) => message.id);
if (!firstMessageExternalId) {
firstMessageExternalId = messageExternalIds[0];
}
const existingMessageChannelMessageAssociations =
await this.messageChannelMessageAssociationRepository.getByMessageExternalIdsAndMessageChannelId(
messageExternalIds,
messageChannelId,
workspaceId,
transactionManager,
);
const existingMessageChannelMessageAssociationsExternalIds =
existingMessageChannelMessageAssociations.map(
(messageChannelMessageAssociation) =>
messageChannelMessageAssociation.messageExternalId,
);
const messageIdsToImport = messageExternalIds.filter(
(messageExternalId) =>
!existingMessageChannelMessageAssociationsExternalIds.includes(
messageExternalId,
),
);
if (messageIdsToImport && messageIdsToImport.length) {
await this.cacheStorage.setAdd(
`messages-to-import:${workspaceId}:gmail:${messageChannelId}`,
messageIdsToImport,
);
messageIdsToFetch += messageIdsToImport.length;
}
}
pageToken = response.data.nextPageToken ?? undefined;
hasMoreMessages = !!pageToken;
}
if (!messageIdsToFetch) {
this.logger.log(
`No messages found in Gmail for messageChannel ${messageChannelId} in workspace ${workspaceId}`,
);
return;
}
this.logger.log(
`Fetched all ${messageIdsToFetch} message ids from Gmail for messageChannel ${messageChannelId} in workspace ${workspaceId} and added to cache for import`,
);
await this.updateLastSyncExternalId(
gmailClient,
messageChannelId,
firstMessageExternalId,
workspaceId,
transactionManager,
);
}
public async fetchBlocklistEmails(
workspaceMemberId: string,
workspaceId: string,
) {
const isBlocklistEnabledFeatureFlag =
await this.featureFlagRepository.findOneBy({
workspaceId,
key: FeatureFlagKeys.IsBlocklistEnabled,
value: true,
});
const isBlocklistEnabled =
isBlocklistEnabledFeatureFlag && isBlocklistEnabledFeatureFlag.value;
const blocklist = isBlocklistEnabled
? await this.blocklistRepository.getByWorkspaceMemberId(
workspaceMemberId,
workspaceId,
)
: [];
return blocklist.map((blocklist) => blocklist.handle);
}
private async updateLastSyncExternalId(
gmailClient: gmail_v1.Gmail,
messageChannelId: string,
firstMessageExternalId: string,
workspaceId: string,
transactionManager?: EntityManager,
) {
if (!firstMessageExternalId) {
throw new Error(
`No first message found for workspace ${workspaceId} and account ${messageChannelId}, can't update sync external id`,
);
}
const firstMessageContent = await gmailClient.users.messages.get({
userId: 'me',
id: firstMessageExternalId,
});
if (!firstMessageContent?.data) {
throw new Error(
`No first message content found for message ${firstMessageExternalId} in workspace ${workspaceId}`,
);
}
const historyId = firstMessageContent?.data?.historyId;
if (!historyId) {
throw new Error(
`No historyId found for message ${firstMessageExternalId} in workspace ${workspaceId}`,
);
}
this.logger.log(
`Updating last external id: ${historyId} for workspace ${workspaceId} and account ${messageChannelId} succeeded.`,
);
await this.messageChannelRepository.updateLastSyncExternalIdIfHigher(
messageChannelId,
historyId,
workspaceId,
transactionManager,
);
}
}