feat: fetch and parse full gmail message (#5160)
First part of https://github.com/twentyhq/twenty/issues/4108. Related PR: https://github.com/twentyhq/twenty/pull/5081. Co-authored-by: Charles Bochet <charles@twenty.com>
This commit is contained in:
@ -1 +1 @@
|
||||
export const GMAIL_USERS_MESSAGES_GET_BATCH_SIZE = 10;
|
||||
export const GMAIL_USERS_MESSAGES_GET_BATCH_SIZE = 20;
|
||||
|
||||
@ -1,14 +1,15 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
|
||||
import { AxiosResponse } from 'axios';
|
||||
import { simpleParser } from 'mailparser';
|
||||
import planer from 'planer';
|
||||
import addressparser from 'addressparser';
|
||||
import { gmail_v1 } from 'googleapis';
|
||||
|
||||
import { GmailMessage } from 'src/modules/messaging/types/gmail-message';
|
||||
import { MessageQuery } from 'src/modules/messaging/types/message-or-thread-query';
|
||||
import { GmailMessageParsedResponse } from 'src/modules/messaging/types/gmail-message-parsed-response';
|
||||
import { FetchByBatchesService } from 'src/modules/messaging/services/fetch-by-batch/fetch-by-batch.service';
|
||||
import { formatAddressObjectAsParticipants } from 'src/modules/messaging/services/utils/format-address-object-as-participants.util';
|
||||
import { assert, assertNotNull } from 'src/utils/assert';
|
||||
|
||||
@Injectable()
|
||||
export class FetchMessagesByBatchesService {
|
||||
@ -19,9 +20,9 @@ export class FetchMessagesByBatchesService {
|
||||
async fetchAllMessages(
|
||||
queries: MessageQuery[],
|
||||
accessToken: string,
|
||||
workspaceId?: string,
|
||||
connectedAccountId?: string,
|
||||
): Promise<{ messages: GmailMessage[]; errors: any[] }> {
|
||||
workspaceId: string,
|
||||
connectedAccountId: string,
|
||||
): Promise<GmailMessage[]> {
|
||||
let startTime = Date.now();
|
||||
const batchResponses = await this.fetchByBatchesService.fetchAllByBatches(
|
||||
queries,
|
||||
@ -38,8 +39,11 @@ export class FetchMessagesByBatchesService {
|
||||
|
||||
startTime = Date.now();
|
||||
|
||||
const formattedResponse =
|
||||
await this.formatBatchResponsesAsGmailMessages(batchResponses);
|
||||
const formattedResponse = this.formatBatchResponsesAsGmailMessages(
|
||||
batchResponses,
|
||||
workspaceId,
|
||||
connectedAccountId,
|
||||
);
|
||||
|
||||
endTime = Date.now();
|
||||
|
||||
@ -52,109 +56,172 @@ export class FetchMessagesByBatchesService {
|
||||
return formattedResponse;
|
||||
}
|
||||
|
||||
async formatBatchResponseAsGmailMessage(
|
||||
private formatBatchResponseAsGmailMessage(
|
||||
responseCollection: AxiosResponse<any, any>,
|
||||
): Promise<{ messages: GmailMessage[]; errors: any[] }> {
|
||||
const parsedResponses = this.fetchByBatchesService.parseBatch(
|
||||
responseCollection,
|
||||
) as GmailMessageParsedResponse[];
|
||||
|
||||
const errors: any = [];
|
||||
workspaceId: string,
|
||||
connectedAccountId: string,
|
||||
): GmailMessage[] {
|
||||
const parsedResponses =
|
||||
this.fetchByBatchesService.parseBatch(responseCollection);
|
||||
|
||||
const sanitizeString = (str: string) => {
|
||||
return str.replace(/\0/g, '');
|
||||
};
|
||||
|
||||
const formattedResponse = Promise.all(
|
||||
parsedResponses.map(async (message: GmailMessageParsedResponse) => {
|
||||
if (message.error) {
|
||||
errors.push(message.error);
|
||||
const formattedResponse = parsedResponses.map(
|
||||
(response): GmailMessage | null => {
|
||||
if ('error' in response) {
|
||||
if (response.error.code === 404) {
|
||||
return null;
|
||||
}
|
||||
|
||||
return;
|
||||
throw response.error;
|
||||
}
|
||||
|
||||
const { historyId, id, threadId, internalDate, raw } = message;
|
||||
const {
|
||||
historyId,
|
||||
id,
|
||||
threadId,
|
||||
internalDate,
|
||||
subject,
|
||||
from,
|
||||
to,
|
||||
cc,
|
||||
bcc,
|
||||
headerMessageId,
|
||||
text,
|
||||
attachments,
|
||||
deliveredTo,
|
||||
} = this.parseGmailMessage(response);
|
||||
|
||||
const body = atob(raw?.replace(/-/g, '+').replace(/_/g, '/'));
|
||||
if (!from) {
|
||||
this.logger.log(
|
||||
`From value is missing while importing message in workspace ${workspaceId} and account ${connectedAccountId}`,
|
||||
);
|
||||
|
||||
try {
|
||||
const parsed = await simpleParser(body, {
|
||||
skipHtmlToText: true,
|
||||
skipImageLinks: true,
|
||||
skipTextToHtml: true,
|
||||
maxHtmlLengthToParse: 0,
|
||||
});
|
||||
|
||||
const { subject, messageId, from, to, cc, bcc, text, attachments } =
|
||||
parsed;
|
||||
|
||||
if (!from) throw new Error('From value is missing');
|
||||
|
||||
const participants = [
|
||||
...formatAddressObjectAsParticipants(from, 'from'),
|
||||
...formatAddressObjectAsParticipants(to, 'to'),
|
||||
...formatAddressObjectAsParticipants(cc, 'cc'),
|
||||
...formatAddressObjectAsParticipants(bcc, 'bcc'),
|
||||
];
|
||||
|
||||
let textWithoutReplyQuotations = text;
|
||||
|
||||
if (text)
|
||||
try {
|
||||
textWithoutReplyQuotations = planer.extractFrom(
|
||||
text,
|
||||
'text/plain',
|
||||
);
|
||||
} catch (error) {
|
||||
console.log(
|
||||
'Error while trying to remove reply quotations',
|
||||
error,
|
||||
);
|
||||
}
|
||||
|
||||
const messageFromGmail: GmailMessage = {
|
||||
historyId,
|
||||
externalId: id,
|
||||
headerMessageId: messageId || '',
|
||||
subject: subject || '',
|
||||
messageThreadExternalId: threadId,
|
||||
internalDate,
|
||||
fromHandle: from.value[0].address || '',
|
||||
fromDisplayName: from.value[0].name || '',
|
||||
participants,
|
||||
text: sanitizeString(textWithoutReplyQuotations || ''),
|
||||
attachments,
|
||||
};
|
||||
|
||||
return messageFromGmail;
|
||||
} catch (error) {
|
||||
console.log('Error', error);
|
||||
|
||||
errors.push(error);
|
||||
return null;
|
||||
}
|
||||
}),
|
||||
|
||||
if (!to && !deliveredTo && !bcc && !cc) {
|
||||
this.logger.log(
|
||||
`To, Delivered-To, Bcc or Cc value is missing while importing message in workspace ${workspaceId} and account ${connectedAccountId}`,
|
||||
);
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
const participants = [
|
||||
...formatAddressObjectAsParticipants(from, 'from'),
|
||||
...formatAddressObjectAsParticipants(to ?? deliveredTo, 'to'),
|
||||
...formatAddressObjectAsParticipants(cc, 'cc'),
|
||||
...formatAddressObjectAsParticipants(bcc, 'bcc'),
|
||||
];
|
||||
|
||||
let textWithoutReplyQuotations = text;
|
||||
|
||||
if (text) {
|
||||
textWithoutReplyQuotations = planer.extractFrom(text, 'text/plain');
|
||||
}
|
||||
|
||||
const messageFromGmail: GmailMessage = {
|
||||
historyId,
|
||||
externalId: id,
|
||||
headerMessageId,
|
||||
subject: subject || '',
|
||||
messageThreadExternalId: threadId,
|
||||
internalDate,
|
||||
fromHandle: from[0].address || '',
|
||||
fromDisplayName: from[0].name || '',
|
||||
participants,
|
||||
text: sanitizeString(textWithoutReplyQuotations || ''),
|
||||
attachments,
|
||||
};
|
||||
|
||||
return messageFromGmail;
|
||||
},
|
||||
);
|
||||
|
||||
const filteredMessages = (await formattedResponse).filter(
|
||||
(message) => message,
|
||||
const filteredMessages = formattedResponse.filter((message) =>
|
||||
assertNotNull(message),
|
||||
) as GmailMessage[];
|
||||
|
||||
return { messages: filteredMessages, errors };
|
||||
return filteredMessages;
|
||||
}
|
||||
|
||||
async formatBatchResponsesAsGmailMessages(
|
||||
private formatBatchResponsesAsGmailMessages(
|
||||
batchResponses: AxiosResponse<any, any>[],
|
||||
): Promise<{ messages: GmailMessage[]; errors: any[] }> {
|
||||
const messagesAndErrors = await Promise.all(
|
||||
batchResponses.map(async (response) => {
|
||||
return this.formatBatchResponseAsGmailMessage(response);
|
||||
}),
|
||||
workspaceId: string,
|
||||
connectedAccountId: string,
|
||||
): GmailMessage[] {
|
||||
const messageBatches = batchResponses.map((response) => {
|
||||
return this.formatBatchResponseAsGmailMessage(
|
||||
response,
|
||||
workspaceId,
|
||||
connectedAccountId,
|
||||
);
|
||||
});
|
||||
|
||||
return messageBatches.flat();
|
||||
}
|
||||
|
||||
/**
 * Pulls the fields used to build a GmailMessage out of a Gmail API message.
 *
 * Header values are looked up through getPropertyFromHeaders. The address
 * headers (From, Delivered-To, To, Cc, Bcc) are run through addressparser and
 * stay undefined when the header is absent. The body data returned by
 * getBodyData is base64-decoded; when there is none, text is an empty string.
 *
 * Asserts that the message id, the Message-ID header, threadId, historyId and
 * internalDate are all present.
 */
private parseGmailMessage(message: gmail_v1.Schema$Message) {
  const readHeader = (name: string) =>
    this.getPropertyFromHeaders(message, name);
  const toAddresses = (rawValue: string | null | undefined) =>
    rawValue ? addressparser(rawValue) : undefined;

  const { id, threadId, historyId, internalDate } = message;
  const headerMessageId = readHeader('Message-ID');

  // Same check order as before: id, Message-ID, threadId, historyId, internalDate.
  assert(id);
  assert(headerMessageId);
  assert(threadId);
  assert(historyId);
  assert(internalDate);

  const encodedBody = this.getBodyData(message);
  let text = '';

  if (encodedBody) {
    text = Buffer.from(encodedBody, 'base64').toString();
  }

  return {
    id,
    headerMessageId,
    threadId,
    historyId,
    internalDate,
    subject: readHeader('Subject'),
    from: toAddresses(readHeader('From')),
    deliveredTo: toAddresses(readHeader('Delivered-To')),
    to: toAddresses(readHeader('To')),
    cc: toAddresses(readHeader('Cc')),
    bcc: toAddresses(readHeader('Bcc')),
    text,
    attachments: [],
  };
}
|
||||
|
||||
/**
 * Returns the still-encoded data of the message's plain-text body, or
 * undefined when no plain-text part is found.
 *
 * Lookup order:
 *  1. the payload itself, when its MIME type is text/plain (single-part
 *     messages keep their body on the payload and have no `parts`);
 *  2. the first part, when it is text/plain;
 *  3. the first text/plain child of the first part.
 *
 * @param message Gmail API message whose payload is inspected.
 * @returns The `body.data` string of the selected part, if any.
 */
private getBodyData(message: gmail_v1.Schema$Message) {
  const payload = message.payload;

  // Without this check a non-multipart plain-text email produced an empty text.
  if (payload?.mimeType === 'text/plain') {
    return payload.body?.data;
  }

  const firstPart = payload?.parts?.[0];

  if (firstPart?.mimeType === 'text/plain') {
    return firstPart.body?.data;
  }

  // The first part is a container: look one level down.
  return firstPart?.parts?.find((part) => part.mimeType === 'text/plain')
    ?.body?.data;
}
|
||||
|
||||
private getPropertyFromHeaders(
|
||||
message: gmail_v1.Schema$Message,
|
||||
property: string,
|
||||
) {
|
||||
const header = message.payload?.headers?.find(
|
||||
(header) => header.name?.toLowerCase() === property.toLowerCase(),
|
||||
);
|
||||
|
||||
const messages = messagesAndErrors.map((item) => item.messages).flat();
|
||||
|
||||
const errors = messagesAndErrors.map((item) => item.errors).flat();
|
||||
|
||||
return { messages, errors };
|
||||
return header?.value;
|
||||
}
|
||||
}
|
||||
|
||||
@ -174,7 +174,7 @@ export class GmailFetchMessageContentFromCacheService {
|
||||
const messageQueries = createQueriesFromMessageIds(messageIdsToFetch);
|
||||
|
||||
try {
|
||||
const { messages: messagesToSave, errors } =
|
||||
const messagesToSave =
|
||||
await this.fetchMessagesByBatchesService.fetchAllMessages(
|
||||
messageQueries,
|
||||
accessToken,
|
||||
@ -194,22 +194,6 @@ export class GmailFetchMessageContentFromCacheService {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (errors.length) {
|
||||
const errorsCanBeIgnored = errors.every(
|
||||
(error) => error.code === 404,
|
||||
);
|
||||
|
||||
if (!errorsCanBeIgnored) {
|
||||
throw new Error(
|
||||
`Error fetching messages for ${connectedAccountId} in workspace ${workspaceId}: ${JSON.stringify(
|
||||
errors,
|
||||
null,
|
||||
2,
|
||||
)}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
const messageExternalIdsAndIdsMap =
|
||||
await this.messageService.saveMessagesWithinTransaction(
|
||||
messagesToSave,
|
||||
@ -292,21 +276,19 @@ export class GmailFetchMessageContentFromCacheService {
|
||||
messageIdsToFetch,
|
||||
);
|
||||
|
||||
if (error?.message?.code === 429) {
|
||||
this.logger.error(
|
||||
`Error fetching messages for ${connectedAccountId} in workspace ${workspaceId}: Resource has been exhausted, locking for ${GMAIL_ONGOING_SYNC_TIMEOUT}ms...`,
|
||||
);
|
||||
await this.messageChannelRepository.updateSyncStatus(
|
||||
gmailMessageChannelId,
|
||||
MessageChannelSyncStatus.FAILED,
|
||||
workspaceId,
|
||||
);
|
||||
|
||||
await this.messageChannelRepository.updateSyncStatus(
|
||||
gmailMessageChannelId,
|
||||
MessageChannelSyncStatus.FAILED,
|
||||
workspaceId,
|
||||
);
|
||||
this.logger.error(
|
||||
`Error fetching messages for ${connectedAccountId} in workspace ${workspaceId}: locking for ${GMAIL_ONGOING_SYNC_TIMEOUT}ms...`,
|
||||
);
|
||||
|
||||
throw new Error(
|
||||
`Error fetching messages for ${connectedAccountId} in workspace ${workspaceId}: ${error.message}`,
|
||||
);
|
||||
}
|
||||
throw new Error(
|
||||
`Error fetching messages for ${connectedAccountId} in workspace ${workspaceId}: ${error.message}`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -1,10 +1,10 @@
|
||||
import { AddressObject } from 'mailparser';
|
||||
import addressparser from 'addressparser';
|
||||
|
||||
import { Participant } from 'src/modules/messaging/types/gmail-message';
|
||||
|
||||
const formatAddressObjectAsArray = (
|
||||
addressObject: AddressObject | AddressObject[],
|
||||
): AddressObject[] => {
|
||||
addressObject: addressparser.EmailAddress | addressparser.EmailAddress[],
|
||||
): addressparser.EmailAddress[] => {
|
||||
return Array.isArray(addressObject) ? addressObject : [addressObject];
|
||||
};
|
||||
|
||||
@ -13,24 +13,23 @@ const removeSpacesAndLowerCase = (email: string): string => {
|
||||
};
|
||||
|
||||
export const formatAddressObjectAsParticipants = (
|
||||
addressObject: AddressObject | AddressObject[] | undefined,
|
||||
addressObject:
|
||||
| addressparser.EmailAddress
|
||||
| addressparser.EmailAddress[]
|
||||
| undefined,
|
||||
role: 'from' | 'to' | 'cc' | 'bcc',
|
||||
): Participant[] => {
|
||||
if (!addressObject) return [];
|
||||
const addressObjects = formatAddressObjectAsArray(addressObject);
|
||||
|
||||
const participants = addressObjects.map((addressObject) => {
|
||||
const emailAdresses = addressObject.value;
|
||||
const address = addressObject.address;
|
||||
|
||||
return emailAdresses.map((emailAddress) => {
|
||||
const { name, address } = emailAddress;
|
||||
|
||||
return {
|
||||
role,
|
||||
handle: address ? removeSpacesAndLowerCase(address) : '',
|
||||
displayName: name || '',
|
||||
};
|
||||
});
|
||||
return {
|
||||
role,
|
||||
handle: address ? removeSpacesAndLowerCase(address) : '',
|
||||
displayName: addressObject.name || '',
|
||||
};
|
||||
});
|
||||
|
||||
return participants.flat();
|
||||
|
||||
@ -1,15 +1,13 @@
|
||||
export type GmailMessageParsedResponse = {
|
||||
id: string;
|
||||
threadId: string;
|
||||
labelIds: string[];
|
||||
snippet: string;
|
||||
sizeEstimate: number;
|
||||
raw: string;
|
||||
historyId: string;
|
||||
internalDate: string;
|
||||
error?: {
|
||||
import { gmail_v1 } from 'googleapis';
|
||||
|
||||
/**
 * Error entry of a parsed batch response. It is told apart from a regular
 * message by the presence of the `error` key.
 */
type GmailMessageError = {
  error: {
    /** Numeric error code; the batch formatter skips entries whose code is 404. */
    code: number;
    /** Error message text. */
    message: string;
    /** Error status string. */
    status: string;
  };
};
|
||||
|
||||
/**
 * One entry of a parsed Gmail batch response: either a Gmail API message or
 * an error object.
 */
export type GmailMessageParsedResponse = gmail_v1.Schema$Message | GmailMessageError;
|
||||
|
||||
@ -1,5 +1,3 @@
|
||||
import { Attachment } from 'mailparser';
|
||||
|
||||
export type GmailMessage = {
|
||||
historyId: string;
|
||||
externalId: string;
|
||||
@ -25,3 +23,10 @@ export type ParticipantWithMessageId = Participant & { messageId: string };
|
||||
export type ParticipantWithId = Participant & {
|
||||
id: string;
|
||||
};
|
||||
|
||||
/** Descriptor of a message attachment. */
export type Attachment = {
  /** Identifier of the attachment. */
  id: string;
  /** File name of the attachment. */
  filename: string;
  /** Size of the attachment (presumably in bytes; confirm against the producer). */
  size: number;
  /** MIME type of the attachment. */
  mimeType: string;
};
|
||||
|
||||
@ -4,6 +4,6 @@ export const createQueriesFromMessageIds = (
|
||||
messageExternalIds: string[],
|
||||
): MessageQuery[] => {
|
||||
return messageExternalIds.map((messageId) => ({
|
||||
uri: '/gmail/v1/users/me/messages/' + messageId + '?format=RAW',
|
||||
uri: '/gmail/v1/users/me/messages/' + messageId + '?format=FULL',
|
||||
}));
|
||||
};
|
||||
|
||||
Reference in New Issue
Block a user