Text-to-SQL proof of concept (#5788)

Added:
- An "Ask AI" command to the command menu.
- A simple GraphQL resolver that converts the user's question into a
relevant SQL query using an LLM, runs the query, and returns the result.

<img width="428" alt="Screenshot 2024-06-09 at 20 53 09"
src="https://github.com/twentyhq/twenty/assets/171685816/57127f37-d4a6-498d-b253-733ffa0d209f">

No security concerns have been addressed; this is only a
proof of concept and is not intended to be enabled in production.

All changes are behind a feature flag called `IS_ASK_AI_ENABLED` (the resolver included in this commit checks `FeatureFlagKeys.IsCopilotEnabled`).

---------

Co-authored-by: Félix Malfait <felix.malfait@gmail.com>
This commit is contained in:
ad-elias
2024-07-04 08:57:26 +02:00
committed by GitHub
parent 25fce27fe3
commit 4c642a0bb8
46 changed files with 1463 additions and 40 deletions

View File

@ -0,0 +1,30 @@
import { Module } from '@nestjs/common';
import { TypeOrmModule } from '@nestjs/typeorm';
import { WorkspaceDataSourceModule } from 'src/engine/workspace-datasource/workspace-datasource.module';
import { UserModule } from 'src/engine/core-modules/user/user.module';
import { AISQLQueryResolver } from 'src/engine/core-modules/ai-sql-query/ai-sql-query.resolver';
import { AISQLQueryService } from 'src/engine/core-modules/ai-sql-query/ai-sql-query.service';
import { FeatureFlagEntity } from 'src/engine/core-modules/feature-flag/feature-flag.entity';
import { WorkspaceQueryRunnerModule } from 'src/engine/api/graphql/workspace-query-runner/workspace-query-runner.module';
import { LLMChatModelModule } from 'src/engine/integrations/llm-chat-model/llm-chat-model.module';
import { EnvironmentModule } from 'src/engine/integrations/environment/environment.module';
import { LLMTracingModule } from 'src/engine/integrations/llm-tracing/llm-tracing.module';
import { ObjectMetadataModule } from 'src/engine/metadata-modules/object-metadata/object-metadata.module';
import { WorkspaceSyncMetadataModule } from 'src/engine/workspace-manager/workspace-sync-metadata/workspace-sync-metadata.module';
/**
 * NestJS module for the proof-of-concept "Ask AI" (text-to-SQL) feature.
 *
 * It registers the GraphQL resolver and the service that backs it, and pulls
 * in the modules listed under `imports`, including the `core` connection's
 * feature-flag repository that the resolver injects. The module exports
 * nothing.
 */
@Module({
  // Resolver (GraphQL entry point) and the service it delegates to.
  providers: [AISQLQueryResolver, AISQLQueryService],
  imports: [
    WorkspaceDataSourceModule,
    WorkspaceQueryRunnerModule,
    UserModule,
    // Makes Repository<FeatureFlagEntity> injectable on the 'core' connection.
    TypeOrmModule.forFeature([FeatureFlagEntity], 'core'),
    LLMChatModelModule,
    LLMTracingModule,
    EnvironmentModule,
    ObjectMetadataModule,
    WorkspaceSyncMetadataModule,
  ],
  exports: [],
})
export class AISQLQueryModule {}

View File

@ -0,0 +1,14 @@
import { PromptTemplate } from '@langchain/core/prompts';
/** Values substituted into the SQL generation prompt. */
type SqlGenerationPromptInput = {
  /** JSON Schema (as a string) the model's answer must conform to. */
  llmOutputJsonSchema: string;
  /** Textual description of the tables the query may use. */
  sqlCreateTableStatements: string;
  /** The question asked by the user. */
  userQuestion: string;
};

// Raw prompt text; `{name}` placeholders are filled in by the template.
const sqlGenerationPromptText = `Always respond following this JSON Schema: {llmOutputJsonSchema}
Based on the table schema below, write a PostgreSQL query that would answer the user's question. All column names must be enclosed in double quotes.
{sqlCreateTableStatements}
Question: {userQuestion}
SQL Query:`;

/**
 * Prompt template asking the model for a PostgreSQL query that answers the
 * user's question, given the table schema and the required output schema.
 */
export const sqlGenerationPromptTemplate =
  PromptTemplate.fromTemplate<SqlGenerationPromptInput>(
    sqlGenerationPromptText,
  );

View File

@ -0,0 +1,64 @@
import { Args, Query, Resolver, ArgsType, Field } from '@nestjs/graphql';
import { ForbiddenException, UseGuards } from '@nestjs/common';
import { InjectRepository } from '@nestjs/typeorm';
import { Repository } from 'typeorm';
import { User } from 'src/engine/core-modules/user/user.entity';
import { JwtAuthGuard } from 'src/engine/guards/jwt.auth.guard';
import { Workspace } from 'src/engine/core-modules/workspace/workspace.entity';
import { AuthWorkspace } from 'src/engine/decorators/auth/auth-workspace.decorator';
import {
FeatureFlagEntity,
FeatureFlagKeys,
} from 'src/engine/core-modules/feature-flag/feature-flag.entity';
import { AuthUser } from 'src/engine/decorators/auth/auth-user.decorator';
import { AISQLQueryResult } from 'src/engine/core-modules/ai-sql-query/dtos/ai-sql-query-result.dto';
import { AISQLQueryService } from 'src/engine/core-modules/ai-sql-query/ai-sql-query.service';
/** GraphQL arguments accepted by the `getAISQLQuery` query. */
@ArgsType()
class GetAISQLQueryArgs {
  /** The user's question; forwarded to the service as the text to convert. */
  @Field(() => String)
  text: string;
}
/**
 * GraphQL resolver for the proof-of-concept "Ask AI" query.
 *
 * Every operation is protected by `JwtAuthGuard`. The single query is also
 * gated by the workspace's `IsCopilotEnabled` feature flag.
 */
@UseGuards(JwtAuthGuard)
@Resolver(() => AISQLQueryResult)
export class AISQLQueryResolver {
  constructor(
    private readonly aiSqlQueryService: AISQLQueryService,
    @InjectRepository(FeatureFlagEntity, 'core')
    private readonly featureFlagRepository: Repository<FeatureFlagEntity>,
  ) {}

  /**
   * Generates a SQL query from the user's text and executes it.
   *
   * @param workspace - Authenticated workspace; only its `id` is used.
   * @param user - Authenticated user; `id` and `email` are attached to the
   *   trace metadata passed to the service.
   * @param args - Query arguments carrying the user's `text`.
   * @returns Whatever `AISQLQueryService.generateAndExecute` resolves to.
   * @throws ForbiddenException when the workspace has no enabled
   *   `IsCopilotEnabled` feature flag.
   */
  @Query(() => AISQLQueryResult)
  async getAISQLQuery(
    @AuthWorkspace() { id: workspaceId }: Workspace,
    @AuthUser() user: User,
    @Args() { text }: GetAISQLQueryArgs,
  ) {
    // Only a flag row whose value is true counts as "enabled".
    const copilotFlag = await this.featureFlagRepository.findOneBy({
      workspaceId,
      key: FeatureFlagKeys.IsCopilotEnabled,
      value: true,
    });
    const isCopilotEnabled = Boolean(copilotFlag?.value);

    if (!isCopilotEnabled) {
      throw new ForbiddenException(
        `${FeatureFlagKeys.IsCopilotEnabled} feature flag is disabled`,
      );
    }

    const { id: userId, email: userEmail } = user;

    return this.aiSqlQueryService.generateAndExecute(workspaceId, text, {
      userId,
      userEmail,
    });
  }
}

View File

@ -0,0 +1,253 @@
import { Injectable, Logger } from '@nestjs/common';
import { RunnableSequence } from '@langchain/core/runnables';
import { StructuredOutputParser } from '@langchain/core/output_parsers';
import { DataSource, QueryFailedError } from 'typeorm';
import { z } from 'zod';
import { zodToJsonSchema } from 'zod-to-json-schema';
import { PostgresConnectionOptions } from 'typeorm/driver/postgres/PostgresConnectionOptions';
import groupBy from 'lodash.groupby';
import { PartialFieldMetadata } from 'src/engine/workspace-manager/workspace-sync-metadata/interfaces/partial-field-metadata.interface';
import { WorkspaceDataSourceService } from 'src/engine/workspace-datasource/workspace-datasource.service';
import { WorkspaceQueryRunnerService } from 'src/engine/api/graphql/workspace-query-runner/workspace-query-runner.service';
import { LLMChatModelService } from 'src/engine/integrations/llm-chat-model/llm-chat-model.service';
import { LLMTracingService } from 'src/engine/integrations/llm-tracing/llm-tracing.service';
import { ObjectMetadataEntity } from 'src/engine/metadata-modules/object-metadata/object-metadata.entity';
import { DEFAULT_LABEL_IDENTIFIER_FIELD_NAME } from 'src/engine/metadata-modules/object-metadata/object-metadata.constants';
import { StandardObjectFactory } from 'src/engine/workspace-manager/workspace-sync-metadata/factories/standard-object.factory';
import { standardObjectMetadataDefinitions } from 'src/engine/workspace-manager/workspace-sync-metadata/standard-objects';
import { AISQLQueryResult } from 'src/engine/core-modules/ai-sql-query/dtos/ai-sql-query-result.dto';
import { sqlGenerationPromptTemplate } from 'src/engine/core-modules/ai-sql-query/ai-sql-query.prompt-templates';
/** Subset of `information_schema.columns` fields read by this service. */
type WorkspaceColumnInfo = {
  table_name: string;
  column_name: string;
  data_type: string | undefined;
  is_nullable: 'YES' | 'NO';
};

/**
 * Proof-of-concept text-to-SQL service.
 *
 * Describes the workspace schema as `CREATE TABLE` statements, asks an LLM to
 * write a PostgreSQL query answering the user's question, and optionally runs
 * that query against the workspace data source.
 */
@Injectable()
export class AISQLQueryService {
  private readonly logger = new Logger(AISQLQueryService.name);

  constructor(
    private readonly workspaceDataSourceService: WorkspaceDataSourceService,
    private readonly workspaceQueryRunnerService: WorkspaceQueryRunnerService,
    private readonly llmChatModelService: LLMChatModelService,
    private readonly llmTracingService: LLMTracingService,
    private readonly standardObjectFactory: StandardObjectFactory,
  ) {}

  /**
   * Resolves the name of the label-identifier field of an object.
   *
   * Preference order: the field referenced by
   * `objectMetadata.labelIdentifierFieldMetadataId`, then the field named
   * `DEFAULT_LABEL_IDENTIFIER_FIELD_NAME` on the standard object with the same
   * `nameSingular`, then `DEFAULT_LABEL_IDENTIFIER_FIELD_NAME` itself.
   *
   * NOTE(review): nothing in this class calls this method — confirm whether it
   * is still needed.
   */
  private getLabelIdentifierName(
    objectMetadata: ObjectMetadataEntity,
    dataSourceId,
    workspaceId,
    workspaceFeatureFlagsMap,
  ): string | undefined {
    const customObjectLabelIdentifierFieldMetadata = objectMetadata.fields.find(
      (fieldMetadata) =>
        fieldMetadata.id === objectMetadata.labelIdentifierFieldMetadataId,
    );

    const standardObjectMetadataCollection = this.standardObjectFactory.create(
      standardObjectMetadataDefinitions,
      { workspaceId, dataSourceId },
      workspaceFeatureFlagsMap,
    );

    const standardObjectLabelIdentifierFieldMetadata =
      standardObjectMetadataCollection
        .find(
          (standardObjectMetadata) =>
            standardObjectMetadata.nameSingular === objectMetadata.nameSingular,
        )
        ?.fields.find(
          (field: PartialFieldMetadata) =>
            field.name === DEFAULT_LABEL_IDENTIFIER_FIELD_NAME,
        ) as PartialFieldMetadata;

    const labelIdentifierFieldMetadata =
      customObjectLabelIdentifierFieldMetadata ??
      standardObjectLabelIdentifierFieldMetadata;

    return (
      labelIdentifierFieldMetadata?.name ?? DEFAULT_LABEL_IDENTIFIER_FIELD_NAME
    );
  }

  /**
   * Loads column information for every table of the data source's schema and
   * groups it by table name.
   *
   * The schema name is read from the data source options and sent as a bound
   * parameter rather than being spliced into the SQL text.
   */
  private async getColInfosByTableName(dataSource: DataSource) {
    const { schema } = dataSource.options as PostgresConnectionOptions;

    // From LangChain sql_utils.ts
    const sqlQuery = `SELECT
  t.table_name,
  c.*
FROM
  information_schema.tables t
  JOIN information_schema.columns c
    ON t.table_name = c.table_name
WHERE
  t.table_schema = $1
  AND c.table_schema = $1
ORDER BY
  t.table_name,
  c.ordinal_position;`;

    const colInfos = await dataSource.query<WorkspaceColumnInfo[]>(sqlQuery, [
      schema,
    ]);

    return groupBy(colInfos, (colInfo) => colInfo.table_name);
  }

  /**
   * Renders a single-table `CREATE TABLE` statement listing each column's
   * name, data type and, for non-nullable columns, `NOT NULL`.
   */
  private getCreateTableStatement(
    tableName: string,
    colInfos: WorkspaceColumnInfo[],
  ) {
    const columnDefinitions = colInfos
      .map(
        ({ column_name, data_type, is_nullable }) =>
          `${column_name} ${data_type} ${
            is_nullable === 'YES' ? '' : 'NOT NULL'
          }`,
      )
      .join(', ');

    return `CREATE TABLE ${tableName} (\n ${columnDefinitions});`;
  }

  /** Placeholder for relation descriptions; currently always empty. */
  private getRelationDescriptions() {
    // TODO - Construct sentences like the following:
    // investorId: a foreign key referencing the person table, indicating the investor who owns this portfolio company.
    return '';
  }

  /** Joins a table's `CREATE TABLE` statement and its relation descriptions. */
  private getTableDescription(
    tableName: string,
    colInfos: WorkspaceColumnInfo[],
  ) {
    return [
      this.getCreateTableStatement(tableName, colInfos),
      this.getRelationDescriptions(),
    ].join('\n');
  }

  /**
   * Builds the textual schema description handed to the LLM: one table
   * description per table, separated by blank lines.
   */
  private async getWorkspaceSchemaDescription(
    dataSource: DataSource,
  ): Promise<string> {
    const colInfoByTableName = await this.getColInfosByTableName(dataSource);

    return Object.entries(colInfoByTableName)
      .map(([tableName, colInfos]) =>
        this.getTableDescription(tableName, colInfos),
      )
      .join('\n\n');
  }

  /**
   * Asks the LLM for a SQL query answering `userQuestion`, using an already
   * connected workspace data source.
   *
   * @param workspaceId - Workspace whose schema is described to the model.
   * @param workspaceDataSource - Connected data source; its `schema` option is
   *   overwritten with the workspace schema name.
   * @param userQuestion - The question to answer.
   * @param traceMetadata - Extra key/value pairs attached to the LLM trace.
   * @returns The `sqlQuery` string parsed from the model's structured output.
   */
  private async generateWithDataSource(
    workspaceId: string,
    workspaceDataSource: DataSource,
    userQuestion: string,
    traceMetadata: Record<string, string> = {},
  ) {
    const workspaceSchemaName =
      this.workspaceDataSourceService.getSchemaName(workspaceId);

    // getColInfosByTableName reads the schema back from the data source
    // options, so it has to be set before describing the schema.
    workspaceDataSource.setOptions({
      schema: workspaceSchemaName,
    });

    const workspaceSchemaDescription =
      await this.getWorkspaceSchemaDescription(workspaceDataSource);

    // The model must answer with an object of the form { sqlQuery: string }.
    const llmOutputSchema = z.object({
      sqlQuery: z.string(),
    });

    const llmOutputJsonSchema = JSON.stringify(
      zodToJsonSchema(llmOutputSchema),
    );

    const structuredOutputParser =
      StructuredOutputParser.fromZodSchema(llmOutputSchema);

    // prompt -> chat model -> structured output parser
    const sqlQueryGeneratorChain = RunnableSequence.from([
      sqlGenerationPromptTemplate,
      this.llmChatModelService.getJSONChatModel(),
      structuredOutputParser,
    ]);

    const metadata = {
      workspaceId,
      ...traceMetadata,
    };

    const tracingCallbackHandler =
      this.llmTracingService.getCallbackHandler(metadata);

    const { sqlQuery } = await sqlQueryGeneratorChain.invoke(
      {
        llmOutputJsonSchema,
        sqlCreateTableStatements: workspaceSchemaDescription,
        userQuestion,
      },
      {
        callbacks: [tracingCallbackHandler],
      },
    );

    return sqlQuery;
  }

  /**
   * Connects to the workspace data source and generates (without executing)
   * a SQL query answering `userQuestion`.
   *
   * @param workspaceId - Workspace to generate the query for.
   * @param userQuestion - The question to answer.
   * @param traceMetadata - Extra key/value pairs attached to the LLM trace.
   * @returns The generated SQL query string.
   */
  async generate(
    workspaceId: string,
    userQuestion: string,
    traceMetadata: Record<string, string> = {},
  ) {
    const workspaceDataSource =
      await this.workspaceDataSourceService.connectToWorkspaceDataSource(
        workspaceId,
      );

    return this.generateWithDataSource(
      workspaceId,
      workspaceDataSource,
      userQuestion,
      traceMetadata,
    );
  }

  /**
   * Generates a SQL query answering `userQuestion` and executes it.
   *
   * @param workspaceId - Workspace to run the query against.
   * @param userQuestion - The question to answer.
   * @param traceMetadata - Extra key/value pairs attached to the LLM trace.
   * @returns The generated query plus either the JSON-stringified rows or,
   *   when execution fails with a `QueryFailedError`, its message. Any other
   *   execution error is logged and only the query is returned. Errors raised
   *   while connecting or generating the query are not caught here.
   */
  async generateAndExecute(
    workspaceId: string,
    userQuestion: string,
    traceMetadata: Record<string, string> = {},
  ): Promise<AISQLQueryResult> {
    const workspaceDataSource =
      await this.workspaceDataSourceService.connectToWorkspaceDataSource(
        workspaceId,
      );

    const sqlQuery = await this.generateWithDataSource(
      workspaceId,
      workspaceDataSource,
      userQuestion,
      traceMetadata,
    );

    // SECURITY(review): `sqlQuery` is LLM output derived from user-supplied
    // text and is executed as-is; this class performs no validation or
    // read-only enforcement. Must be addressed before enabling outside a
    // proof-of-concept setting.
    try {
      const sqlQueryResult: Record<string, any>[] =
        await this.workspaceQueryRunnerService.executeSQL(
          workspaceDataSource,
          workspaceId,
          sqlQuery,
        );

      return {
        sqlQuery,
        sqlQueryResult: JSON.stringify(sqlQueryResult),
      };
    } catch (error: unknown) {
      // A failed query is an expected outcome: surface its message to the
      // caller instead of failing the whole request.
      if (error instanceof QueryFailedError) {
        return {
          sqlQuery,
          queryFailedErrorMessage: error.message,
        };
      }

      // Anything else is unexpected: log it and return the query only.
      if (error instanceof Error) {
        this.logger.error(error.message, error.stack);
      } else {
        this.logger.error(String(error));
      }

      return {
        sqlQuery,
      };
    }
  }
}

View File

@ -0,0 +1,17 @@
import { Field, ObjectType } from '@nestjs/graphql';
import { IsOptional } from 'class-validator';
/**
 * GraphQL object type returned by the AI SQL query.
 *
 * `sqlQuery` is always present; the two optional fields are nullable in the
 * GraphQL schema.
 */
@ObjectType('AISQLQueryResult')
export class AISQLQueryResult {
  /** The SQL query that was generated. */
  @Field(() => String)
  sqlQuery: string;

  /** Query result serialized as a string, when available. */
  @Field(() => String, { nullable: true })
  @IsOptional()
  sqlQueryResult?: string;

  /** Error message reported when the query failed, when available. */
  @Field(() => String, { nullable: true })
  @IsOptional()
  queryFailedErrorMessage?: string;
}