Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion apps/api/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,6 @@ UPSTASH_VECTOR_REST_TOKEN=
# Trigger
TRIGGER_SECRET_KEY=

OPENAI_API_KEY=
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
GROQ_API_KEY=
15 changes: 10 additions & 5 deletions apps/api/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,28 +4,32 @@
"version": "0.0.1",
"author": "",
"dependencies": {
"@ai-sdk/anthropic": "^2.0.53",
"@ai-sdk/groq": "^2.0.32",
"@ai-sdk/openai": "^2.0.65",
"@prisma/instrumentation": "^6.13.0",
"@trigger.dev/build": "4.0.6",
"@trigger.dev/sdk": "4.0.6",
"@upstash/vector": "^1.2.2",
"@aws-sdk/client-s3": "^3.859.0",
"ai": "^5.0.60",
"@aws-sdk/s3-request-presigner": "^3.859.0",
"@nestjs/common": "^11.0.1",
"@nestjs/config": "^4.0.2",
"@nestjs/core": "^11.0.1",
"@nestjs/platform-express": "^11.1.5",
"@nestjs/swagger": "^11.2.0",
"@prisma/client": "^6.13.0",
"@prisma/instrumentation": "^6.13.0",
"@react-email/components": "^0.0.41",
"@trigger.dev/build": "4.0.6",
"@trigger.dev/sdk": "4.0.6",
"@trycompai/db": "^1.3.17",
"@upstash/vector": "^1.2.2",
"adm-zip": "^0.5.16",
"ai": "^5.0.60",
"archiver": "^7.0.1",
"axios": "^1.12.2",
"better-auth": "^1.3.27",
"class-transformer": "^0.5.1",
"class-validator": "^0.14.2",
"dotenv": "^17.2.3",
"exceljs": "^4.4.0",
"jose": "^6.0.12",
"jspdf": "^3.0.3",
"mammoth": "^1.8.0",
Expand All @@ -47,6 +51,7 @@
"@nestjs/cli": "^11.0.0",
"@nestjs/schematics": "^11.0.0",
"@nestjs/testing": "^11.0.1",
"@types/adm-zip": "^0.5.7",
"@types/archiver": "^6.0.3",
"@types/express": "^5.0.0",
"@types/jest": "^30.0.0",
Expand Down
6 changes: 3 additions & 3 deletions apps/api/src/questionnaire/questionnaire.service.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import { db, Prisma } from '@db';
import { syncManualAnswerToVector, syncOrganizationEmbeddings } from '@/vector-store/lib';

// Import shared utilities
import { extractContentFromFile, type ContentExtractionLogger } from './utils/content-extractor';
import { extractContentFromFile, extractQuestionsWithAI, type ContentExtractionLogger } from './utils/content-extractor';
import { parseQuestionsAndAnswers, type QuestionAnswer as ParsedQA } from './utils/question-parser';
import { generateExportFile, type ExportFormat } from './utils/export-generator';
import {
Expand Down Expand Up @@ -149,12 +149,12 @@ export class QuestionnaireService {
source: dto.source || 'internal',
});

const content = await extractContentFromFile(
// Use AI-powered extraction (faster, handles all file formats)
const questionsAndAnswers = await extractQuestionsWithAI(
dto.fileData,
dto.fileType,
this.contentLogger,
);
const questionsAndAnswers = await parseQuestionsAndAnswers(content, this.contentLogger);

const questionnaireId = await persistQuestionnaireResult(
{
Expand Down
38 changes: 32 additions & 6 deletions apps/api/src/questionnaire/utils/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,38 @@ CRITICAL RULES:
8. Always write in first person plural (we, our, us) as if speaking on behalf of the organization.
9. Keep answers to 1-3 sentences maximum unless the question explicitly requires more detail.`;

export const QUESTION_PARSING_SYSTEM_PROMPT = `You parse vendor questionnaires. Return only genuine question text paired with its answer.
- Ignore table headers, column labels, metadata rows, or placeholder words such as "Question", "Company Name", "Department", "Assessment Date", "Name of Assessor".
- A valid question is a meaningful sentence (usually ends with '?' or starts with interrogatives like What/Why/How/When/Where/Is/Are/Do/Does/Can/Will/Should).
- Do not fabricate answers; if no answer is provided, set answer to null.
- Keep the original question wording but trim whitespace.`;
/**
 * System prompt that instructs the model to extract question/answer pairs
 * from vendor-questionnaire rows that originated in a spreadsheet.
 *
 * The prompt describes the expected input to the model as text rows whose
 * cells are tagged with their column name in square brackets and separated
 * by `|`, optionally preceded by `[COLUMNS:]` lines listing the headers.
 * It tells the model to:
 * - pick the column holding real question sentences rather than question IDs,
 * - pair each question with the Response/Answer from the same row,
 * - use `null` as the answer when the response is empty,
 * - skip section headers and metadata rows.
 *
 * NOTE(review): the code that serializes rows into this bracketed format is
 * not visible here — confirm it actually emits `[COLUMNS:]` lines and the
 * `[Header] value | ...` layout this prompt assumes.
 *
 * This is a runtime string sent to the model; any edit to its wording changes
 * extraction behavior.
 */
export const QUESTION_PARSING_SYSTEM_PROMPT = `You parse vendor questionnaires from Excel spreadsheets. Extract all question-answer pairs.

Input format:
- Each row has columns like: [Question] ID | [Question Text] actual question | [Response] answer | [Comment] notes
- Or: [Question] actual question text | [Response] answer
- Lines starting with [COLUMNS:] show the column headers - use these to understand the structure
- The actual question TEXT is usually the longest cell, contains "?" or starts with What/How/Do/Is/Are/etc.

CRITICAL: The "Question" column might contain just an ID (like "SQ14.3") - look for the column with the ACTUAL question text!

Rules:
1. Find the column containing actual question sentences (not just IDs/numbers)
2. The question text is usually a full sentence ending with "?" or starting with interrogative words
3. Extract the FULL question text, not the question ID
4. Match each question to its Response/Answer from the same row
5. If Response is empty, set answer to null
6. Skip section headers (e.g., "Information Security Program", "General Information")
7. Skip metadata rows (Company Name, Date, etc.)`;

// Vision extraction prompt for PDFs and images
export const VISION_EXTRACTION_PROMPT = `Extract all text and identify question-answer pairs. Look for columns/sections labeled "Question", "Q", "Answer", "A". Match questions (ending with "?" or starting with What/How/Why/When/Is/Can/Do) to nearby answers. Preserve order. Return only Question → Answer pairs.`;
/**
 * Prompt that asks the model to read a document and return its
 * question/answer pairs in the order they appear.
 *
 * It directs the model to look for labeled table columns, sentences that end
 * with "?" or start with an interrogative word, and numbered questions, and
 * to pair each question with a response in the same row or an adjacent cell.
 * The number prefix of a question may be dropped.
 *
 * NOTE(review): unanswered questions are to be "noted as empty" here, while
 * QUESTION_PARSING_SYSTEM_PROMPT asks for `null` — confirm the consumer of
 * this output treats both representations the same way.
 * NOTE(review): the prompt lists section headers under "Look for" without
 * saying whether they should appear in the output — verify that is intended.
 *
 * This is a runtime string sent to the model; any edit to its wording changes
 * extraction behavior.
 */
export const VISION_EXTRACTION_PROMPT = `Extract all text and identify question-answer pairs from this document.

Look for:
- Tables with columns labeled "Question", "Q", "Response", "Answer", "A", "Comment"
- Questions ending with "?" or starting with What/How/Why/When/Where/Is/Are/Do/Does/Can/Will/Should
- Numbered questions like "06. Do you have...", "1) What is...", "Q1: How do..."
- Section headers (e.g., "Information Security Program", "General Information") that group questions

For each question found:
- Extract the full question text (may omit number prefix)
- Match it to any nearby response/answer in the same row or adjacent cell
- If no answer is provided, note it as empty

Preserve the order of questions as they appear. Return Question → Answer pairs in a structured format.`;

Loading
Loading