trycompai · Marfuen · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025 · Dec 5, 2025
diff --git a/apps/api/.env.example b/apps/api/.env.example
@@ -23,4 +23,6 @@ UPSTASH_VECTOR_REST_TOKEN=
 # Trigger
 TRIGGER_SECRET_KEY=
 
-OPENAI_API_KEY=
+OPENAI_API_KEY=
+ANTHROPIC_API_KEY=
+GROQ_API_KEY=
diff --git a/apps/api/package.json b/apps/api/package.json
@@ -4,28 +4,32 @@
   "version": "0.0.1",
   "author": "",
   "dependencies": {
+    "@ai-sdk/anthropic": "^2.0.53",
+    "@ai-sdk/groq": "^2.0.32",
     "@ai-sdk/openai": "^2.0.65",
-    "@prisma/instrumentation": "^6.13.0",
-    "@trigger.dev/build": "4.0.6",
-    "@trigger.dev/sdk": "4.0.6",
-    "@upstash/vector": "^1.2.2",
     "@aws-sdk/client-s3": "^3.859.0",
-    "ai": "^5.0.60",
     "@aws-sdk/s3-request-presigner": "^3.859.0",
     "@nestjs/common": "^11.0.1",
     "@nestjs/config": "^4.0.2",
     "@nestjs/core": "^11.0.1",
     "@nestjs/platform-express": "^11.1.5",
     "@nestjs/swagger": "^11.2.0",
     "@prisma/client": "^6.13.0",
+    "@prisma/instrumentation": "^6.13.0",
     "@react-email/components": "^0.0.41",
+    "@trigger.dev/build": "4.0.6",
+    "@trigger.dev/sdk": "4.0.6",
     "@trycompai/db": "^1.3.17",
+    "@upstash/vector": "^1.2.2",
+    "adm-zip": "^0.5.16",
+    "ai": "^5.0.60",
     "archiver": "^7.0.1",
     "axios": "^1.12.2",
     "better-auth": "^1.3.27",
     "class-transformer": "^0.5.1",
     "class-validator": "^0.14.2",
     "dotenv": "^17.2.3",
+    "exceljs": "^4.4.0",
     "jose": "^6.0.12",
     "jspdf": "^3.0.3",
     "mammoth": "^1.8.0",
@@ -47,6 +51,7 @@
     "@nestjs/cli": "^11.0.0",
     "@nestjs/schematics": "^11.0.0",
     "@nestjs/testing": "^11.0.1",
+    "@types/adm-zip": "^0.5.7",
     "@types/archiver": "^6.0.3",
     "@types/express": "^5.0.0",
     "@types/jest": "^30.0.0",

diff --git a/apps/api/src/questionnaire/questionnaire.service.ts b/apps/api/src/questionnaire/questionnaire.service.ts
@@ -16,7 +16,7 @@ import { db, Prisma } from '@db';
 import { syncManualAnswerToVector, syncOrganizationEmbeddings } from '@/vector-store/lib';
 
 // Import shared utilities
-import { extractContentFromFile, type ContentExtractionLogger } from './utils/content-extractor';
+import { extractContentFromFile, extractQuestionsWithAI, type ContentExtractionLogger } from './utils/content-extractor';
 import { parseQuestionsAndAnswers, type QuestionAnswer as ParsedQA } from './utils/question-parser';
 import { generateExportFile, type ExportFormat } from './utils/export-generator';
 import {
@@ -149,12 +149,12 @@ export class QuestionnaireService {
       source: dto.source || 'internal',
     });
 
-    const content = await extractContentFromFile(
+    // AI-powered extraction returns the question-answer pairs directly, so no separate parsing step is needed (faster, handles all file formats)
+    const questionsAndAnswers = await extractQuestionsWithAI(
       dto.fileData,
       dto.fileType,
       this.contentLogger,
     );
-    const questionsAndAnswers = await parseQuestionsAndAnswers(content, this.contentLogger);
 
     const questionnaireId = await persistQuestionnaireResult(
       {

diff --git a/apps/api/src/questionnaire/utils/constants.ts b/apps/api/src/questionnaire/utils/constants.ts
@@ -30,12 +30,38 @@ CRITICAL RULES:
 8. Always write in first person plural (we, our, us) as if speaking on behalf of the organization.
 9. Keep answers to 1-3 sentences maximum unless the question explicitly requires more detail.`;
 
-export const QUESTION_PARSING_SYSTEM_PROMPT = `You parse vendor questionnaires. Return only genuine question text paired with its answer.
-- Ignore table headers, column labels, metadata rows, or placeholder words such as "Question", "Company Name", "Department", "Assessment Date", "Name of Assessor".
-- A valid question is a meaningful sentence (usually ends with '?' or starts with interrogatives like What/Why/How/When/Where/Is/Are/Do/Does/Can/Will/Should).
-- Do not fabricate answers; if no answer is provided, set answer to null.
-- Keep the original question wording but trim whitespace.`;
+export const QUESTION_PARSING_SYSTEM_PROMPT = `You parse vendor questionnaires from Excel spreadsheets. Extract all question-answer pairs.
+
+Input format:
+- Each row has columns like: [Question] ID | [Question Text] actual question | [Response] answer | [Comment] notes
+- Or: [Question] actual question text | [Response] answer
+- Lines starting with [COLUMNS:] show the column headers - use these to understand the structure
+- The actual question TEXT is usually the longest cell, contains "?" or starts with What/How/Do/Is/Are/etc.
+
+CRITICAL: The "Question" column might contain just an ID (like "SQ14.3") - look for the column with the ACTUAL question text!
+
+Rules:
+1. Find the column containing actual question sentences (not just IDs/numbers)
+2. The question text is usually a full sentence ending with "?" or starting with interrogative words
+3. Extract the FULL question text, not the question ID
+4. Match each question to its Response/Answer from the same row
+5. If Response is empty, set answer to null
+6. Skip section headers (e.g., "Information Security Program", "General Information")
+7. Skip metadata rows (Company Name, Date, etc.)`;
 
 // Vision extraction prompt for PDFs and images
-export const VISION_EXTRACTION_PROMPT = `Extract all text and identify question-answer pairs. Look for columns/sections labeled "Question", "Q", "Answer", "A". Match questions (ending with "?" or starting with What/How/Why/When/Is/Can/Do) to nearby answers. Preserve order. Return only Question → Answer pairs.`;
+export const VISION_EXTRACTION_PROMPT = `Extract all text and identify question-answer pairs from this document.
+
+Look for:
+- Tables with columns labeled "Question", "Q", "Response", "Answer", "A", "Comment"
+- Questions ending with "?" or starting with What/How/Why/When/Where/Is/Are/Do/Does/Can/Will/Should
+- Numbered questions like "06. Do you have...", "1) What is...", "Q1: How do..."
+- Section headers (e.g., "Information Security Program", "General Information") that group questions
+
+For each question found:
+- Extract the full question text (may omit number prefix)
+- Match it to any nearby response/answer in the same row or adjacent cell
+- If no answer is provided, note it as empty
+
+Preserve the order of questions as they appear. Return Question → Answer pairs in a structured format.`;