Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
35 commits
Select commit Hold shift + click to select a range
e83d5c8
REMOVE success attribute from response in login-related responses
SergioNR Dec 27, 2025
e7d469a
ADD missing 403 response to login swagger docs
SergioNR Dec 27, 2025
6311a0e
REFACTOR login error response to use the global error handler
SergioNR Dec 27, 2025
75062d6
Merge pull request #29 from UXcaptain/UPDATE-login-related-swagger-docs
SergioNR Dec 27, 2025
7196efb
WIP on docker container for whisper-asr-webservice
SergioNR Dec 27, 2025
cc77b22
ADD depends_on and MODEL_IDLE_TIMEOUT to whisper-transcribe
SergioNR Dec 29, 2025
5e53de8
REMOVE npm AWS Transcribe
SergioNR Dec 29, 2025
7b1815e
REMOVE transcribe-related functionality
SergioNR Dec 29, 2025
1e5fe79
ADD logic & CRON for pending transcription Jobs from DB
SergioNR Dec 29, 2025
423fa3a
RENAME aws directory to s3-client
SergioNR Dec 29, 2025
2ecca57
ADD npm package form-data
SergioNR Dec 29, 2025
3978d28
WIP transcription logic
SergioNR Dec 29, 2025
d046217
REMOVE deprecated logic relating to AWS Transcribe
SergioNR Dec 29, 2025
98229f3
REFACTOR transcription job request flow
SergioNR Dec 29, 2025
65d2ac3
UPDATE transcribe attributes in compose.yaml
SergioNR Dec 30, 2025
6ed84d8
WIP in transcribe
SergioNR Dec 31, 2025
3317dc0
UPDATE logging & update transcription request --> transcription job
SergioNR Jan 1, 2026
b2cc138
WIP in transcription to send file to transcription service
SergioNR Jan 1, 2026
9566f7b
WIP in transcriptionJob processing
SergioNR Jan 2, 2026
8e9838d
REFACTOR logic for to download recording from S3 and sending to trans…
SergioNR Jan 2, 2026
47b4f1b
FIX transcription logic to return proper segments
SergioNR Jan 2, 2026
0d26371
REMOVE unused code
SergioNR Jan 2, 2026
5b9cba0
REMOVED transcription_enabled env - not needed anymore
SergioNR Jan 2, 2026
0e64562
ADD transcription_endpoint env variable
SergioNR Jan 2, 2026
a5b342d
UPDATE logic to update transcriptionJob statutes
SergioNR Jan 2, 2026
480ff0c
UPDATE cron job scheduler to 1 min for prod replica
SergioNR Jan 2, 2026
1746109
Merge pull request #30 from UXcaptain/add-transcription-functionality
SergioNR Jan 2, 2026
1b6b225
UPDATE npm packages
SergioNR Jan 2, 2026
e4a6a56
HOTFIX: run cron job every 15 minutes
SergioNR Jan 2, 2026
da3d0e1
UPDATE S3_ENDPOINT into S3_EXTERNAL_ENDPOINT
SergioNR Jan 5, 2026
f3b6117
ADD S3_INTERNAL_ENDPOINT for internal docker communication
SergioNR Jan 5, 2026
433d4d0
ADD getInternalS3 object logic
SergioNR Jan 5, 2026
13c96fc
FIX failed transcription job status update
SergioNR Jan 5, 2026
a15b819
ADD transcription job finalised status update
SergioNR Jan 5, 2026
65f9a6b
FIX error logging call missing error object on transcription job inse…
SergioNR Jan 5, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,8 @@ AWS_SECRET_ACCESS_KEY=
AWS_ACCESS_KEY_ID=
S3_REGION=
S3_BUCKET=
S3_ENDPOINT=

TRANSCRIPTION_ENABLED=
S3_EXTERNAL_ENDPOINT=
S3_INTERNAL_ENDPOINT=

MINIO_ROOT_USER=
MINIO_ROOT_PASSWORD=
60 changes: 51 additions & 9 deletions compose.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,18 @@
name: uxcaptain

networks:
uxcaptain-network:
name: uxcaptain-network
driver: bridge

volumes:
uxcaptain-database:
name: uxcaptain-database

whisper-transcription-models:
name: whisper-transcription-models


services:
# server:
# container_name: server
Expand All @@ -18,13 +32,13 @@ services:
# STRIPE_API_KEY: ${STRIPE_API_KEY}
# STRIPE_WEBHOOK_SECRET: ${STRIPE_WEBHOOK_SECRET}
# PORT: ${PORT}
# AWS_ACCESS_KEY_ID: ${AWS_ACCESS_KEY_ID}
# AWS_SECRET_ACCESS_KEY: ${AWS_SECRET_ACCESS_KEY}
# S3_BUCKET: dev-analysis-entry-storage
# S3_REGION: ${S3_REGION}
# S3_ENDPOINT: ${S3_ENDPOINT}
# S3_EXTERNAL_ENDPOINT: ${S3_EXTERNAL_ENDPOINT}
# S3_INTERNAL_ENDPOINT: ${S3_INTERNAL_ENDPOINT} # Has to be internal to allow in-server communication
# MINIO_ROOT_USER: ${MINIO_ROOT_USER}
# MINIO_ROOT_PASSWORD: ${MINIO_ROOT_PASSWORD}
# TRANSCRIPTION_ENDPOINT: ${TRANSCRIPTION_ENDPOINT}

# ports:
# - 3000:3000
Expand All @@ -40,6 +54,15 @@ services:
# condition: service_started
# networks:
# - uxcaptain-network

# deploy:
# resources:
# limits:
# cpus: '1.0'
# memory: 512M
# reservations:
# cpus: '0.5'
# memory: 256M

minio:
image: minio/minio:latest
Expand Down Expand Up @@ -74,11 +97,30 @@ services:
networks:
- uxcaptain-network

networks:
uxcaptain-network:
name: uxcaptain-network
driver: bridge
# Speech-to-text container (whisper-asr-webservice image) used for transcription.
# Nest this mapping under the top-level `services:` key of compose.yaml.
faster-whisper-transcribe:
  # NOTE(review): `latest` is a floating tag; consider pinning a specific
  # image version so deployments are reproducible.
  image: onerahmet/openai-whisper-asr-webservice:latest
  container_name: faster-whisper-asr
  ports:
    # host 9007 -> container 9000. Quoted so the host:container pair is always
    # read as a string, never as a number.
    - '9007:9000'
  volumes:
    # Model persistence (~2-5GB)
    - whisper-transcription-models:/root/.cache
  environment:
    # tiny,base,small,medium,large
    # https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-the-model
    - ASR_MODEL=medium
    # https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-device-and-quantization
    - ASR_DEVICE=cpu
    # openai_whisper, faster_whisper, whisperx
    # https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#whisperx
    - ASR_ENGINE=faster_whisper
    # in Seconds - Keep model loaded
    # https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-the-model-unloading-timeout
    - MODEL_IDLE_TIMEOUT=0
    # https://ahmetoner.com/whisper-asr-webservice/environmental-variables/#configuring-device-and-quantization
    - ASR_QUANTIZATION=int8
  restart: unless-stopped
  depends_on:
    - minio
  networks:
    # Same as MinIO/monolith
    - uxcaptain-network
  deploy:
    resources:
      limits:
        cpus: '4.0'
        memory: 5000M



volumes:
uxcaptain-database:

2,155 changes: 878 additions & 1,277 deletions package-lock.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
},
"dependencies": {
"@aws-sdk/client-s3": "^3.936.0",
"@aws-sdk/client-transcribe": "^3.948.0",
"@aws-sdk/s3-request-presigner": "^3.936.0",
"@getbrevo/brevo": "^3.0.1",
"@prisma/client": "^6.19.0",
Expand All @@ -35,6 +34,7 @@
"express-session": "^1.18.2",
"express-slow-down": "^3.0.1",
"express-validator": "^7.3.1",
"form-data": "^4.0.5",
"helmet": "^8.1.0",
"npm": "^11.6.3",
"passport": "^0.7.0",
Expand Down
2 changes: 1 addition & 1 deletion server/controllers/analysisController.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import {
from '../models/analysisModel.js';

import { createAnalysisEntryInDb } from '../models/analysisEntryModel.js';
import { generateS3PutPresignedUrl } from '../integrations/aws/s3.js';
import { generateS3PutPresignedUrl } from '../integrations/s3-client/s3.js';

export const createAnalysis = async (req, res) => {
if (req.sanitizedErrors) {
Expand Down
16 changes: 10 additions & 6 deletions server/controllers/analysisEntryController.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { generateS3GetPresignedUrl } from '../integrations/aws/s3.js';
import { logError, logInfo } from '../config/loggerFunctions.js';
import { generateS3GetPresignedUrl } from '../integrations/s3-client/s3.js';
import { createAnalysisEntryInDb, getAnalysisEntryDetailsById, markAnalysisEntryAsSubmitted } from '../models/analysisEntryModel.js';
import { processTranscriptionRequest } from '../services/analysisService.js';
import { insertTranscriptionJobInDb } from '../models/transcriptionModel.js';

export const createAnalysisEntry = async (req, res) => {
const { analysisId } = req.body;
Expand All @@ -19,14 +20,17 @@ export const updateAnalysisEntry = async (req, res) => {

const updatedAnalysisEntry = await markAnalysisEntryAsSubmitted(analysisEntryId);

const transcriptionRequest = {
const transcriptionJob = {
analysisEntryId: analysisEntryId,
analysisId: updatedAnalysisEntry.analysis_id,
languageCode: 'es-ES',
languageCode: 'es',
};

if (process.env.TRANSCRIPTION_ENABLED === 'true') {
processTranscriptionRequest(transcriptionRequest); // Fire-and-forget
try {
await insertTranscriptionJobInDb(transcriptionJob);
logInfo(`Transcription job for ${transcriptionJob.analysisEntryId} stored in DB`, transcriptionJob);
} catch (error) {
logError(`error inserting ${transcriptionJob.analysisEntryId} analysisEntry's transcription request`, error);
}

return res.status(200).json({
Expand Down
9 changes: 1 addition & 8 deletions server/controllers/authController.js
Original file line number Diff line number Diff line change
Expand Up @@ -172,30 +172,24 @@ export const checkSession = async (req, res) => {
export const loginLocal = async (req, res, next) => {
if (req.sanitizedErrors) {
return res.status(422).json({
success: false,
message: 'Analysis could not be created due to validation errors',
errors: req.sanitizedErrors,
});
}

return passport.authenticate('local', (err, user /* , info */) => {
if (err) {
return res.status(500).json({
success: false,
message: 'An error occurred during login',
});
return next(err);
}

if (!user) { //* Will trigger if user does not exist
return res.status(401).json({
success: false,
message: 'The combination of email and password is incorrect',
});
}

if (user.role === 'participant') {
return res.status(403).json({
success: false,
message: 'Participant login is disabled',
});
}
Expand All @@ -215,7 +209,6 @@ export const loginLocal = async (req, res, next) => {
updateUserLastLoginDate(user.id); // Fire-and-forget function

return res.status(200).json({
success: true,
message: 'Login successful',
user: {
id: user.id,
Expand Down
45 changes: 45 additions & 0 deletions server/controllers/transcriptionController.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
import { logError, logInfo } from '../config/loggerFunctions.js';
import { transcribeRecording } from '../integrations/whisper-asr-webservice/transcribe.js';
import {
getPendingTranscriptionJobsFromDb,
storeNormalizedTranscriptionInDb,
updateStatusSingleTranscriptionJobInDb,
} from '../models/transcriptionModel.js';
import { cleanUpTranscriptSegments } from '../utils/transcription/transcriptionNormalizer.js';

/**
 * Processes every job returned by getPendingTranscriptionJobsFromDb, sequentially.
 *
 * For each job: marks it IN_PROGRESS, sends it to transcribeRecording, cleans up the
 * returned segments, stores the full text and segments, then marks it COMPLETED.
 * If any step fails, the error is logged and the job is set back to PENDING so a
 * later run can retry it. A failure in one job never stops the remaining jobs.
 *
 * @returns {Promise<void>} Resolves once all fetched jobs have been attempted.
 * @throws Rejects only if fetching the pending jobs itself fails.
 */
export const processPendingTranscriptionJobs = async () => {
  const pendingTranscriptionJobs = await getPendingTranscriptionJobsFromDb();

  if (pendingTranscriptionJobs.length === 0) {
    logInfo('No pending transcription jobs found');
    return;
  }

  for (const transcriptionJob of pendingTranscriptionJobs) {
    const { analysis_entry_id: analysisEntryId } = transcriptionJob;

    logInfo(`Processing transcription job for analysis entry ID: ${analysisEntryId}`);

    try {
      // Mark job as IN_PROGRESS before making async call to prevent re-queuing
      await updateStatusSingleTranscriptionJobInDb(analysisEntryId, 'IN_PROGRESS');
      logInfo(`Marked transcription job ${analysisEntryId} as IN_PROGRESS`);

      const transcriptionJobResult = await transcribeRecording(transcriptionJob);

      const { segments, text: fullText } = transcriptionJobResult;

      const cleanedUpSegments = await cleanUpTranscriptSegments(segments);

      await storeNormalizedTranscriptionInDb(analysisEntryId, fullText, cleanedUpSegments);

      await updateStatusSingleTranscriptionJobInDb(analysisEntryId, 'COMPLETED');

      logInfo(`Transcription job ${analysisEntryId} completed successfully`);
    } catch (error) {
      // Log the original failure first so it is never lost if the status reset below fails too.
      logError(`Error processing transcription job, ${analysisEntryId}`, error);

      try {
        // Mark job back as PENDING to allow retry
        await updateStatusSingleTranscriptionJobInDb(analysisEntryId, 'PENDING');
      } catch (statusError) {
        // A failed reset must not escape the loop, otherwise the remaining jobs
        // in this batch would be skipped.
        logError(`Error resetting transcription job ${analysisEntryId} to PENDING`, statusError);
      }
    }
  }
};
12 changes: 0 additions & 12 deletions server/cron/getCompletedTranscriptionJobsScheduler.js

This file was deleted.

11 changes: 11 additions & 0 deletions server/cron/getPendingTranscriptionJobScheduler.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import { CronJob } from 'cron';
import { processPendingTranscriptionJobs } from '../controllers/transcriptionController.js';
import { logError } from '../config/loggerFunctions.js';

/**
 * Tick handler: runs one pass over the pending transcription jobs.
 * Any rejection is logged here rather than propagated out of the cron callback.
 */
const runPendingTranscriptionJobs = async () => {
  try {
    await processPendingTranscriptionJobs();
  } catch (err) {
    logError('Error processing transcription request', err);
  }
};

/**
 * Cron job that fires on the pattern every-15th-minute and runs the handler above.
 * It is only constructed here; the caller is responsible for invoking `.start()`.
 */
export const getPendingTranscriptionJobScheduler = new CronJob(
  '*/15 * * * *',
  runPendingTranscriptionJobs,
);
12 changes: 4 additions & 8 deletions server/cron/jobsContainer.js
Original file line number Diff line number Diff line change
@@ -1,20 +1,16 @@
import { logError } from '../config/loggerFunctions.js';
import { logError, logInfo } from '../config/loggerFunctions.js';
import { deletePasswordResetTokensScheduler } from './deletePasswordResetTokensScheduler.js';
import { getCompletedTranscriptionJobsScheduler } from './getCompletedTranscriptionJobsScheduler.js';
import { getPendingTranscriptionJobScheduler } from './getPendingTranscriptionJobScheduler.js';
import { markAnalysisEntriesAsCancelledScheduler } from './markAsCancelledAnalysisEntriesScheduler.js';

export const startCronJobs = () => {
logInfo('Starting cron jobs');
try {
deletePasswordResetTokensScheduler.start();

if (process.env.TRANSCRIPTION_ENABLED === true) {
getCompletedTranscriptionJobsScheduler.start();
}
getPendingTranscriptionJobScheduler.start();

markAnalysisEntriesAsCancelledScheduler.start();


console.log('Cron jobs started');
} catch (error) {
logError('error on startCronJobs', error);
}
Expand Down
56 changes: 0 additions & 56 deletions server/integrations/aws/Transcribe.js

This file was deleted.

Loading