diff --git a/src/App.tsx b/src/App.tsx index 2a1be4e..8773a41 100644 --- a/src/App.tsx +++ b/src/App.tsx @@ -16,6 +16,8 @@ function App() { const scribearStatus = useSelector((state: RootState) => state.APIStatusReducer?.scribearServerStatus as number); const scribearMessage = useSelector((state: RootState) => (state.APIStatusReducer as any)?.scribearServerMessage as string | undefined); + const micNoAudio = useSelector((state: RootState) => (state.ControlReducer as any)?.micNoAudio as boolean | undefined); + const listening = useSelector((state: RootState) => (state.ControlReducer as any)?.listening as boolean | undefined); const [snackbarOpen, setSnackbarOpen] = useState(false); const [snackbarMsg, setSnackbarMsg] = useState(''); @@ -39,6 +41,41 @@ function App() { } }, [scribearStatus]); + useEffect(() => { + // show mic inactivity when mic is on but no audio chunks are received + if (listening && micNoAudio) { + setSnackbarMsg('Microphone is active but no audio detected'); + setSnackbarSeverity('warning'); + setSnackbarOpen(true); + } + + // When listening turns ON, start a one-shot timer that expects at least one ondataavailable + // call within thresholdMs. This avoids firing on normal silent pauses after audio has been + // received previously. We only trigger inactivity if no blob arrives at all after enabling mic. 
+ const thresholdMs = 3000; + try { + if (listening) { + try { (window as any).__hasReceivedAudio = false; } catch (e) {} + if ((window as any).__initialAudioTimer) { try { clearTimeout((window as any).__initialAudioTimer); } catch (e) {} } + (window as any).__initialAudioTimer = setTimeout(() => { + try { + const has = (window as any).__hasReceivedAudio === true; + if (!has) { + try { (window as any).store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: true }); } catch (e) {} + } + } catch (e) {} + }, thresholdMs); + } else { + // listening turned off: clear initial timer and ensure flag reset + try { if ((window as any).__initialAudioTimer) { clearTimeout((window as any).__initialAudioTimer); (window as any).__initialAudioTimer = null; } } catch (e) {} + try { (window as any).store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); } catch (e) {} + } + } catch (e) { + console.warn('Failed to start initial mic monitor', e); + } + // no cleanup needed here because we clear/set timer when listening toggles + }, [listening, micNoAudio]); + const handleClose = (_event?: React.SyntheticEvent | Event, reason?: string) => { if (reason === 'clickaway') return; setSnackbarOpen(false); diff --git a/src/components/api/scribearServer/scribearRecognizer.tsx b/src/components/api/scribearServer/scribearRecognizer.tsx index 931293c..0677773 100644 --- a/src/components/api/scribearServer/scribearRecognizer.tsx +++ b/src/components/api/scribearServer/scribearRecognizer.tsx @@ -31,6 +31,8 @@ export class ScribearRecognizer implements Recognizer { private language: string private recorder?: RecordRTC; private kSampleRate = 16000; + private lastAudioTimestamp: number | null = null; + private inactivityInterval: any = null; urlParams = new URLSearchParams(window.location.search); mode = this.urlParams.get('mode'); @@ -58,6 +60,18 @@ export class ScribearRecognizer implements Recognizer { desiredSampRate: this.kSampleRate, timeSlice: 50, ondataavailable: async (blob: Blob) => { 
+ // update last audio timestamp and mark that we've received at least one audio chunk + this.lastAudioTimestamp = Date.now(); + try { (window as any).__lastAudioTimestamp = this.lastAudioTimestamp; } catch (e) {} + try { (window as any).__hasReceivedAudio = true; if ((window as any).__initialAudioTimer) { clearTimeout((window as any).__initialAudioTimer); (window as any).__initialAudioTimer = null; } } catch (e) {} + try { + const controlState = (store.getState() as any).ControlReducer; + if (controlState?.micNoAudio === true) { + store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); + } + } catch (e) { + console.warn('Failed to clear mic inactivity', e); + } this.socket?.send(blob); }, recorderType: StereoAudioRecorder, @@ -65,6 +79,33 @@ export class ScribearRecognizer implements Recognizer { }); this.recorder.startRecording(); + + // start inactivity monitor + const thresholdMs = 3000; + if (this.inactivityInterval == null) { + this.inactivityInterval = setInterval(() => { + try { + const state: any = store.getState(); + const listening = state.ControlReducer?.listening === true; + const micNoAudio = state.ControlReducer?.micNoAudio === true; + if (listening) { + if (!this.lastAudioTimestamp || (Date.now() - this.lastAudioTimestamp > thresholdMs)) { + if (!micNoAudio) { + store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: true }); + } + } else { + if (micNoAudio) { + store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); + } + } + } else { + if (micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); + } + } catch (e) { + console.warn('Error in mic inactivity interval', e); + } + }, 1000); + } } /** @@ -179,6 +220,15 @@ export class ScribearRecognizer implements Recognizer { if (!this.socket) { return; } this.socket.close(); this.socket = null; + if (this.inactivityInterval) { + clearInterval(this.inactivityInterval); + this.inactivityInterval = null; + } + try { + store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: 
false }); + } catch (e) { + console.warn('Failed to clear mic inactivity on stop', e); + } } /** diff --git a/src/components/api/whisper/whisperRecognizer.tsx b/src/components/api/whisper/whisperRecognizer.tsx index ad61e21..4bc866e 100644 --- a/src/components/api/whisper/whisperRecognizer.tsx +++ b/src/components/api/whisper/whisperRecognizer.tsx @@ -48,6 +48,8 @@ export class WhisperRecognizer implements Recognizer { private num_threads: number; private transcribed_callback: ((newFinalBlocks: Array<TranscriptBlock>, newInProgressBlock: TranscriptBlock) => void) | null = null; + private lastAudioTimestamp: number | null = null; + private inactivityInterval: any = null; /** * Creates an Whisper recognizer instance that listens to the default microphone @@ -135,6 +137,20 @@ export class WhisperRecognizer implements Recognizer { pcm_data = Float32Concat(last_suffix, pcm_data); last_suffix = pcm_data.slice(-(pcm_data.length % 128)) + // update last audio timestamp and mark that we've received at least one audio chunk + this.lastAudioTimestamp = Date.now(); + try { (window as any).__lastAudioTimestamp = this.lastAudioTimestamp; } catch (e) {} + try { (window as any).__hasReceivedAudio = true; if ((window as any).__initialAudioTimer) { clearTimeout((window as any).__initialAudioTimer); (window as any).__initialAudioTimer = null; } } catch (e) {} + try { + const { store } = require('../../../store'); + const controlState = (store.getState() as any).ControlReducer; + if (controlState?.micNoAudio === true) { + store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); + } + } catch (e) { + console.warn('Failed to clear mic inactivity (whisper)', e); + } + // Feed process_recorder_message audio in 128 sample chunks for (let i = 0; i < pcm_data.length - 127; i+= 128) { const audio_chunk = pcm_data.subarray(i, i + 128) @@ -149,6 +165,29 @@ this.recorder.startRecording(); console.log("Whisper: Done setting up audio context"); + + 
const thresholdMs = 3000; + if (this.inactivityInterval == null) { + const { store } = require('../../../store'); + this.inactivityInterval = setInterval(() => { + try { + const state: any = store.getState(); + const listening = state.ControlReducer?.listening === true; + const micNoAudio = state.ControlReducer?.micNoAudio === true; + if (listening) { + if (!this.lastAudioTimestamp || (Date.now() - this.lastAudioTimestamp > thresholdMs)) { + if (!micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: true }); + } else { + if (micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); + } + } else { + if (micNoAudio) store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); + } + } catch (e) { + console.warn('Error in whisper mic inactivity interval', e); + } + }, 1000); + } } private async load_model(model: string) { @@ -257,6 +296,17 @@ export class WhisperRecognizer implements Recognizer { this.whisper.set_status("paused"); this.context.suspend(); this.recorder?.stopRecording(); + + if (this.inactivityInterval) { + clearInterval(this.inactivityInterval); + this.inactivityInterval = null; + } + try { + const { store } = require('../../../store'); + store.dispatch({ type: 'SET_MIC_INACTIVITY', payload: false }); + } catch (e) { + console.warn('Failed to clear mic inactivity on whisper stop', e); + } } /** diff --git a/src/react-redux&middleware/redux/reducers/controlReducers.tsx b/src/react-redux&middleware/redux/reducers/controlReducers.tsx index f21c280..847b52b 100644 --- a/src/react-redux&middleware/redux/reducers/controlReducers.tsx +++ b/src/react-redux&middleware/redux/reducers/controlReducers.tsx @@ -22,6 +22,7 @@ const initialControlState : ControlStatus = { showMFCC: false, showSpeaker: false, showIntent: false, + micNoAudio: false, } export function ControlReducer(state = initialControlState, action) { @@ -41,6 +42,8 @@ export function ControlReducer(state = initialControlState, action) { return { ...state, showIntent: 
!state.showIntent }; case 'FLIP_RECORDING_PHRASE': return { ...state, listening: action.payload}; + case 'SET_MIC_INACTIVITY': + return { ...state, micNoAudio: action.payload }; case 'SET_SPEECH_LANGUAGE': return { ...state, diff --git a/src/react-redux&middleware/redux/types/controlStatus.tsx b/src/react-redux&middleware/redux/types/controlStatus.tsx index 52197a4..2279af2 100644 --- a/src/react-redux&middleware/redux/types/controlStatus.tsx +++ b/src/react-redux&middleware/redux/types/controlStatus.tsx @@ -16,4 +16,5 @@ export type ControlStatus = { showMFCC: boolean showSpeaker: boolean showIntent: boolean + micNoAudio?: boolean } diff --git a/src/store.tsx b/src/store.tsx index 8b1f8a3..6e0e883 100644 --- a/src/store.tsx +++ b/src/store.tsx @@ -37,3 +37,5 @@ export const store = configureStore({ }); export type RootState = ReturnType<typeof store.getState> +(window as any).store = store; +