From ebcfa7e19e14034005503e532c36a68b89bb7535 Mon Sep 17 00:00:00 2001
From: Andrei Stoica
Date: Sun, 25 Feb 2024 13:40:08 -0500
Subject: [PATCH] playing back response

---
 speech-speech/backend/api.py       |  8 ++++----
 speech-speech/frontend/index.html  |  3 +--
 speech-speech/frontend/src/App.tsx | 38 ++++++++++++++++++++++----------------
 3 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/speech-speech/backend/api.py b/speech-speech/backend/api.py
index e7173a3..9e285b7 100644
--- a/speech-speech/backend/api.py
+++ b/speech-speech/backend/api.py
@@ -28,7 +28,7 @@ class Conversation(BaseModel):
 
 
 @app.post("/get-text")
-def get_text(response: Response, audio: bytes = File()):
+def stt(audio: bytes = File()):
     with open("audio", "wb") as f:
         f.write(audio)
     # transcript = openAI_clinet.audio.transcriptions.create(
@@ -43,7 +43,7 @@ def get_text(response: Response, audio: bytes = File()):
 
 
 @app.post("/conversation")
-async def get_next_response(request: Request, response: Response):
+async def get_next_response(request: Request):
     # role = "test"
     # res_msg = "temp test response"
     messages = await request.json()
@@ -58,8 +58,8 @@ async def get_next_response(request: Request, response: Response):
     return {"role": role, "content": res_msg}
 
 
-@app.post("/speak", response_class=FileResponse)
-def tts(text: str, response: Response):
+@app.get("/speak", response_class=FileResponse)
+def tts(text: str):
     res = openAI_clinet.audio.speech.create(
         model="tts-1",
         voice="nova",
diff --git a/speech-speech/frontend/index.html b/speech-speech/frontend/index.html
index e4b78ea..8c93435 100644
--- a/speech-speech/frontend/index.html
+++ b/speech-speech/frontend/index.html
@@ -2,9 +2,8 @@
 <html lang="en">
   <head>
     <meta charset="UTF-8" />
-    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
     <meta name="viewport" content="width=device-width, initial-scale=1.0" />
-    <title>Vite + React + TS</title>
+    <title>Speech to Speech AI example</title>
   </head>
   <body>
     <div id="root"></div>
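Note on the /speak change above: an HTMLAudioElement always fetches its source with a GET request, so moving the route from POST to GET, with the text carried in the query string, is what lets the frontend hand the TTS response straight to an Audio element. A minimal sketch of the consuming side, using the host and port that App.tsx below hard-codes; the `speak` helper name is illustrative, not part of this patch:

    // speak() is a hypothetical wrapper around the new GET /speak route.
    const speak = (text: string): Promise<void> => {
      // URLSearchParams percent-encodes the text for the query string.
      const url = "http://100.82.51.22:8001/speak?" + new URLSearchParams({ text });
      const audio = new Audio(url); // Audio elements can only issue GETs
      return audio.play();          // play() resolves once playback starts
    };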
diff --git a/speech-speech/frontend/src/App.tsx b/speech-speech/frontend/src/App.tsx
index 34bb22c..e8235f1 100644
--- a/speech-speech/frontend/src/App.tsx
+++ b/speech-speech/frontend/src/App.tsx
@@ -12,23 +12,10 @@ type ChatMsg = {
   content: string;
 };
 
-function Header() {
-  return (
-    <div>
-      <div>
-        Speach to Speech AI example
-      </div>
-    </div>
-  );
-}
-
 let audioBlobs = [];
 let streamBeingCaptured: MediaStream | null = null;
 let mediaRecorder: MediaRecorder | null = null;
-let chat: Array<ChatMsg> = [{
-  role: "system",
-  content: "You are a helpful assistant.",
-}];
+
 
 function get_mic() {
   if (navigator.mediaDevices && navigator.mediaDevices.getUserMedia) {
@@ -65,10 +52,26 @@ function stopRecord() {
 
 function playRecord() {
   const audioBlob = new Blob(audioBlobs, { type: "audio/webm" });
   const audioUrl = URL.createObjectURL(audioBlob);
-  const audio = new Audio(audioUrl);
+  const audio = new Audio(audioUrl);
   audio.play();
 }
 
+function playMsg(msg: ChatMsg) {
+  const audio = new Audio("http://100.82.51.22:8001/speak?" + new URLSearchParams({text: msg.content}));
+  console.log("loading audio and playing?")
+  audio.play();
+}
+
+function Header() {
+  return (
+    <div>
+      <div>
+        Speech to Speech AI example
+      </div>
+    </div>
+  );
+}
+
 function Feed(props: { chat: Array[ChatMsg]; setChatStateFn: any }) {
   const bottomRef = useRef(null);
@@ -86,7 +89,8 @@ function Feed(props: { chat: Array[ChatMsg]; setChatStateFn: any }) {
     <div>
       <div>
         {props.chat.filter((m: ChatMsg) => m.role != "system").map((
           m: ChatMsg,
-        ) => <Msg msg={m} />)}
+          i: number,
+        ) => <Msg msg={m} key={i} />)}
         <div ref={bottomRef}></div>
       </div>
@@ -142,6 +146,8 @@ function Controls(props: { setChatStateFn: any; chat: Array[ChatMsg] }) {
     }).then((res) => res.json())
       .then((res) => {
         props.setChatStateFn((curState) => [...curState, res]);
+        console.log("attempting to play result")
+        playMsg(res)
       });
   });
 }
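Taken together, the round trip after this patch is: capture audio, POST it to /get-text, POST the running transcript to /conversation, then stream the reply from the now-GET /speak route, as playMsg does. A sketch of that chain under two stated assumptions: the sendRecording helper is invented here for illustration, and /get-text is assumed to answer with a { text: string } JSON body, since its return value sits outside the hunks above:

    type ChatMsg = { role: string; content: string };

    async function sendRecording(audio: Blob, chat: ChatMsg[]): Promise<ChatMsg> {
      // 1. Speech-to-text: `audio: bytes = File()` on the FastAPI side binds a
      //    multipart form field named "audio".
      const form = new FormData();
      form.append("audio", audio);
      const stt = await fetch("http://100.82.51.22:8001/get-text", {
        method: "POST",
        body: form,
      }).then((res) => res.json()); // assumed response shape: { text: string }

      // 2. Next turn: /conversation reads the raw JSON message list and
      //    returns a single { role, content } object.
      const messages = [...chat, { role: "user", content: stt.text }];
      const reply: ChatMsg = await fetch("http://100.82.51.22:8001/conversation", {
        method: "POST",
        headers: { "Content-Type": "application/json" },
        body: JSON.stringify(messages),
      }).then((res) => res.json());

      // 3. Text-to-speech: stream the reply through GET /speak, exactly as
      //    playMsg does above.
      new Audio(
        "http://100.82.51.22:8001/speak?" +
          new URLSearchParams({ text: reply.content }),
      ).play();

      return reply;
    }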
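The Feed hunk also threads the map index through the callback, which only makes sense as a React key for the rendered message elements: keyed children stay stable across re-renders and stop React's missing-key warning. The shape of that fix, with Msg as a stand-in for the app's real message component, whose markup never appears in this diff:

    import React from "react";

    type ChatMsg = { role: string; content: string };

    // Stand-in for the real message component used in App.tsx.
    function Msg(props: { msg: ChatMsg }) {
      return <p>{props.msg.role}: {props.msg.content}</p>;
    }

    function Feed(props: { chat: ChatMsg[] }) {
      return (
        <div>
          {props.chat
            .filter((m: ChatMsg) => m.role != "system")
            .map((m: ChatMsg, i: number) => <Msg msg={m} key={i} />)}
        </div>
      );
    }

An index works as a key here because the chat list is append-only; entries are never reordered or deleted out from under React.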