From 0a7400e14a20b804824efcfed83a0cb89d96f39e Mon Sep 17 00:00:00 2001
From: insert-usernamehere
Date: Mon, 19 Jun 2023 13:10:17 -0400
Subject: [PATCH] add deepspeech support

---
NOTE(review): this patch was recovered from a copy whose line breaks and
indentation had been collapsed. 4-space indentation and the position of the
blank context lines are assumed -- confirm against transcribebot.py before
applying. The commit id above and the `index` blob ids below predate the
reviewed DeepSpeech handler and will not match it.

Review changes to the added DeepSpeech handler:
  * decode the attachment with pydub to 16-bit mono PCM at the model's sample
    rate and pass the raw samples to DeepSpeech; the previous version read the
    exported .wav file byte-for-byte, so the RIFF header was transcribed as
    audio and the sample rate / channel count were never normalised
  * no intermediate audio.wav is written any more
  * remove audio.ogg in a `finally` block so a failed run does not leave it
    behind
  * give the handler its own function name instead of redefining
    `transcribesphinx` a third time
  * answer with a clear message when the target message has no attachment

 .gitignore       |  3 +++
 transcribebot.py | 38 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index b6e4761..b343a81 100644
--- a/.gitignore
+++ b/.gitignore
@@ -127,3 +127,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+deepspeech-0.9.3-models.pbmm
+deepspeech-0.9.3-models.scorer
\ No newline at end of file
diff --git a/transcribebot.py b/transcribebot.py
index d265f9e..00d940f 100644
--- a/transcribebot.py
+++ b/transcribebot.py
@@ -5,6 +5,8 @@ import os
 from os import path
 import asyncio
 from pydub import AudioSegment
+import deepspeech
+import numpy as np
 import speech_recognition as sr
 
 st = sr.Recognizer()
@@ -24,7 +26,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
     try:
         await inter.response.defer(ephemeral='true')
         await message.attachments[0].save("audio.ogg")
-        embed=disnake.Embed(title=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
+        embed=disnake.Embed(title="Audio Transcription",description=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
         embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
         embed.set_footer(text="Accuracy not guaranteed")
         await inter.edit_original_message(embed=embed)
@@ -39,7 +41,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
         await inter.response.defer(ephemeral='true')
         await message.attachments[0].save("audio.ogg")
         # WARNING Google is propritary, consider disabling however sphynix is currently not very good so this provides an option
-        embed=disnake.Embed(title=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
+        embed=disnake.Embed(title="Audio Transcription",description=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
         embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
         embed.set_footer(text="Accuracy not guaranteed")
         await inter.edit_original_message(embed=embed)
@@ -49,5 +51,37 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
     except Exception as e:
         await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
+@bot.message_command(name="Transcribe Using DeepSpeech")
+async def transcribedeepspeech(inter: disnake.ApplicationCommandInteraction, message: disnake.Message):
+    """Transcribe the first attachment of the target message with DeepSpeech and reply with an embed."""
+    try:
+        await inter.response.defer(ephemeral=True)
+        if not message.attachments:
+            await inter.edit_original_message(content='that message has no attachment to transcribe')
+            return
+        await message.attachments[0].save("audio.ogg")
+
+        model = deepspeech.Model('deepspeech-0.9.3-models.pbmm')
+        model.enableExternalScorer('deepspeech-0.9.3-models.scorer')
+        model.setBeamWidth(500)
+        model.setScorerAlphaBeta(0.75, 1.85)
+
+        # DeepSpeech takes headerless 16-bit mono PCM at the model's sample
+        # rate. Decode with pydub and hand over the raw samples; reading a
+        # .wav file byte-for-byte would feed its RIFF header in as audio.
+        audio = AudioSegment.from_file("audio.ogg", format="ogg")
+        audio = audio.set_frame_rate(model.sampleRate()).set_channels(1).set_sample_width(2)
+        samples = np.frombuffer(audio.raw_data, dtype=np.int16)
+
+        embed=disnake.Embed(title="Audio Transcription",description=model.stt(samples), color=0x3584e4)
+        embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
+        embed.set_footer(text="Accuracy not guaranteed")
+        await inter.edit_original_message(embed=embed)
+    except Exception as e:
+        await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
+    finally:
+        # Remove the download even when transcription fails.
+        if path.exists("audio.ogg"):
+            os.remove("audio.ogg")
 
 
 bot.run(os.getenv("TOKEN"))
\ No newline at end of file