add deepspeech support

This commit is contained in:
insert-usernamehere 2023-06-19 13:10:17 -04:00
parent 59ca151e67
commit 0a7400e14a
2 changed files with 31 additions and 2 deletions

3
.gitignore vendored
View file

@ -127,3 +127,6 @@ dmypy.json
# Pyre type checker
.pyre/
deepspeech-0.9.3-models.pbmm
deepspeech-0.9.3-models.scorer

View file

@ -5,6 +5,8 @@ import os
from os import path
import asyncio
from pydub import AudioSegment
import deepspeech
import numpy as np
import speech_recognition as sr
st = sr.Recognizer()
@ -24,7 +26,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
try:
await inter.response.defer(ephemeral='true')
await message.attachments[0].save("audio.ogg")
embed=disnake.Embed(title=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
embed=disnake.Embed(title="Audio Transcription",description=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
embed.set_footer(text="Accuracy not guaranteed")
await inter.edit_original_message(embed=embed)
@ -39,7 +41,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
await inter.response.defer(ephemeral='true')
await message.attachments[0].save("audio.ogg")
# WARNING: Google is proprietary; consider disabling it. However, Sphinx is currently not very good, so this provides an option.
embed=disnake.Embed(title=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
embed=disnake.Embed(title="Audio Transcription",description=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
embed.set_footer(text="Accuracy not guaranteed")
await inter.edit_original_message(embed=embed)
@ -49,5 +51,29 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
except Exception as e:
await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
def _deepspeech_transcribe(ogg_path):
    """Return the DeepSpeech transcript of the OGG audio file at *ogg_path*.

    The audio is decoded with pydub and converted to the format DeepSpeech
    consumes (mono, 16-bit samples at the model's sample rate) before being
    passed to the model as a NumPy int16 array.

    This function is synchronous and CPU-heavy; call it from a worker thread
    rather than directly from a coroutine.
    """
    model = deepspeech.Model('deepspeech-0.9.3-models.pbmm')
    model.enableExternalScorer('deepspeech-0.9.3-models.scorer')
    model.setBeamWidth(500)
    model.setScorerAlphaBeta(0.75, 1.85)

    clip = AudioSegment.from_file(ogg_path, format="ogg")
    # Voice messages are not guaranteed to match the model's input format,
    # so resample / downmix explicitly instead of feeding whatever was decoded.
    clip = clip.set_frame_rate(model.sampleRate()).set_channels(1).set_sample_width(2)

    # Take the decoded PCM samples directly. Reading an exported .wav file as
    # raw bytes would also interpret the RIFF header as audio samples.
    samples = np.array(clip.get_array_of_samples(), dtype=np.int16)
    return model.stt(samples)


# NOTE(review): the function name duplicates the `transcribesphinx` handler
# visible in the hunk headers; it is kept unchanged here, and the command is
# registered under the `name=` given to the decorator.
@bot.message_command(name="Transcribe Using DeepSpeech")
async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message: disnake.Message):
    """Transcribe the first attachment of *message* with DeepSpeech.

    The interaction is deferred ephemerally, the attachment is saved to
    ``audio.ogg``, transcribed in a worker thread, and the result is sent back
    as an embed. Any failure is reported to the invoking user by editing the
    deferred response. The temporary file is removed whether or not
    transcription succeeds.

    Args:
        inter: The application command interaction that triggered the command.
        message: The message the command was invoked on.
    """
    try:
        await inter.response.defer(ephemeral=True)

        if not message.attachments:
            await inter.edit_original_message(content='this message has no attachment to transcribe')
            return

        await message.attachments[0].save("audio.ogg")

        # Model loading and inference are blocking; run them off the event
        # loop so the bot keeps servicing other events meanwhile.
        loop = asyncio.get_running_loop()
        transcript = await loop.run_in_executor(None, _deepspeech_transcribe, "audio.ogg")

        embed = disnake.Embed(title="Audio Transcription", description=transcript, color=0x3584e4)
        embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
        embed.set_footer(text="Accuracy not guaranteed")
        await inter.edit_original_message(embed=embed)
    except Exception as e:
        await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
    finally:
        # Clean up on every path, including failures; the file may not exist
        # if the error happened before the attachment was saved.
        try:
            os.remove("audio.ogg")
        except FileNotFoundError:
            pass
# Start the bot using the token read from the TOKEN environment variable.
bot.run(os.environ.get("TOKEN"))