add deepspeech support
This commit is contained in:
parent
59ca151e67
commit
0a7400e14a
2 changed files with 31 additions and 2 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -127,3 +127,6 @@ dmypy.json
|
|||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
deepspeech-0.9.3-models.pbmm
|
||||
deepspeech-0.9.3-models.scorer
|
|
@ -5,6 +5,8 @@ import os
|
|||
from os import path
|
||||
import asyncio
|
||||
from pydub import AudioSegment
|
||||
import deepspeech
|
||||
import numpy as np
|
||||
import speech_recognition as sr
|
||||
|
||||
st = sr.Recognizer()
|
||||
|
@ -24,7 +26,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
|
|||
try:
|
||||
await inter.response.defer(ephemeral='true')
|
||||
await message.attachments[0].save("audio.ogg")
|
||||
embed=disnake.Embed(title=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
|
||||
embed=disnake.Embed(title="Audio Transcription",description=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
|
||||
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
|
||||
embed.set_footer(text="Accuracy not guaranteed")
|
||||
await inter.edit_original_message(embed=embed)
|
||||
|
@ -39,7 +41,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
|
|||
await inter.response.defer(ephemeral='true')
|
||||
await message.attachments[0].save("audio.ogg")
|
||||
# WARNING Google is propritary, consider disabling however sphynix is currently not very good so this provides an option
|
||||
embed=disnake.Embed(title=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
|
||||
embed=disnake.Embed(title="Audio Transcription",description=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
|
||||
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
|
||||
embed.set_footer(text="Accuracy not guaranteed")
|
||||
await inter.edit_original_message(embed=embed)
|
||||
|
@ -49,5 +51,29 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
|
|||
except Exception as e:
|
||||
await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
|
||||
|
||||
@bot.message_command(name="Transcribe Using DeepSpeech")
|
||||
async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message: disnake.Message):
|
||||
try:
|
||||
await inter.response.defer(ephemeral='true')
|
||||
await message.attachments[0].save("audio.ogg")
|
||||
audio = AudioSegment.from_file("audio.ogg", format="ogg")
|
||||
audio.export("audio.wav", format="wav")
|
||||
|
||||
model = deepspeech.Model('deepspeech-0.9.3-models.pbmm')
|
||||
model.enableExternalScorer('deepspeech-0.9.3-models.scorer')
|
||||
model.setBeamWidth(500)
|
||||
model.setScorerAlphaBeta(0.75, 1.85)
|
||||
|
||||
with open("audio.wav", 'rb') as f:
|
||||
audio = np.frombuffer(f.read(), np.int16)
|
||||
embed=disnake.Embed(title="Audio Transcription",description=model.stt(audio), color=0x3584e4)
|
||||
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
|
||||
embed.set_footer(text="Accuracy not guaranteed")
|
||||
await inter.edit_original_message(embed=embed)
|
||||
await asyncio.sleep(3)
|
||||
os.remove("audio.ogg")
|
||||
os.remove("audio.wav")
|
||||
except Exception as e:
|
||||
await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
|
||||
|
||||
bot.run(os.getenv("TOKEN"))
|
Loading…
Reference in a new issue