add deepspeech support
This commit is contained in:
parent
59ca151e67
commit
0a7400e14a
2 changed files with 31 additions and 2 deletions
3
.gitignore
vendored
3
.gitignore
vendored
|
@ -127,3 +127,6 @@ dmypy.json
|
||||||
|
|
||||||
# Pyre type checker
|
# Pyre type checker
|
||||||
.pyre/
|
.pyre/
|
||||||
|
|
||||||
|
deepspeech-0.9.3-models.pbmm
|
||||||
|
deepspeech-0.9.3-models.scorer
|
|
@ -5,6 +5,8 @@ import os
|
||||||
from os import path
|
from os import path
|
||||||
import asyncio
|
import asyncio
|
||||||
from pydub import AudioSegment
|
from pydub import AudioSegment
|
||||||
|
import deepspeech
|
||||||
|
import numpy as np
|
||||||
import speech_recognition as sr
|
import speech_recognition as sr
|
||||||
|
|
||||||
st = sr.Recognizer()
|
st = sr.Recognizer()
|
||||||
|
@ -24,7 +26,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
|
||||||
try:
|
try:
|
||||||
await inter.response.defer(ephemeral='true')
|
await inter.response.defer(ephemeral='true')
|
||||||
await message.attachments[0].save("audio.ogg")
|
await message.attachments[0].save("audio.ogg")
|
||||||
embed=disnake.Embed(title=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
|
embed=disnake.Embed(title="Audio Transcription",description=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
|
||||||
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
|
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
|
||||||
embed.set_footer(text="Accuracy not guaranteed")
|
embed.set_footer(text="Accuracy not guaranteed")
|
||||||
await inter.edit_original_message(embed=embed)
|
await inter.edit_original_message(embed=embed)
|
||||||
|
@ -39,7 +41,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
|
||||||
await inter.response.defer(ephemeral='true')
|
await inter.response.defer(ephemeral='true')
|
||||||
await message.attachments[0].save("audio.ogg")
|
await message.attachments[0].save("audio.ogg")
|
||||||
# WARNING: Google is proprietary; consider disabling it. However, Sphinx is currently not very good, so this provides an option.
|
# WARNING: Google is proprietary; consider disabling it. However, Sphinx is currently not very good, so this provides an option.
|
||||||
embed=disnake.Embed(title=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
|
embed=disnake.Embed(title="Audio Transcription",description=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
|
||||||
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
|
embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
|
||||||
embed.set_footer(text="Accuracy not guaranteed")
|
embed.set_footer(text="Accuracy not guaranteed")
|
||||||
await inter.edit_original_message(embed=embed)
|
await inter.edit_original_message(embed=embed)
|
||||||
|
@ -49,5 +51,29 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
|
await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
|
||||||
|
|
||||||
|
def _deepspeech_transcribe(ogg_path):
    """Decode the Ogg file at *ogg_path* and return DeepSpeech's transcript.

    This function blocks (audio decoding, model loading and inference), so
    it is meant to be run in a worker thread, not on the event loop.
    """
    model = deepspeech.Model('deepspeech-0.9.3-models.pbmm')
    model.enableExternalScorer('deepspeech-0.9.3-models.scorer')
    model.setBeamWidth(500)
    model.setScorerAlphaBeta(0.75, 1.85)

    # DeepSpeech consumes raw 16-bit mono PCM at the model's sample rate.
    # Passing the bytes of a WAV *file* would feed the RIFF header in as
    # audio and would leave stereo / non-matching sample rates unconverted.
    pcm = (
        AudioSegment.from_file(ogg_path, format="ogg")
        .set_frame_rate(model.sampleRate())
        .set_channels(1)
        .set_sample_width(2)
    )
    samples = np.frombuffer(pcm.raw_data, dtype=np.int16)
    return model.stt(samples)


@bot.message_command(name="Transcribe Using DeepSpeech")
async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message: disnake.Message):
    """Transcribe the first attachment of *message* with DeepSpeech.

    The attachment is saved to a per-interaction temporary file, decoded and
    transcribed in a worker thread, and the transcript is sent back as an
    ephemeral embed. Any failure is reported to the invoking user instead of
    being raised.

    NOTE(review): the function name is kept as-is; the command is registered
    under the decorator's ``name`` argument — confirm whether the Python name
    is intended to match the other transcription handlers in this file.

    Args:
        inter: The application-command interaction to respond to.
        message: The message whose first attachment is the audio to transcribe.
    """
    # Unique per interaction so overlapping invocations cannot overwrite or
    # delete each other's download.
    ogg_path = f"audio-{inter.id}.ogg"
    try:
        await inter.response.defer(ephemeral=True)

        if not message.attachments:
            await inter.edit_original_message(content='this message has no attachment to transcribe')
            return

        await message.attachments[0].save(ogg_path)

        # Inference can take seconds; keep it off the event loop so the bot
        # stays responsive while it runs.
        loop = asyncio.get_running_loop()
        transcript = await loop.run_in_executor(None, _deepspeech_transcribe, ogg_path)

        embed = disnake.Embed(title="Audio Transcription", description=transcript, color=0x3584e4)
        embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
        embed.set_footer(text="Accuracy not guaranteed")
        await inter.edit_original_message(embed=embed)
    except Exception as e:
        await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')
    finally:
        # Always clean up the download, including on the error paths.
        if os.path.exists(ogg_path):
            os.remove(ogg_path)
|
||||||
|
|
||||||
bot.run(os.getenv("TOKEN"))
|
bot.run(os.getenv("TOKEN"))
|
Loading…
Reference in a new issue