add deepspeech support

2023-06-19 13:10:17 -04:00 · 2023-06-19 13:10:17 -04:00 · 0a7400e14a
commit 0a7400e14a
parent 59ca151e67
2 changed files with 31 additions and 2 deletions
--- a/.gitignore
+++ b/.gitignore
@ -127,3 +127,6 @@ dmypy.json

 # Pyre type checker
 .pyre/
+
+deepspeech-0.9.3-models.pbmm
+deepspeech-0.9.3-models.scorer
--- a/transcribebot.py
+++ b/transcribebot.py
@ -5,6 +5,8 @@ import os
 from os import path
 import asyncio
 from pydub import AudioSegment
+import deepspeech
+import numpy as np
 import speech_recognition as sr

 st = sr.Recognizer()
@ -24,7 +26,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
    try:
        await inter.response.defer(ephemeral='true')
        await message.attachments[0].save("audio.ogg")
-        embed=disnake.Embed(title=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
+        embed=disnake.Embed(title="Audio Transcription",description=st.recognize_sphinx(prepaudio("audio.ogg")), color=0x3584e4)
        embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
        embed.set_footer(text="Accuracy not guaranteed")
        await inter.edit_original_message(embed=embed)
@ -39,7 +41,7 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
        await inter.response.defer(ephemeral='true')
        await message.attachments[0].save("audio.ogg")
        # WARNING Google is propritary, consider disabling however sphynix is currently not very good so this provides an option
-        embed=disnake.Embed(title=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
+        embed=disnake.Embed(title="Audio Transcription",description=st.recognize_google(prepaudio("audio.ogg")), color=0x3584e4)
        embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
        embed.set_footer(text="Accuracy not guaranteed")
        await inter.edit_original_message(embed=embed)
@ -49,5 +51,29 @@ async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message
    except Exception as e:
        await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')

+@bot.message_command(name="Transcribe Using DeepSpeech")
+async def transcribesphinx(inter: disnake.ApplicationCommandInteraction, message: disnake.Message):
+    try:
+        await inter.response.defer(ephemeral='true')
+        await message.attachments[0].save("audio.ogg")
+        audio = AudioSegment.from_file("audio.ogg", format="ogg")
+        audio.export("audio.wav", format="wav")
+
+        model = deepspeech.Model('deepspeech-0.9.3-models.pbmm')
+        model.enableExternalScorer('deepspeech-0.9.3-models.scorer')
+        model.setBeamWidth(500)
+        model.setScorerAlphaBeta(0.75, 1.85)
+        
+        with open("audio.wav", 'rb') as f:
+            audio = np.frombuffer(f.read(), np.int16)
+        embed=disnake.Embed(title="Audio Transcription",description=model.stt(audio), color=0x3584e4)
+        embed.set_author(name=message.author.display_name, url=message.jump_url, icon_url=message.author.display_avatar)
+        embed.set_footer(text="Accuracy not guaranteed")
+        await inter.edit_original_message(embed=embed)
+        await asyncio.sleep(3)
+        os.remove("audio.ogg")
+        os.remove("audio.wav")
+    except Exception as e:
+        await inter.edit_original_message(content=f'an error appears to have occoured please report it to the developer: {e}')

 bot.run(os.getenv("TOKEN"))