你能在 PyQt5 窗口上显示连续的语音到文本吗?

问题描述

我正在使用 Azure 语音识别,我想创建我自己的语音转文本应用程序,并且我想在 PyQt5 窗口上显示文本。

我仍然无法弄清楚如何把识别出的文本作为输出显示在窗口上......

我从另一个来源获得了这段代码,它使用 tkinter 在窗口上显示文本,我想用 PyQt5 做同样的事情。

原始代码来源: https://github.com/jimbobbennett/TwitchCaptioner/blob/master/captioner.py

这是我正在尝试做的代码! `

import sys

from PyQt5.QtWidgets import QApplication
from PyQt5.QtWidgets import QVBoxLayout
from PyQt5.QtWidgets import QPushButton
from PyQt5.QtWidgets import QWidget
from PyQt5.QtWidgets import QLabel
import math
import time

import azure.cognitiveservices.speech as speechsdk
import config
from PyQt5.QtWidgets import *
from azure.cognitiveservices.speech import SpeechConfig


def recognizing(args):
    """Handle an intermediate ("recognizing") event from the recognizer.

    The text carried by ``args.result`` is passed to the ``set`` method of
    the module-level ``labelText`` object and then echoed to stdout.
    """
    labelText.set(args.result.text)
    print(args.result.text)


def recognized(args):
    """Handle a final ("recognized") event from the recognizer.

    Results whose text is empty or whitespace-only are ignored.  Any other
    text is written, followed by a newline, to the module-level ``f``
    (presumably a writable file object opened elsewhere).
    """
    final_text = args.result.text
    if final_text.strip() == '':
        return
    f.write(final_text + "\n")


# Azure Speech credentials; both are blank placeholders to be filled in.
speech_key,service_region = "",""  # key and service region goes here
speech_config = speechsdk.SpeechConfig(subscription=speech_key,region=service_region)

# Recognizer built from the config above; no audio config is passed.
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)

# Register the two callbacks defined above, then start continuous recognition.
# Recognition is started before the Qt application object is created.
# NOTE(review): `result` receives whatever `connect()` returns, not recognized
# text; it is later passed to QLabel below. Confirm against the Speech SDK.
result = speech_recognizer.recognizing.connect(recognizing)
speech_recognizer.recognized.connect(recognized)
speech_recognizer.start_continuous_recognition()

# Build a single-label window and enter the Qt event loop.
app = QApplication(sys.argv)
window = QWidget()
# NOTE(review): the Qt method is spelled `setWindowTitle`; this lowercase
# spelling looks like a typo. Confirm against the QWidget documentation.
window.setwindowTitle("WindowTitle")
layout = QVBoxLayout()
# The label is created once from `result`; nothing below updates it afterwards.
label = QLabel(result)
layout.addWidget(label)
window.setLayout(layout)
window.show()
# Exit the process with the event loop's return code.
sys.exit(app.exec_())

`

目前的输出:

The output now!

更新:我正在尝试另一种方法......我正在尝试实现一次性识别(recognize_once),但我无法理解如何在我说话时更新标签中的文本......

这是我的代码

import config
import azure.cognitiveservices.speech as speechsdk
import time
import wave

import sys

from PyQt5 import QtCore,QtWidgets
from PyQt5.QtWidgets import QApplication
from PyQt5.QtWidgets import QVBoxLayout
from PyQt5.QtWidgets import QPushButton
from PyQt5.QtWidgets import QWidget
from PyQt5.QtWidgets import QLabel
import math

from PyQt5.QtWidgets import *
from azure.cognitiveservices.speech import SpeechConfig

# Azure Speech credentials: fill in the subscription key and service region.
# Both names must be bound here because SpeechConfig below reads them.
speech_key, service_region = "", ""  # speech key and region goes here
speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
# Module-level recognizer; speech_to_Print() calls recognize_once() on it.
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)


def speech_to_Print():
    """Run one single-shot recognition and return the recognized text.

    Calls ``recognize_once()`` on the module-level ``speech_recognizer``,
    prints the resulting text to stdout, and returns it.
    """
    sentence = speech_recognizer.recognize_once().text
    print(sentence)
    return sentence


class QRecognizer(QtCore.QObject):
    """QObject that owns an Azure ``SpeechRecognizer``.

    Declares the ``textChanged`` signal, which carries one ``str``.  Nothing
    in this class emits the signal.
    """

    textChanged = QtCore.pyqtSignal(str)

    def __init__(self, key, region, parent=None):
        """Create the recognizer from the subscription *key* and *region*.

        *parent* is forwarded to the ``QObject`` constructor.
        """
        super().__init__(parent)
        self._recognizer = speechsdk.SpeechRecognizer(
            speech_config=speechsdk.SpeechConfig(subscription=key, region=region)
        )


def main():
    """Show a window whose label holds a single recognized sentence.

    Creates the Qt application and a ``QRecognizer``, obtains one sentence
    from ``speech_to_Print()``, places it in a label inside the window, and
    then runs the Qt event loop until the application exits.
    """
    import sys

    app = QtWidgets.QApplication(sys.argv)

    # Both names must be bound before use; fill in real credentials here.
    speech_key, service_region = "", ""  # speech key and region goes here
    qrecognizer = QRecognizer(speech_key, service_region)

    w = QtWidgets.QWidget()
    while True:
        # Recognition happens here, before the window is shown and before the
        # event loop starts.
        newSentence = speech_to_Print()
        label = QtWidgets.QLabel(newSentence)

        # Connects the signal to the label; nothing in the visible code emits
        # textChanged, so the label keeps its initial text.
        qrecognizer.textChanged.connect(label.setText)

        lay = QtWidgets.QVBoxLayout(w)
        lay.addWidget(label)
        w.show()
        # sys.exit() terminates the process once the event loop returns, so
        # this loop body executes only once.
        sys.exit(app.exec_())


if __name__ == "__main__":
    main()

解决方法

基本上,您需要在一个新线程中执行语音转文本,然后通过信号(signal)获得识别出的文本。

from PyQt5.QtCore import QRunnable,QThreadPool,QObject,pyqtSignal
import azure.cognitiveservices.speech as speechsdk
from playsound import playsound

# Azure Speech subscription key and service region.  Both are blank
# placeholders; Stt.run() passes them to speechsdk.SpeechConfig.
API_KEY = ""
REGION = ""


class Stt(QRunnable):
    """Runnable that performs one speech recognition and reports the text.

    The recognized text is delivered through ``self.signals.finished``, a
    ``WorkerSignals`` instance created in ``__init__``.  Connect a slot to
    that signal before calling ``start()``.
    """

    def __init__(self):
        super().__init__()
        # Signals are held on a separate WorkerSignals QObject.
        self.signals = WorkerSignals()

    def run(self) -> None:
        """Recognize a single utterance and emit it on success.

        A beep is played before and after the recognition call.  Only a
        ``RecognizedSpeech`` result emits ``finished``; the no-match and
        canceled outcomes are printed to stdout and emit nothing.
        """
        speech_config = speechsdk.SpeechConfig(subscription=API_KEY, region=REGION)
        speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config)
        playsound('music/StartBeep.wav')
        result = speech_recognizer.recognize_once()
        playsound("music/EndBeep.wav")

        if result.reason == speechsdk.ResultReason.RecognizedSpeech:
            self.signals.finished.emit(result.text)
        elif result.reason == speechsdk.ResultReason.NoMatch:
            print("No speech could be recognized")
        elif result.reason == speechsdk.ResultReason.Canceled:
            cancellation_details = result.cancellation_details
            print("Speech Recognition canceled: {}".format(cancellation_details.reason))
            if cancellation_details.reason == speechsdk.CancellationReason.Error:
                print("Error details: {}".format(cancellation_details.error_details))

    def start(self):
        """Queue this runnable on the global ``QThreadPool``."""
        QThreadPool.globalInstance().start(self)


class WorkerSignals(QObject):
    """Signal container instantiated by ``Stt`` as ``self.signals``."""

    # Emitted by Stt.run() with the recognized text (a single str).
    finished = pyqtSignal(str)

开始识别

# Create a fresh worker, connect its result signal to a slot, and queue it.
# `callback_function` is not defined in this snippet: supply your own callable
# that accepts the recognized text (one str argument).
stt=Stt()
stt.signals.finished.connect(callback_function)
stt.start()

不要重复使用同一个对象进行多次识别;请把上述代码封装成一个函数,每次需要识别时调用该函数,从而每次都创建一个新对象。

我已经写了一篇关于如何在 PyQt5 应用程序中使用 Azure 语音服务的详细教程,请访问 this 阅读教程,您还可以获取完整代码 here