问题描述
如何从 QAudioInput 实时获取音频输入,将其存储在 NumPy 数组中并将其传递给 SciPy FFT?我尝试过的:
from PyQt5.QtMultimedia import QAudioDeviceInfo,QAudioFormat,QAudioInput
import sys
class Window(QMainWindow):
def __init__(self):
info = QAudioDeviceInfo()
input_device = info.defaultInputDevice()
if input_device.isNull():
# If no avaiable device is found,we display a error
print("There is no audio input device available.")
exit(-1)
audio_format = QAudioFormat()
audio_format.setSampleRate(44100)
audio_format.setSampleSize(8)
audio_format.setChannelCount(1)
audio_format.setCodec("audio/pcm")
audio_format.setSampleType(QAudioFormat.UnSignedInt)
if sys.byteorder == "little":
audio_format.setByteOrder(QAudioFormat.LittleEndian)
else:
audio_format.setByteOrder(QAudioFormat.BigEndian)
self.audioInput = QAudioInput(input_device,audio_format,self)
self.ioDevice = self.audioInput.start()
self.ioDevice.readyRead.connect(self.read_audio)
def read_audio(self):
data: QByteArray = self.ioDevice.readAll()
print(data.toUInt()) # Prints (0,False) which means error converting data
解决方法
受官方示例 Audio Example 的启发,我创建了一个允许获取数据的 QIODevice。下面的示例通过计算其 fft 并使用 matplotlib 显示它,每 T 秒获取最后 N 个样本。
import sys
import collections
from functools import cached_property
from PyQt5.QtCore import QIODevice,QObject,pyqtSignal,QTimer
from PyQt5.QtMultimedia import QAudioDeviceInfo,QAudioFormat,QAudioInput
from PyQt5.QtWidgets import QApplication,QMainWindow
from matplotlib.backends.backend_qt5agg import FigureCanvas
from matplotlib.figure import Figure
from scipy.fft import fft,fftfreq
import numpy as np
FS = 44100
SAMPLE_COUNT = 2 * 1000
class AudioDevice(QIODevice):
data_changed = pyqtSignal(list,name="dataChanged")
def __init__(self,interval=1000,parent: QObject = None):
super().__init__(parent)
self.m_buffer = collections.deque(
[0 for _ in range(SAMPLE_COUNT)],maxlen=SAMPLE_COUNT
)
self.timer.timeout.connect(self.send_data)
self.timer.setInterval(interval)
self.timer.start()
@cached_property
def timer(self):
return QTimer()
def send_data(self):
self.data_changed.emit(list(self.m_buffer))
def readData(self,data,max_size):
return -1
def writeData(self,data):
max_size = len(data)
resolution = 4
start = 0
available_samples = int(max_size) // resolution
if available_samples < self.m_buffer.maxlen:
start = self.m_buffer.maxlen - available_samples
pos = 0
for _ in range(start,self.m_buffer.maxlen):
y = (1.0 * (data[pos] - 128)) / 128.0
self.m_buffer.append(y)
pos += resolution
return (self.m_buffer.maxlen - start) * resolution
class PlotWidget(QMainWindow):
def __init__(self,parent=None):
super().__init__(parent)
self.canvas = FigureCanvas(Figure(figsize=(5,3)))
self.setCentralWidget(self.canvas)
self.ax = self.canvas.figure.subplots()
self._line = None
def update_data(self,data):
T = 1 / FS
N = SAMPLE_COUNT
yf = fft(data)
xf = fftfreq(N,T)[: N // 2]
x = xf
y = 2.0 / N * np.abs(yf[0 : N // 2])
if self._line is None:
(self._line,) = self.ax.plot(x,y)
else:
self._line.set_data(x,y)
self.canvas.draw()
def main(args):
app = QApplication(args)
plot_widget = PlotWidget()
plot_widget.resize(640,480)
plot_widget.show()
info = QAudioDeviceInfo()
input_device = info.defaultInputDevice()
if input_device.isNull():
print("There is no audio input device available.")
exit(-1)
audio_format = QAudioFormat()
audio_format.setSampleRate(FS)
audio_format.setSampleSize(8)
audio_format.setChannelCount(1)
audio_format.setCodec("audio/pcm")
audio_format.setSampleType(QAudioFormat.UnSignedInt)
if sys.byteorder == "little":
audio_format.setByteOrder(QAudioFormat.LittleEndian)
else:
audio_format.setByteOrder(QAudioFormat.BigEndian)
audio_input = QAudioInput(input_device,audio_format,None)
audio_device = AudioDevice(interval=100)
audio_device.data_changed.connect(plot_widget.update_data)
audio_device.open(QIODevice.WriteOnly)
audio_input.start(audio_device)
app.exec_()
if __name__ == "__main__":
main(sys.argv)
,
data.toUInt()
将整个字节数组转换为一个 uint 值 - 不是您想要的。要获取示例值,您可以使用 numpy.frombuffer
或 struct.unpack
。
import numpy
def read_audio(self):
data = self.ioDevice.readAll()
values = numpy.frombuffer(data.data(),dtype=numpy.uint8)
或
import struct
def read_audio(self):
data = self.ioDevice.readAll()
fmt = "@{}B".format(data.size())
values = struct.unpack(fmt,data.data())
,
我添加了显示波形的小部件,以证明样本实际上反映了来自麦克风的信号 - 而不是随机数。
from PyQt5.QtMultimedia import QAudioDeviceInfo,QAudioInput
from PyQt5.QtWidgets import QMainWindow,QApplication,QWidget
from PyQt5.QtGui import QPainter,QPolygonF
from PyQt5.QtCore import QPointF
import sys
import numpy
class WaveWidget(QWidget):
def __init__(self,parent = None):
super().__init__(parent)
self._values = None
def setValues(self,values):
self._values = values
self.update()
def paintEvent(self,event):
if self._values is None:
return
painter = QPainter(self)
ys = self._values / 255 * self.height()
xs = numpy.linspace(0,self.width(),num = len(ys))
points = QPolygonF([QPointF(x,y) for x,y in zip(xs,ys)])
painter.drawPolyline(points)
class Window(QMainWindow):
def __init__(self):
super().__init__()
info = QAudioDeviceInfo()
input_device = info.defaultInputDevice()
if input_device.isNull():
# If no avaiable device is found,we display a error
print("There is no audio input device available.")
exit(-1)
audio_format = QAudioFormat()
audio_format.setSampleRate(44100)
audio_format.setSampleSize(8)
audio_format.setChannelCount(1)
audio_format.setCodec("audio/pcm")
audio_format.setSampleType(QAudioFormat.UnSignedInt)
if sys.byteorder == "little":
audio_format.setByteOrder(QAudioFormat.LittleEndian)
else:
audio_format.setByteOrder(QAudioFormat.BigEndian)
self.audioInput = QAudioInput(input_device,self)
self.ioDevice = self.audioInput.start()
self.ioDevice.readyRead.connect(self.read_audio)
widget = WaveWidget()
self._widget = widget
self.setCentralWidget(widget)
def read_audio(self):
data = self.ioDevice.readAll()
values = numpy.frombuffer(data.data(),dtype=numpy.uint8)
self._widget.setValues(values)
if __name__ == "__main__":
app = QApplication([])
window = Window()
window.show()
app.exec()