问题描述
我制作了一个记录文件并将它们保存到文件目录的程序,它确实正确地保存了它们。但是,当我尝试打开它并查看记录的内容时,我发现它没有存储的音频数据。我不确定我做错了什么。请看一看,让我知道。
from playsound import playsound
from random import randrange
import pyttsx3
from datetime import datetime
import pyaudio
import speech_recognition as sr
import requests
import wave
import numpy as np
import sounddevice as sd
import math
import time
import os
import sys
import sounddevice as sd
from scipy.io.wavfile import write
import struct
def voiceDetection():
SoundThreshHold = 50
TimeoutLength = 5
chunk = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2 #Basicly audio output
RATE = 16000 #Rate at which you sample
f_name_directory = r"C:\Users\x\OneDrive\Desktop\Record"
def rms(data):
count = len(data)/2
format = "%dh"%(count)
shorts = struct.unpack( format,data )
sum_squares = 0.0
for sample in shorts:
n = sample * (1.0/32768)
sum_squares += n*n
return math.sqrt( sum_squares / count)*1000
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,channels=CHANNELS,rate=RATE,input=True,output=True,frames_per_buffer=chunk)
currentTime = time.time()
end = time.time() + TimeoutLength
frames = []
while currentTime < end:
currentTime = time.time()
data = stream.read(chunk)
if rms(data) >= SoundThreshHold:
#print(rms(data))
end = time.time() + TimeoutLength
frames.append(data)
n_files = len(os.listdir(f_name_directory))
filename = os.path.join(f_name_directory,'{}.wav'.format(n_files))
wf = wave.open(filename,'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(data)
wf.close()
print('Written to file: {}'.format(filename))
stream.stop_stream()
stream.close()
p.terminate()
voiceDetection()
解决方法
当前代码为每个块写入一个单独的 WAV 文件,并且始终使用相同的名称,因此该文件会覆盖为前一个块写入的任何 WAV。您可能打算在循环之前调用一次 wave.open
,在循环之后调用 wf.close
,以便为整个会话写入一个 WAV。
编辑:在音频录制期间穿插文件 IO 可能会造成过多的开销,无法在不丢失样本的情况下正确录制。您可以尝试缓冲内存中的所有样本,然后一次性写入 WAV。在pyaudio homepage上,有一个“录制”示例,可以录制几秒钟的音频并将其写入WAV文件:
"""PyAudio example: Record a few seconds of audio and save to a WAVE file."""
import pyaudio
import wave
CHUNK = 1024
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "output.wav"
p = pyaudio.PyAudio()
stream = p.open(format=FORMAT,channels=CHANNELS,rate=RATE,input=True,frames_per_buffer=CHUNK)
print("* recording")
frames = []
for i in range(0,int(RATE / CHUNK * RECORD_SECONDS)):
data = stream.read(CHUNK)
frames.append(data)
print("* done recording")
stream.stop_stream()
stream.close()
p.terminate()
wf = wave.open(WAVE_OUTPUT_FILENAME,'wb')
wf.setnchannels(CHANNELS)
wf.setsampwidth(p.get_sample_size(FORMAT))
wf.setframerate(RATE)
wf.writeframes(b''.join(frames))
wf.close()
作为一般注意事项,不幸的是,(标准 CPython)Python 解释器真正同时执行线程的能力有限,这使得 Python 成为实时音频应用程序的糟糕语言(另见 Does python support multiprocessor/multicore programming?)。根据您的项目目标,您可能希望切换到 C++ 并使用 portaudio C 库(pyaudio 基于该库)。