Capturing stereo audio data

Problem description

I have a macOS Swift application that processes audio data recorded from the microphone. The microphone is stereo-capable, but I can only record mono data.

In the code below, if I set let alwaysMono = true, func setup() reports that the active format is stereo but overrides it to mono. Everything works with mono input.

If I set let alwaysMono = false, setup() sets nChannels to 2, but captureOutput never gets any data: the AudioBuffers returned from the UnsafeMutableAudioBufferListPointer always have nil mData. If I don't check for nil mData, the program crashes.

How do I get full stereo input?

Edit: In captureOutput, CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer returns error code -12737, which corresponds to kCMSampleBufferError_ArrayTooSmall. I have inspected the sampleBuffer arg passed to captureOutput and can't see anything obviously wrong with it, but then I don't know what to look for.
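
For reference, a common way to avoid kCMSampleBufferError_ArrayTooSmall is a two-call pattern: first ask Core Media how large the AudioBufferList needs to be, then allocate exactly that much and call again. A minimal sketch under that assumption (the copyAudioBufferList helper name is hypothetical, not part of the original code):

import AVFoundation

func copyAudioBufferList(from sampleBuffer: CMSampleBuffer) -> (UnsafeMutableAudioBufferListPointer, CMBlockBuffer)? {
    // First call: pass nil for bufferListOut and only query the size needed.
    var sizeNeeded = 0
    CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
        sampleBuffer,
        bufferListSizeNeededOut: &sizeNeeded,
        bufferListOut: nil,
        bufferListSize: 0,
        blockBufferAllocator: nil,
        blockBufferMemoryAllocator: nil,
        flags: 0,
        blockBufferOut: nil)
    // Allocate a list of exactly that size; non-interleaved stereo needs room
    // for two AudioBuffers, more than a single stack AudioBufferList provides.
    let listPtr = UnsafeMutableRawPointer.allocate(
        byteCount: sizeNeeded,
        alignment: MemoryLayout<AudioBufferList>.alignment
    ).bindMemory(to: AudioBufferList.self, capacity: 1)
    var blockBuffer: CMBlockBuffer?
    // Second call: fill the properly sized list.
    let status = CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
        sampleBuffer,
        bufferListSizeNeededOut: nil,
        bufferListOut: listPtr,
        bufferListSize: sizeNeeded,
        blockBufferAllocator: nil,
        blockBufferMemoryAllocator: nil,
        flags: UInt32(kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment),
        blockBufferOut: &blockBuffer)
    guard status == noErr, let block = blockBuffer else {
        listPtr.deallocate()
        return nil
    }
    return (UnsafeMutableAudioBufferListPointer(listPtr), block)
}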

Another edit: I tested the code with the built-in mono microphone and, to my surprise, it also reported a stereo format. This suggests there is something wrong with the way I obtain and use AVCaptureDevice.activeFormat, but I don't know where to go from here.
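
One possible explanation: the setup() code below infers the channel count by comparing mChannelLayoutTag against kAudioChannelLayoutTag_Stereo, and a format that publishes an unexpected (or no) layout tag would be misread. It may be more reliable to read mChannelsPerFrame from the format's AudioStreamBasicDescription; a minimal sketch (the activeChannelCount name is hypothetical):

import AVFoundation

// Read the channel count straight from the stream description instead of
// comparing channel-layout tags.
func activeChannelCount() -> UInt32? {
    guard let device = AVCaptureDevice.default(for: .audio) else { return nil }
    let desc = device.activeFormat.formatDescription
    guard let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(desc)?.pointee else { return nil }
    return asbd.mChannelsPerFrame  // 1 = mono, 2 = stereo
}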

import AVFoundation

class Recorder: NSObject, AVCaptureAudioDataOutputSampleBufferDelegate {
    let alwaysMono = false
    var nChannels:UInt32 = 1
    let session : AVCaptureSession!
    static let realTimeQueue = DispatchQueue(label: "com.myapp.realtime", qos: DispatchQoS(qosClass: .userInitiated, relativePriority: 0))
    override init() {
        session = AVCaptureSession()
        super.init()
    }
    static var recorder:Recorder?
    static func record() ->Bool {
        if recorder == nil {
            recorder = Recorder()
            if !recorder!.setup(callback:record) {
                recorder = nil
                return false
            }
        }
        realTimeQueue.async {
            if !recorder!.session.isRunning {
                recorder!.session.startRunning()
            }
        }
        return true
    }
    static func pause() {
        recorder!.session.stopRunning()
    }
    func setup( callback:@escaping (()->Bool)) -> Bool {
        let device = AVCaptureDevice.default( for: AVMediaType.audio )
        if device == nil { return false }
        if let format = Recorder.getActiveFormat() {
            nChannels = format.mChannelLayoutTag == kAudioChannelLayoutTag_Stereo ? 2 : 1
            print("active format is \((nChannels==2) ? "Stereo" : "Mono")")
            if alwaysMono {
                print( "Overriding to mono" )
                nChannels = 1
            }
        }
        if #available(OSX 10.14, *) {
            let status = AVCaptureDevice.authorizationStatus( for: AVMediaType.audio )
            if status == .notDetermined {
                AVCaptureDevice.requestAccess(for: AVMediaType.audio ){ granted in
                    _ = callback()
                }
                return false
            } else if status != .authorized {
                return false
            }
        }
        var input : AVCaptureDeviceInput
        do {
            try device!.lockForConfiguration()
            try input = AVCaptureDeviceInput( device: device! )
            device!.unlockForConfiguration()
        } catch {
            device!.unlockForConfiguration()
            return false
        }
        let output = AVCaptureAudioDataOutput()
        // Deliver sample buffers on the shared real-time queue defined above.
        output.setSampleBufferDelegate(self, queue: Recorder.realTimeQueue)
        let settings = [
            AVFormatIDKey: kAudioFormatLinearPCM,
            AVNumberOfChannelsKey: nChannels,
            AVSampleRateKey: 44100,
            AVLinearPCMBitDepthKey: 16,
            AVLinearPCMIsFloatKey: false
            ] as [String : Any]
        output.audioSettings = settings
        session.beginConfiguration()
        if !session.canAddInput( input ) {
            return false
        }
        session.addInput( input )
        if !session.canAddOutput( output ) {
            return false
        }
        session.addOutput( output )
        session.commitConfiguration()
        return true
    }
    static func getActiveFormat() -> AudioFormatListItem? {
        if #available(OSX 10.15, *) {
            let device = AVCaptureDevice.default( for: AVMediaType.audio )
            if device == nil { return nil }
            let list = device!.activeFormat.formatDescription.audioFormatList
            if list.count < 1 { return nil }
            return list[0]
        }
        return nil
    }
    func captureOutput(_ captureOutput: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        var buffer: CMBlockBuffer? = nil
        // A fixed, single-buffer AudioBufferList. If the sample buffer holds
        // more than one buffer (e.g. non-interleaved stereo), this is too
        // small, which is what kCMSampleBufferError_ArrayTooSmall (-12737) signals.
        var audioBufferList = AudioBufferList(
            mNumberBuffers: 1,
            mBuffers: AudioBuffer(mNumberChannels: nChannels, mDataByteSize: 0, mData: nil)
        )
        CMSampleBufferGetAudioBufferListWithRetainedBlockBuffer(
            sampleBuffer,
            bufferListSizeNeededOut: nil,
            bufferListOut: &audioBufferList,
            bufferListSize: MemoryLayout<AudioBufferList>.size,
            blockBufferAllocator: nil,
            blockBufferMemoryAllocator: nil,
            flags: UInt32(kCMSampleBufferFlag_AudioBufferList_Assure16ByteAlignment),
            blockBufferOut: &buffer
        )
        let abl = UnsafeMutableAudioBufferListPointer(&audioBufferList)
        for buff in abl {
            if buff.mData != nil {
                let count = Int(buff.mDataByteSize) / MemoryLayout<Int16>.size
                let samples = UnsafeMutablePointer<Int16>(OpaquePointer(buff.mData))
                process(samples: samples!, count: count)
            } else {
                print("No data!")
            }
        }
    }
    func process( samples: UnsafeMutablePointer<Int16>,count: Int ) {
        let firstValue = samples[0]
        print( "\(count) values received,first is \(firstValue)" )
    }
}

Solution

This is one of those problems where the fault could lie in several places. First, it depends on which microphone you are using. Is it the one built into the Mac? If so, it is normally mono. Either way, you can build yourself a workaround just to test whether capture works at all, independently of the alwaysMono flag: record twice, taking the first channel as mono on the first pass and the second channel on the second pass, with separate "doubling" code for each, as sketched below.
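
A minimal sketch of that "doubling" idea, assuming interleaved 16-bit samples (the doubleToStereo name is mine, not from the answer):

// Duplicate a mono Int16 buffer into both channels of an interleaved
// stereo buffer.
func doubleToStereo(mono: UnsafePointer<Int16>, count: Int) -> [Int16] {
    var stereo = [Int16](repeating: 0, count: count * 2)
    for i in 0..<count {
        stereo[2 * i]     = mono[i]  // left
        stereo[2 * i + 1] = mono[i]  // right
    }
    return stereo
}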

The second thing I would look at is explicitly setting the channel count to 2 in the setup function, so that it is guaranteed to ask for its two channels, and always comparing alwaysMono with an explicit == true or == false. See whether you can print some channel information at several points while the code runs, so that you know exactly where it goes wrong.
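
A sketch of the kind of channel printing meant here, to be called after session.commitConfiguration() (the dumpChannels helper is hypothetical):

import AVFoundation

// Print what the output was asked for versus what its connections deliver.
func dumpChannels(output: AVCaptureAudioDataOutput) {
    print("requested settings: \(output.audioSettings ?? [:])")
    for connection in output.connections {
        print("connection delivers \(connection.audioChannels.count) audio channel(s)")
    }
}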