Problem description
Well, since the FFMPEG documentation and code examples are absolute garbage, I guess my only option is to come here and ask.
What I'm trying to do is simply record audio from the microphone and write it to a file. So I initialize the input and output formats, grab an audio packet, decode it, resample it, encode it and write it. But every time I try to play the audio back, there is only a tiny bit of data. For some reason it seems to write only a single initial packet, which is still strange. Let me explain why:
if((response = swr_config_frame(resampleContext,audioOutputFrame,frame)) < 0) qDebug() << "can't configure frame!" << av_make_error(response);
if((response = swr_convert_frame(resampleContext,audioOutputFrame,frame)) < 0) qDebug() << "can't resample frame!" << av_make_error(response);
This is the code I use for resampling. My frame contains data, but swr_convert_frame writes empty data into audioOutputFrame. How do I fix this? FFMPEG is really driving me crazy.
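For context, this is how I set up audioOutputFrame, condensed from configOutput in the full code below; every parameter comes from the encoder context:
// Condensed from configureAudioFrame/prepareOutputContext in VideoReader.cpp below:
audioOutputFrame = av_frame_alloc();
audioOutputFrame->nb_samples = audioEncoderContext->frame_size;
audioOutputFrame->format = audioEncoderContext->sample_fmt;
audioOutputFrame->sample_rate = audioEncoderContext->sample_rate;
audioOutputFrame->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
av_frame_get_buffer(audioOutputFrame,0);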
Here is the complete code of my class:
VideoReader.h
#ifndef VIDEOREADER_H
#define VIDEOREADER_H
extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavdevice/avdevice.h>
#include "libavutil/audio_fifo.h"
#include "libavformat/avio.h"
#include "libswresample/swresample.h"
#include <inttypes.h>
}
#include <QString>
#include <QElapsedTimer>
class VideoReader
{
public:
VideoReader();
bool open(const char* filename);
bool fillFrame();
bool readFrame(uint8_t *&frameData);
void close();
int width,height;
private:
bool configinput();
bool configOutput(const char *filename);
bool configResampler();
bool encode(AVFrame *frame,AVCodecContext *encoderContext,AVPacket *outputPacket,int streamIndex,QString type);
int audioStreamIndex = -1;
int videoStreamIndex = -1;
int64_t videoStartPts = 0;
int64_t audioStartPts = 0;
AVFormatContext* inputFormatContext = nullptr;
AVFormatContext* outputFormatContext = nullptr;
AVCodecContext* videoDecoderContext = nullptr;
AVCodecContext* videoEncoderContext = nullptr;
AVCodecContext* audioDecoderContext = nullptr;
AVCodecContext* audioEncoderContext = nullptr;
AVFrame* videoInputFrame = nullptr;
AVFrame* audioInputFrame = nullptr;
AVFrame* videoOutputFrame = nullptr;
AVFrame* audioOutputFrame = nullptr;
AVPacket* inputPacket = nullptr;
AVPacket* videoOutputPacket = nullptr;
AVPacket* audioOutputPacket = nullptr;
SwsContext* innerScaleContext = nullptr;
SwsContext* outerScaleContext = nullptr;
SwrContext *resampleContext = nullptr;
};
#endif // VIDEOREADER_H
VideoReader.cpp
#include "VideoReader.h"
#include <QDebug>
static const char* av_make_error(int errnum)
{
static char str[AV_ERROR_MAX_STRING_SIZE];
memset(str,0,sizeof(str));
return av_make_error_string(str,AV_ERROR_MAX_STRING_SIZE,errnum);
}
VideoReader::VideoReader()
{
}
bool VideoReader::open(const char *filename)
{
if(!configinput()) return false;
if(!configOutput(filename)) return false;
if(!configResampler()) return false;
return true;
}
bool VideoReader::fillFrame()
{
auto convertToYUV = [=](AVFrame* frame)
{
int response = 0;
if((response = sws_scale(outerScaleContext,frame->data,frame->linesize,0,videoEncoderContext->height,videoOutputFrame->data,videoOutputFrame->linesize)) < 0) qDebug() << "can't rescale" << av_make_error(response);
};
auto convertAudio = [this](AVFrame* frame)
{
int response = 0;
auto& out = audioOutputFrame;
qDebug() << out->linesize[0] << out->nb_samples;
if((response = swr_convert_frame(resampleContext,out,frame)) < 0) qDebug() << "can't resample frame!" << av_make_error(response);
qDebug() << "poop";
};
auto decodeEncode = [=](AVPacket* inputPacket,AVFrame* inputFrame,AVCodecContext* decoderContext,AVPacket* outputPacket,AVFrame* outputFrame,AVCodecContext* encoderContext,std::function<void (AVFrame*)> convertFunc,int streamIndex,int64_t startPts,QString type)
{
int response = avcodec_send_packet(decoderContext,inputPacket);
if(response < 0) { qDebug() << "Failed to send" << type << "packet!" << av_make_error(response); return false; }
response = avcodec_receive_frame(decoderContext,inputFrame);
if(response == AVERROR(EAGAIN) || response == AVERROR_EOF) { av_packet_unref(inputPacket); return false; }
else if (response < 0) { qDebug() << "Failed to decode" << type << "frame!" << response << av_make_error(response); return false; }
if(encoderContext)
{
outputFrame->pts = inputPacket->pts - startPts;
convertFunc(inputFrame);
if(!encode(outputFrame,encoderContext,outputPacket,streamIndex,type)) return false;
}
av_packet_unref(inputPacket);
return true;
};
while(av_read_frame(inputFormatContext,inputPacket) >= 0) //actually read packet
{
if(inputPacket->stream_index == videoStreamIndex)
{
if(!videoStartPts) videoStartPts = inputPacket->pts;
if(decodeEncode(inputPacket,videoInputFrame,videoDecoderContext,videoOutputPacket,videoOutputFrame,videoEncoderContext,convertToYUV,videoStreamIndex,videoStartPts,"video")) break;
}
else if(inputPacket->stream_index == audioStreamIndex)
{
if(!audioStartPts) audioStartPts = inputPacket->pts;
if(decodeEncode(inputPacket,audioInputFrame,audioDecoderContext,audioOutputPacket,audioOutputFrame,audioEncoderContext,convertAudio,audioStreamIndex,audioStartPts,"audio")) break;
}
}
return true;
}
bool VideoReader::readFrame(uint8_t *&frameData)
{
if(!fillFrame()) { qDebug() << "readFrame method Failed!"; return false; };
const int bytesPerPixel = 4;
uint8_t* destination[bytesPerPixel] = {frameData,NULL,NULL,NULL};
int destinationLinesize[bytesPerPixel] = {videoInputFrame->width * bytesPerPixel,0,0,0};
sws_scale(innerScaleContext,videoInputFrame->data,videoInputFrame->linesize,0,videoInputFrame->height,destination,destinationLinesize);
return true;
}
void VideoReader::close()
{
encode(NULL,"video");
encode(NULL,"audio");
if(av_write_trailer(outputFormatContext) < 0) { qDebug() << "Failed to write trailer"; };
avio_closep(&outputFormatContext->pb);
avformat_free_context(outputFormatContext);
avformat_close_input(&inputFormatContext);
avformat_free_context(inputFormatContext);
av_frame_free(&videoInputFrame);
av_frame_free(&audioInputFrame);
av_frame_free(&videoOutputFrame);
av_frame_free(&audioOutputFrame);
av_packet_free(&inputPacket);
av_packet_free(&videoOutputPacket);
av_packet_free(&audioOutputPacket);
avcodec_free_context(&videoDecoderContext);
avcodec_free_context(&videoEncoderContext);
avcodec_free_context(&audioDecoderContext);
avcodec_free_context(&audioEncoderContext);
sws_freeContext(innerScaleContext);
sws_freeContext(outerScaleContext);
swr_free(&resampleContext);
}
bool VideoReader::configinput()
{
avdevice_register_all();
inputFormatContext = avformat_alloc_context();
if(!inputFormatContext) { qDebug() << "can't create context!"; return false; }
const char* inputFormatName = "dshow";/*"gdigrab"*/
AVInputFormat* inputFormat = av_find_input_format(inputFormatName);
if(!inputFormat){ qDebug() << "Can't find" << inputFormatName; return false; }
AVDictionary* options = NULL;
av_dict_set(&options,"framerate","30",0);
av_dict_set(&options,"video_size","1920x1080",0);
if(avformat_open_input(&inputFormatContext,"video=HD USB Camera:audio=Microphone (High DeFinition Audio Device)" /*"desktop"*/,inputFormat,&options) != 0) { qDebug() << "can't open video file!"; return false; }
AVCodecParameters* videoCodecParams = nullptr;
AVCodecParameters* audioCodecParams = nullptr;
AVCodec* videoDecoder = nullptr;
AVCodec* audioDecoder = nullptr;
for (uint i = 0; i < inputFormatContext->nb_streams; ++i)
{
auto stream = inputFormatContext->streams[i];
auto codecParams = stream->codecpar;
if(codecParams->codec_type == AVMEDIA_TYPE_AUDIO) { audioStreamIndex = i; audioDecoder = avcodec_find_decoder(codecParams->codec_id); audioCodecParams = codecParams; }
if(codecParams->codec_type == AVMEDIA_TYPE_VIDEO) { videoStreamIndex = i; videoDecoder = avcodec_find_decoder(codecParams->codec_id); videoCodecParams = codecParams; }
if(audioStreamIndex != -1 && videoStreamIndex != -1) break;
}
if(audioStreamIndex == -1) { qDebug() << "Failed to find audio stream inside file"; return false; }
if(videoStreamIndex == -1) { qDebug() << "Failed to find video stream inside file"; return false; }
auto configureCodecContext = [=](AVCodecContext*& context,AVCodec* decoder,AVCodecParameters* params,AVFrame*& frame,QString type)
{
context = avcodec_alloc_context3(decoder);
if(!context) { qDebug() << "Failed to create" << type << "decoder context!"; return false; }
if(avcodec_parameters_to_context(context,params) < 0) { qDebug() << "can't initialize input" << type << "decoder context"; return false; }
if(avcodec_open2(context,decoder,NULL) < 0) { qDebug() << "can't open" << type << "decoder"; return false; }
frame = av_frame_alloc();
if(!frame) { qDebug() << "can't allocate" << type << "frame"; return false; }
return true;
};
if(!configureCodecContext(videoDecoderContext,videoDecoder,videoCodecParams,videoInputFrame,"video")) return false;
if(!configureCodecContext(audioDecoderContext,audioDecoder,audioCodecParams,audioInputFrame,"audio")) return false;
audioDecoderContext->channel_layout = AV_CH_LAYOUT_STEREO;
audioInputFrame->channel_layout = audioDecoderContext->channel_layout;
inputPacket = av_packet_alloc();
if(!inputPacket) { qDebug() << "can't allocate input packet!"; return false; }
//first frame, needed for initialization
if(!fillFrame()) { qDebug() << "Failed to fill frame on init!"; return false; };
width = videoDecoderContext->width;
height = videoDecoderContext->height;
innerScaleContext = sws_getContext(width,height,videoDecoderContext->pix_fmt,width,height,AV_PIX_FMT_RGB0,SWS_FAST_BILINEAR,NULL,NULL,NULL);
outerScaleContext = sws_getContext(width,height,videoDecoderContext->pix_fmt,width,height,AV_PIX_FMT_YUV420P,SWS_FAST_BILINEAR,NULL,NULL,NULL);
if(!innerScaleContext || !outerScaleContext) { qDebug() << "Failed to initialize scaler context"; return false; }
return true;
}
bool VideoReader::configOutput(const char *filename)
{
avformat_alloc_output_context2(&outputFormatContext,NULL,NULL,filename);
if(!outputFormatContext) { qDebug() << "Failed to create output context"; return false; }
AVOutputFormat* outputFormat = outputFormatContext->oformat;
auto prepareOutputContext = [=](AVCodecContext*& encoderContext,std::function<void (AVCodecContext*,AVCodec*)> configureContextFunc,std::function<void (AVFrame*)> configureFrameFunc,AVCodecID codecId,AVPacket*& packet,AVFrame*& frame,QString type)
{
auto stream = avformat_new_stream(outputFormatContext,NULL);
if(!stream) { qDebug() << "Failed to allocate output" << type << "stream"; return false; }
AVCodec* encoder = avcodec_find_encoder(codecId);
if(!encoder) { qDebug() << "Failed to find" << type << "encoder!"; return false; }
encoderContext = avcodec_alloc_context3(encoder);
if(!encoderContext) { qDebug() << "Failed to create" << type << "encoder context!"; return false; }
configureContextFunc(encoderContext,encoder);
int result = avcodec_open2(encoderContext,encoder,NULL);
if(result < 0) { qDebug() << "Failed to open" << type << "encoder" << av_make_error(result); return false; }
if(avcodec_parameters_from_context(stream->codecpar,encoderContext) < 0) { qDebug() << "Failed to copy parameters to" << type << "output stream"; return false; }
packet = av_packet_alloc();
if(!packet) {qDebug() << "Failed allocate output" << type << "packet"; return false;}
frame = av_frame_alloc();
if(!frame) { qDebug() << "can't allocate output" << type << "frame"; return false; }
configureFrameFunc(frame);
av_frame_get_buffer(frame,0);
return true;
};
auto configureAudioFrame = [=](AVFrame* frame)
{
frame->nb_samples = audioEncoderContext->frame_size;
frame->format = audioEncoderContext->sample_fmt;
frame->sample_rate = audioEncoderContext->sample_rate;
frame->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
};
auto configureAudioEncoderContext = [=](AVCodecContext* encoderContext,AVCodec* encoder)
{
encoderContext->bit_rate = 64000;
encoderContext->sample_fmt = encoder->sample_fmts[0];
encoderContext->sample_rate = 44100;
encoderContext->codec_type = AVMEDIA_TYPE_AUDIO;
encoderContext->channel_layout = AV_CH_LAYOUT_STEREO;
encoderContext->channels = av_get_channel_layout_nb_channels(encoderContext->channel_layout);
};
auto configureVideoFrame = [=](AVFrame* frame)
{
frame->format = videoEncoderContext->pix_fmt;
frame->width = videoEncoderContext->width;
frame->height = videoEncoderContext->height;
};
auto configureVideoEncoderContext = [=](AVCodecContext* encoderContext,AVCodec* encoder)
{
encoderContext->width = videoDecoderContext->width;
encoderContext->height = videoDecoderContext->height;
encoderContext->pix_fmt = encoder->pix_fmts[0];
encoderContext->gop_size = 10;
encoderContext->max_b_frames = 1;
encoderContext->framerate = AVRational{30,1};
encoderContext->time_base = AVRational{1,30};
av_opt_set(encoderContext->priv_data,"preset","ultrafast",0);
av_opt_set(encoderContext->priv_data,"tune","zerolatency",0);
};
if(!prepareOutputContext(videoEncoderContext,configureVideoEncoderContext,configureVideoFrame,outputFormat->video_codec,videoOutputPacket,videoOutputFrame,"video")) return false;
if(!prepareOutputContext(audioEncoderContext,configureAudioEncoderContext,configureAudioFrame,outputFormat->audio_codec,audioOutputPacket,audioOutputFrame,"audio")) return false;
if(outputFormat->flags & AVFMT_GLOBALHEADER) { videoEncoderContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; audioEncoderContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; }
int result = 0;
if(!(outputFormat->flags & AVFMT_NOFILE))
if((result = avio_open(&outputFormatContext->pb,filename,AVIO_FLAG_WRITE)) < 0)
{ qDebug() << "Failed to open file" << av_make_error(result); return false; }
result = avformat_write_header(outputFormatContext,NULL);
if(result < 0) {qDebug() << "Failed to write header!" << av_make_error(result); return false; }
return true;
}
bool VideoReader::configResampler()
{
resampleContext = swr_alloc_set_opts(NULL,av_get_default_channel_layout(audioEncoderContext->channels),audioEncoderContext->sample_fmt,audioEncoderContext->sample_rate,av_get_default_channel_layout(audioDecoderContext->channels),audioDecoderContext->sample_fmt,audioDecoderContext->sample_rate,0,NULL);
if (!resampleContext) { qDebug() << "Could not allocate resample context"; return false; }
int error;
if ((error = swr_init(resampleContext)) < 0) { qDebug() << "Could not open resample context"; swr_free(&resampleContext); return false; }
return true;
}
bool VideoReader::encode(AVFrame* frame,AVCodecContext* encoderContext,AVPacket* outputPacket,int streamIndex,QString type)
{
int response;
response = avcodec_send_frame(encoderContext,frame);
if(response < 0) { qDebug() << "Failed to send" << type << "frame" << av_make_error(response); return false; }
while(response >= 0)
{
response = avcodec_receive_packet(encoderContext,outputPacket);
if(response == AVERROR(EAGAIN) || response == AVERROR_EOF) { av_packet_unref(outputPacket); continue; }
else if (response < 0) { qDebug() << "Failed to encode" << type << "frame!" << response << av_make_error(response); return false; }
outputPacket->stream_index = streamIndex;
AVStream *inStream = inputFormatContext->streams[streamIndex];
AVStream *outStream = outputFormatContext->streams[streamIndex];
av_packet_rescale_ts(outputPacket,inStream->time_base,outStream->time_base);
if((response = av_interleaved_write_frame(outputFormatContext,outputPacket)) != 0) { qDebug() << "Failed to write" << type << "packet!" << av_make_error(response); av_packet_unref(outputPacket); return false; }
av_packet_unref(outputPacket);
}
return true;
}
If needed, I can try to write a shorter example.
Solution
As far as I know, there are a few cases in which swr_convert_frame may write nothing:
- You did not initialize the output frame correctly. If so, check the following snippet:
audioFrame = av_frame_alloc();
if (audioFrame == NULL) {
// error handling
}
audioFrame->format = /* the sample format you'd like to use */;
audioFrame->channel_layout = audioCodecContext->channel_layout;
audioFrame->nb_samples = audioCodecContext->frame_size;
if (av_frame_get_buffer(audioFrame,0) < 0) {
// error handling
}
- The input frame does not contain enough samples to produce a complete output frame. If so, you need swr_get_delay (a combined sketch follows this list):
if (swr_convert(swrContext,audioFrame->data,audioFrame->nb_samples,(uint8_t const**)frame->data,frame->nb_samples) < 0) {
// handle error
}
// do stuff with your audioFrame
...
while (swr_get_delay(swrContext,audioCodecContext->sample_rate) > audioFrame->nb_samples) {
if (swr_convert(swrContext,audioFrame->data,audioFrame->nb_samples,NULL,0) < 0) {
// handle error
}
// do stuff with your audioFrame
}
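Putting the two cases together, here is a minimal sketch built around swr_convert_frame itself, since that is what the question uses. It borrows the names from the question (resampleContext, audioOutputFrame) plus a hypothetical decoded inputFrame, and assumes audioOutputFrame was fully initialized as shown above; error handling is trimmed:
// Minimal sketch, assuming resampleContext was created with swr_alloc_set_opts()
// and audioOutputFrame has format, channel_layout, sample_rate and nb_samples
// set before av_frame_get_buffer() was called on it.
const int outSamples = audioOutputFrame->nb_samples; // one full encoder frame
if (swr_convert_frame(resampleContext, audioOutputFrame, inputFrame) < 0) {
    // handle error
}
// encode/write audioOutputFrame here
// swr_convert_frame buffers leftover samples internally; keep draining while
// at least one full output frame of delayed samples remains.
while (swr_get_delay(resampleContext, audioOutputFrame->sample_rate) >= outSamples) {
    if (swr_convert_frame(resampleContext, audioOutputFrame, NULL) < 0) { // NULL input drains the FIFO
        // handle error
    }
    // encode/write audioOutputFrame here
}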
In any case, you should provide more information, at the very least a minimal reproducible example, for further diagnosis.
I have to agree that the libav documentation is poor; it drives me crazy too. But cursing the libav authors won't help, and besides, open-source contributors don't owe you anything.