使用FFmpeg API重新采样音频

问题描述

我的任务是解码音频数据，将其重新编码为另一种格式，然后将编码后的数据保存到缓冲区中。我需要保存到缓冲区的编码数据在AVPacket::data中。我按照以下步骤保存了它们：

1. 我从输入流中读取一个数据包
2. 我将数据包发送给解码器
3. 我得到解码后的帧
4. 我将该帧发送给编码器
5. 我得到编码后的数据包
6. 将其保存到缓冲区

整个流程都能正常工作，但问题在于：我需要在第3步和第4步之间加入“重采样”。在将数据发送到编码器之前，必要时必须先对其进行重采样。例如，我收到的音频数据格式为PCM_ALAW，单声道，采样率为8000；而在输出端，我希望得到PCM_S32LE格式、双声道、采样率为44100的数据。将音频格式从PCM_ALAW转换为PCM_S32LE已经可以正常工作，但我不知道如何实现重采样。

我已经编写了重采样相关的函数，但实现并不完整，而且我不知道如何把它们组合在一起。有人建议我参考两个示例（原文中的链接已丢失），但我仍然没能解决这个问题。

我提供了完整的代码。

audiodecoder.h

/// Decodes the audio stream of an input file with FFmpeg, re-encodes it with
/// the codec named in the settings and collects the encoded bytes in memory.
///
/// Usage as shown by the implementation: construct, call init(), then get().
class AudioDecoder
{
public:

    /// Stores a private copy of @p settings; nothing is opened until init().
    AudioDecoder(const AudioDecoderSettings& settings);
    // Neither copyable nor movable: the object owns FFmpeg contexts.
    AudioDecoder& operator=(const AudioDecoder& other) = delete;
    AudioDecoder& operator=(AudioDecoder&& other)      = delete;
    AudioDecoder(const AudioDecoder& other)            = delete;
    AudioDecoder(AudioDecoder&& other)                 = delete;
    virtual ~AudioDecoder(void);

    /// Opens the input, the encoder and the resampler.
    /// @return 0 on success (or when already initialized), -1 on failure.
    virtual qint32 init(void) noexcept;
    //virtual QByteArray getData(const quint32 &size) noexcept;
    /// Runs the decode/encode loop until the end of the input and returns the
    /// bytes accumulated in the internal buffer.
    virtual QByteArray get() noexcept;
    /// Releases what init() acquired.
    /// @return -1 when the object is not initialized.
    virtual qint32 term(void) noexcept;

protected:

    /// Opens the input file and a decoder for its audio stream.
    virtual qint32 openInputStream (void) noexcept;
    /// Creates and opens the encoder described by the settings.
    virtual qint32 openEncoderForStream(void) noexcept;
    /// Reads one packet and tries to decode it into @p frame.
    virtual qint32 decodeAudioFrame(AVFrame *frame);
    /// Encodes @p frame and appends the resulting payload to the buffer.
    virtual qint32 encodeAudioFrame(AVFrame *frame);
    /// Creates the decoder-format to encoder-format resampling context.
    virtual qint32 initResampler(void);
    /// Allocates per-channel sample storage for @p frame_size samples.
    virtual qint32 initConvertedSamples(uint8_t ***converted_input_samples,int frame_size);

    /// Deleter policy for QScopedPointer: one overload per owned type.
    class Deleter
    {
    public:
        static void cleanup(AVFormatContext* p);
        static void cleanup(AVCodecContext* p);
        static void cleanup(AudioDecoderSettings* p);
    };

protected:

    bool   m_edf{ false };          ///< Set once the end of the input was reached.
    bool   m_initialized{ false };  ///< True between a successful init() and term().
    qint32 m_streamIndex{ 0 };      ///< Index of the audio stream inside the input.
    QByteArray                                     m_buffer;   ///< Encoded output bytes.
    QScopedPointer<AVFormatContext,Deleter> p_frmCtx{nullptr};   ///< Input (demuxer) context.
    QScopedPointer<AVCodecContext,Deleter> p_iCdcCtx{nullptr};   ///< Decoder context.
    QScopedPointer<AVCodecContext,Deleter> p_oCdcCtx{nullptr};   ///< Encoder context.
    QScopedPointer<AudioDecoderSettings,Deleter> p_settings{nullptr};
    // Raw pointer, not managed by Deleter. Initialized to nullptr so that it
    // never holds an indeterminate value before initResampler() has run.
    SwrContext *swrCtx{ nullptr };
};

audiodecoder.cpp

// Puts a caller-provided packet into the "empty" state expected by the
// read/receive calls that fill it in afterwards.
static void initPacket(AVPacket *packet)
{
    av_init_packet(packet);
    // No payload yet: zero length and no data pointer mark the packet as empty.
    packet->size = 0;
    packet->data = nullptr;
}

// Translates a negative FFmpeg error code into readable text.
// Returns an empty string for non-negative codes (i.e. "no error").
static QString error2string(const qint32& code)
{
    if (code >= 0) {
        return QString();
    }
    // Zero-fill the buffer so it is a valid empty C string whatever
    // av_strerror() writes (the previous initializer stored the character '0').
    char errorBuffer[255] = {};
    av_strerror(code,errorBuffer,sizeof(errorBuffer));
    return QString(errorBuffer);
}

static void printErrorMessage(const QString &message,const qint32 &code = 0)
{
    qDebug() << "AudioDecoder: " << message << error2string(code);
}

// Allocates an empty AVFrame and stores it in *frame.
// Returns 0 on success or AVERROR(ENOMEM) when the allocation failed.
static qint32 initInputFrame(AVFrame **frame)
{
    *frame = av_frame_alloc();
    if (*frame != nullptr) {
        return 0;
    }
    printErrorMessage(QString("Could not allocate input frame"));
    return AVERROR(ENOMEM);
}

// QScopedPointer deleter for the demuxer context: closes the opened input.
void AudioDecoder::Deleter::cleanup(AVFormatContext* p)
{
    if (p == nullptr) {
        return;
    }
    avformat_close_input(&p);
}

// QScopedPointer deleter for decoder/encoder contexts.
void AudioDecoder::Deleter::cleanup(AVCodecContext* p)
{
    if (p == nullptr) {
        return;
    }
    avcodec_free_context(&p);
}

// QScopedPointer deleter for the heap-allocated copy of the settings.
void AudioDecoder::Deleter::cleanup(AudioDecoderSettings* p)
{
    // Deleting a null pointer is a no-op, so no explicit guard is needed.
    delete p;
}

// Stores a private copy of the settings. No file, codec or resampler is
// opened here; that is deferred to init().
AudioDecoder::AudioDecoder(const AudioDecoderSettings& settings)
    : m_edf(false),m_initialized(false),m_streamIndex(0),p_frmCtx(nullptr),p_iCdcCtx(nullptr),p_oCdcCtx(nullptr),p_settings(new AudioDecoderSettings(settings))
    ,swrCtx(nullptr) // raw pointer: must not be left indeterminate
{
    // NOTE(review): both registration calls are deprecated in recent FFmpeg
    // releases - confirm they are still required for the targeted version.
    av_register_all();
    avcodec_register_all();
}

// Opens the input named in the settings, selects its first audio stream and
// opens a decoder for it.
//
// On success the demuxer context is stored in p_frmCtx, the decoder context in
// p_iCdcCtx and the stream index in m_streamIndex.
// Returns 0 on success, a negative value otherwise; on failure everything
// allocated by this call is released again.
qint32 AudioDecoder::openInputStream(void) noexcept
{
    qint32           error  = -1;
    AVCodecContext  *avctx  = nullptr;
    AVFormatContext *frmCtx = nullptr;

    // Keep the converted path alive for every call that needs a C string.
    const std::string inputPath = p_settings->inputFile().toStdString();

    // Open the input file to read from it.
    if ((error = avformat_open_input(&frmCtx,inputPath.c_str(),nullptr,nullptr)) < 0) {
        frmCtx = nullptr;
        // Both placeholders belong to the format string, so both values are
        // passed to a single arg() call on it.
        printErrorMessage(QString("Could not open input file '%1' (error '%2')")
                          .arg(p_settings->inputFile(),error2string(error)));
        return error;
    }

    // Get information on the input file (number of streams etc.).
    if ((error = avformat_find_stream_info(frmCtx,nullptr)) < 0) {
        printErrorMessage(QString("Could not open find stream info (error '%1')")
                          .arg(error2string(error)));
        avformat_close_input(&frmCtx);
        return error;
    }

    // Find the first audio stream. -1 also covers inputs without any stream,
    // so streams[] is never indexed out of range.
    qint32 audioIndex = -1;
    for (quint32 i = 0; i < frmCtx->nb_streams; ++i) {
        if (frmCtx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO) {
            audioIndex = static_cast<qint32>(i);
            break;
        }
    }
    if (audioIndex < 0) {
        avformat_close_input(&frmCtx);
        printErrorMessage(QString("The audio stream was not found"));
        return -1;
    }
    m_streamIndex = audioIndex;

    AVCodec *codec = nullptr;
    // Find a decoder for the audio stream.
    if (!(codec = avcodec_find_decoder(
              frmCtx->streams[m_streamIndex]->codecpar->codec_id))) {
        printErrorMessage(QString("Could not find input codec"));
        avformat_close_input(&frmCtx);
        return -1;
    }

    // Allocate a new decoding context.
    avctx = avcodec_alloc_context3(codec);
    if (!avctx) {
        printErrorMessage(QString("Could not allocate a decoding context"));
        avformat_close_input(&frmCtx);
        return AVERROR(ENOMEM);
    }

    // Initialize the stream parameters with demuxer information.
    error = avcodec_parameters_to_context(
                avctx,frmCtx->streams[m_streamIndex]->codecpar);
    if (error < 0) {
        printErrorMessage(QString("Could not copy stream parameters: "),error);
        avformat_close_input(&frmCtx);
        avcodec_free_context(&avctx);
        return error;
    }

    // Open the decoder for the audio stream to use it later.
    if ((error = avcodec_open2(avctx,codec,nullptr)) < 0) {
        printErrorMessage(QString("Could not open input codec: "),error);
        avcodec_free_context(&avctx);
        avformat_close_input(&frmCtx);
        return error;
    }

    // Hand ownership to the members; they release the contexts from now on.
    p_iCdcCtx.reset(avctx);
    p_frmCtx.reset(frmCtx);

    // Print detailed information about the input format
    // (arguments: context, index used in the log, url, is_output = 0).
    av_dump_format(p_frmCtx.data(),0,inputPath.c_str(),0);
    return 0;
}

// Shuts the decoder down through term(); the status code is ignored because a
// destructor has no way to report it.
AudioDecoder::~AudioDecoder(void)
{
    // Inside a destructor a virtual call already resolves to this class, so
    // the qualified call only makes that explicit.
    AudioDecoder::term();
}

// Releases everything acquired by a successful init().
// Returns -1 when the object is not initialized, 0 after the resources were
// released.
qint32 AudioDecoder::term(void) noexcept
{
    if (!m_initialized) {
        return -1;
    }

    // m_initialized is only set after initResampler() succeeded, so swrCtx is
    // a valid context here. It is a raw pointer and was never released before.
    swr_free(&swrCtx);

    // reset() on an empty QScopedPointer does nothing, so no checks are needed.
    p_frmCtx.reset();
    p_iCdcCtx.reset();
    p_oCdcCtx.reset();
    // NOTE(review): the settings are dropped as well, so the object cannot be
    // configured again after term() - confirm this is intended.
    p_settings.reset();

    m_initialized = false;
    return 0;
}

// Prepares the decoder for get(): validates the settings, then opens the
// input stream, the encoder and the resampler, in that order.
// Returns 0 on success or when already initialized, -1 on any failure.
qint32 AudioDecoder::init(void) noexcept
{
    if (m_initialized) {
        return 0;
    }
    // term() releases the settings object, so it may be gone by the time
    // init() is called again; refuse instead of dereferencing a null pointer.
    if (p_settings.isNull()) {
        return -1;
    }
    if (p_settings->inputFile().isEmpty()) {
        return -1;
    }
    if (p_settings->audioCodec().isEmpty()) {
        return -1;
    }
    if (openInputStream() < 0) {
        return -1;
    }
    // The encoder falls back to decoder parameters, so it is opened second.
    if (openEncoderForStream() < 0) {
        return -1;
    }
    // The resampler needs both codec contexts.
    if (initResampler() < 0) {
        return -1;
    }

    m_initialized = true;
    return 0;
}

// Creates and opens the output encoder.
//
// Sample rate, channel count and bit rate come from the settings when they are
// positive, otherwise from the already opened decoder (p_iCdcCtx). The codec
// name "copy" selects an encoder with the same name as the input decoder.
// Returns 0 on success and stores the context in p_oCdcCtx; returns -1 on
// failure.
qint32 AudioDecoder::openEncoderForStream(void) noexcept
{
    AVCodecContext *avctx = nullptr;
    AVCodec        *codec = nullptr;
    qint32          error = 0;

    // Set the basic encoder parameters.
    const quint32 sampleRate   = p_settings->sampleRate()   > 0
            ? p_settings->sampleRate()   : p_iCdcCtx->sample_rate;
    const quint16 channelCount = p_settings->channelCount() > 0
            ? p_settings->channelCount() : p_iCdcCtx->channels;
    // 64 bit: the decoder bit rate is a 64-bit value and must not be truncated.
    const qint64  constBitRate = p_settings->constBitRate() > 0
            ? p_settings->constBitRate() : p_iCdcCtx->bit_rate;
    const QString encodeName   = p_settings->audioCodec() == "copy"
            ? QString(p_iCdcCtx->codec->name) : p_settings->audioCodec();

    if (!(codec = avcodec_find_encoder_by_name(
              encodeName.toStdString().c_str()))) {
        // Report the name that was actually looked up (not the alias "copy").
        printErrorMessage(QString(
            "Could not find an %1 encoder").arg(encodeName));
        return -1;
    }

    avctx = avcodec_alloc_context3(codec);
    if (!avctx) {
        printErrorMessage(QString("Could not allocate an encoding context"));
        return -1;
    }

    // The first advertised sample format is used below, so a list is required.
    if (!codec->sample_fmts) {
        printErrorMessage(QString("The encoder does not list any sample format"));
        avcodec_free_context(&avctx);
        return -1;
    }

    avctx->channels              = channelCount;
    avctx->channel_layout        = av_get_default_channel_layout(channelCount);
    avctx->sample_rate           = sampleRate;
    avctx->bit_rate              = constBitRate;
    avctx->sample_fmt            = codec->sample_fmts[0];
    // One tick of the encoder time base equals one sample.
    avctx->time_base.den         = sampleRate;
    avctx->time_base.num         = 1;
    // Allow the use of the experimental encoder.
    avctx->strict_std_compliance = FF_COMPLIANCE_EXPERIMENTAL;

    // Open the encoder for the audio stream to use it later.
    if ((error = avcodec_open2(avctx,codec,nullptr)) < 0) {
        printErrorMessage(QString("Could not open output codec (error '%1')")
                          .arg(error2string(error)));
        avcodec_free_context(&avctx);
        return -1;
    }

    p_oCdcCtx.reset(avctx);
    return 0;
}

// Reads the next packet of the selected audio stream and tries to decode it
// into `frame`.
//
// Returns 0 when a frame was decoded, when the decoder needs more input, or
// when the end of the input was reached (m_edf is set in that case). In the
// last two cases `frame` holds no samples. Returns a negative FFmpeg error
// code on failure.
qint32 AudioDecoder::decodeAudioFrame(AVFrame *frame)
{
    // Packet used for temporary storage.
    AVPacket input_packet;
    qint32 error = 0;
    initPacket(&input_packet);

    // Read packets until one belongs to the audio stream. Packets of other
    // streams are expected in multi-stream inputs and are simply dropped.
    for (;;) {
        error = av_read_frame(p_frmCtx.data(),&input_packet);
        if (error == AVERROR_EOF) {
            // End of the input: tell the caller to stop.
            m_edf = true;
            return 0;
        }
        if (error < 0) {
            printErrorMessage(QString("Could not read frame (error '%1')")
                              .arg(error2string(error)));
            return error;
        }
        if (input_packet.stream_index == m_streamIndex) {
            break;
        }
        av_packet_unref(&input_packet);
    }

    // Send the audio frame stored in the temporary packet to the decoder.
    // The input audio stream decoder is used to do this.
    if ((error = avcodec_send_packet(p_iCdcCtx.data(),&input_packet)) < 0) {
        printErrorMessage(QString("Could not send packet for decoding (error '%1')")
                          .arg(error2string(error)));
        // The packet still belongs to this function; release it on this path too.
        av_packet_unref(&input_packet);
        return error;
    }

    // Receive one frame from the decoder.
    error = avcodec_receive_frame(p_iCdcCtx.data(),frame);

    if (error == AVERROR(EAGAIN)) {
        // The decoder asks for more data: not an error, but no frame either.
        error = 0;
    } else if (error == AVERROR_EOF) {
        // The decoder reported the end of the stream: stop decoding.
        m_edf = true;
        error = 0;
    } else if (error < 0) {
        printErrorMessage(QString("Could not decode frame (error '%1')")
                          .arg(error2string(error)));
    } else {
        error = 0;
    }
    av_packet_unref(&input_packet);
    return error;
}

// Encodes `frame` and appends the payload of every packet the encoder hands
// back to m_buffer.
//
// Returns 0 on success, including when the encoder wants more input or
// reports that it is finished; returns a negative FFmpeg error code otherwise.
qint32 AudioDecoder::encodeAudioFrame(AVFrame *frame)
{
    // Send the audio frame to the encoder.
    // The output audio stream encoder is used to do this.
    qint32 error = avcodec_send_frame(p_oCdcCtx.data(),frame);
    if (error == AVERROR_EOF) {
        // The encoder signals that it has nothing more to encode.
        return 0;
    }
    if (error < 0) {
        printErrorMessage(QString("Could not send packet for encoding (error '%1')")
                          .arg(error2string(error)));
        return error;
    }

    // Packet used for temporary storage.
    AVPacket output_packet;
    initPacket(&output_packet);

    // One frame may yield zero, one or several packets, so keep receiving
    // until the encoder has nothing left for this input.
    for (;;) {
        error = avcodec_receive_packet(p_oCdcCtx.data(),&output_packet);
        if (error == AVERROR(EAGAIN) || error == AVERROR_EOF) {
            // More input needed, or the last frame has been encoded.
            error = 0;
            break;
        }
        if (error < 0) {
            printErrorMessage(QString("Could not encode frame (error '%1')")
                              .arg(error2string(error)));
            break;
        }

        // Save decoded - encoded audio data (one bulk append per packet).
        m_buffer.append(reinterpret_cast<const char *>(output_packet.data),
                        output_packet.size);
        av_packet_unref(&output_packet);
    }
    av_packet_unref(&output_packet);
    return error;
}

// Transcodes the input until its end and returns the encoded bytes.
//
// Each decoded frame is converted by the resampler (sample format, channel
// layout and sample rate of the encoder) and the converted frame is then
// encoded. On any error the bytes collected so far are returned.
//
// NOTE(review): frames are passed to the encoder with whatever length the
// resampler produces. Encoders that require a fixed frame size would need an
// intermediate FIFO - confirm against the codecs in use.
QByteArray AudioDecoder::get() noexcept
{
    // The contexts used below only exist after a successful init().
    if (!m_initialized) {
        return m_buffer;
    }

    AVFrame *frame = nullptr;
    if (initInputFrame(&frame) < 0) {
        return m_buffer;
    }
    AVFrame *resampled = av_frame_alloc();
    if (!resampled) {
        printErrorMessage(QString("Could not allocate output frame"));
        av_frame_free(&frame);
        return m_buffer;
    }

    // Converts `inputSamples` samples from `input` (nullptr/0 drains what the
    // resampler still buffers) and encodes the result. Returns false on error.
    const auto convertAndEncode = [this,resampled](const uint8_t **input,int inputSamples) -> bool {
        // Upper bound of the output: buffered input plus new input, scaled
        // from the input rate to the output rate and rounded up.
        const qint64 pending  = swr_get_delay(swrCtx,p_iCdcCtx->sample_rate) + inputSamples;
        const qint64 capacity = av_rescale_rnd(pending,p_oCdcCtx->sample_rate,p_iCdcCtx->sample_rate,AV_ROUND_UP);
        if (capacity <= 0) {
            return true; // nothing to convert
        }

        // Describe the output frame so that a matching buffer can be allocated.
        resampled->format         = p_oCdcCtx->sample_fmt;
        resampled->channel_layout = p_oCdcCtx->channel_layout;
        resampled->channels       = p_oCdcCtx->channels;
        resampled->sample_rate    = p_oCdcCtx->sample_rate;
        resampled->nb_samples     = static_cast<int>(capacity);

        const int allocated = av_frame_get_buffer(resampled,0);
        if (allocated < 0) {
            printErrorMessage(QString("Could not allocate output frame buffer (error '%1')")
                              .arg(error2string(allocated)));
            av_frame_unref(resampled);
            return false;
        }

        const int produced = swr_convert(swrCtx,resampled->extended_data,resampled->nb_samples,input,inputSamples);
        bool ok = true;
        if (produced < 0) {
            printErrorMessage(QString("Could not convert input samples (error '%1')")
                              .arg(error2string(produced)));
            ok = false;
        } else if (produced > 0) {
            // Only the samples actually written are valid.
            resampled->nb_samples = produced;
            ok = encodeAudioFrame(resampled) >= 0;
        }
        av_frame_unref(resampled);
        return ok;
    };

    bool failed = false;
    while (!failed && !m_edf) {
        if (decodeAudioFrame(frame) < 0) {
            failed = true;
            break;
        }
        // No frame this round (decoder needs more data, or end of input).
        if (frame->nb_samples <= 0) {
            continue;
        }
        failed = !convertAndEncode(const_cast<const uint8_t **>(frame->extended_data),
                                   frame->nb_samples);
        av_frame_unref(frame);
    }

    // At the end of the input, fetch the samples still held by the resampler.
    if (!failed) {
        convertAndEncode(nullptr,0);
    }

    av_frame_free(&resampled);
    av_frame_free(&frame);
    return m_buffer;
}

// Creates and opens the resampling context that converts decoder output
// (p_iCdcCtx) into the format expected by the encoder (p_oCdcCtx).
//
// Sample format, channel layout and sample rate may all differ between the two
// sides. Because the rates may differ, the number of converted samples can
// exceed the number of input samples; code calling swr_convert() has to size
// its output accordingly (see swr_get_delay()).
// Returns 0 on success and leaves the context in swrCtx; returns a negative
// FFmpeg error code otherwise.
qint32 AudioDecoder::initResampler(void)
{
    qint32 error = 0;
    // Create a resampler context for the conversion.
    // Set the conversion parameters. Default channel layouts based on the number of channels
    // are assumed for simplicity (they are sometimes not detected properly by the demuxer and/or decoder).
    swrCtx = swr_alloc_set_opts(
                nullptr,
                av_get_default_channel_layout(p_oCdcCtx->channels),
                p_oCdcCtx->sample_fmt,
                p_oCdcCtx->sample_rate,
                av_get_default_channel_layout(p_iCdcCtx->channels),
                p_iCdcCtx->sample_fmt,
                p_iCdcCtx->sample_rate,
                0,          // log_offset
                nullptr);   // log_ctx
    if (!swrCtx) {
        printErrorMessage(QString("Could not allocate resample context"));
        return AVERROR(ENOMEM);
    }

    // Open the resampler with the specified parameters.
    if ((error = swr_init(swrCtx)) < 0) {
        printErrorMessage(QString("Could not open resample context"),error);
        swr_free(&swrCtx);
        return error;
    }
    return 0;
}

// Allocates storage for `frame_size` samples per channel in the sample format
// of the encoder (p_oCdcCtx).
//
// On success *converted_input_samples points to an array with one pointer per
// output channel. The caller releases it with av_freep() on element 0 followed
// by free() on the array. On failure nothing is left allocated and
// *converted_input_samples is nullptr.
// Returns 0 on success or a negative FFmpeg error code.
qint32 AudioDecoder::initConvertedSamples(uint8_t ***converted_input_samples,int frame_size)
{
    qint32 error = 0;
    // Allocate as many pointers as there are audio channels.
    // Each pointer will later point to the audio samples of the corresponding
    // channels (although it may be NULL for interleaved formats).
    *converted_input_samples = static_cast<uint8_t **>(
                calloc(p_oCdcCtx->channels,sizeof(**converted_input_samples)));
    if (!*converted_input_samples) {
        printErrorMessage("Could not allocate converted input sample pointers");
        return AVERROR(ENOMEM);
    }

    // Allocate memory for the samples of all channels in one consecutive
    // block for convenience
    // (arguments: pointers, linesize out, channels, samples, format, align).
    if ((error = av_samples_alloc(
             *converted_input_samples,nullptr,p_oCdcCtx->channels,frame_size,p_oCdcCtx->sample_fmt,0)) < 0) {

        printErrorMessage(QString("Could not allocate converted input samples (error '%1')")
                          .arg(error2string(error)));
        av_freep(&(*converted_input_samples)[0]);
        free(*converted_input_samples);
        // Do not hand a dangling pointer back to the caller.
        *converted_input_samples = nullptr;
        return error;
    }
    return 0;
}

这是在将数据发送到编码器之前必须进行数据重采样的地方：

// Runs the decode -> (resample) -> encode loop until the end of the input and
// returns the bytes collected in m_buffer. On any error the bytes collected so
// far are returned.
QByteArray AudioDecoder::get() noexcept
{
    AVFrame *frame = nullptr;
    if (initInputFrame(&frame) < 0) {
        return m_buffer;
    }

    while (!m_edf) {
        if (decodeAudioFrame(frame) < 0) {
            break;
        }

        // decodeAudioFrame() also returns 0 when it produced no frame (the
        // decoder needs more data, or the input ended). Such a frame carries
        // no samples and must not be handed to the encoder.
        if (frame->nb_samples <= 0) {
            continue;
        }

        // TODO: resample the decoded frame here, before it is sent to the
        // encoder (sample format, channel count and sample rate conversion).

        if (encodeAudioFrame(frame) < 0) {
            break;
        }
        av_frame_unref(frame);
    }

    // Single exit: the frame is released on every path.
    av_frame_free(&frame);
    return m_buffer;
}

解决方法

暂无找到可以解决该程序问题的有效方法，小编努力寻找整理中！

如果你已经找到好的解决方法，欢迎将解决方案带上本链接一起发送给小编。

小编邮箱:dio#foxmail.com (将#修改为@）

c++decode decode ffmpeg ffmpeg ffmpeg resampling