Using the ffmpeg API to wrap pcm_alaw audio data into an MKA audio file

Problem description

Imagine that in my project I receive RTP packets containing payload type 8, so that this payload can later be saved as the Nth part of an audio track. I extract the payload from the RTP packets and save it into a temporary buffer:

...

while ((rtp = receiveRtpPackets()).withoutErrors()) {
   payloadData.push(rtp.getPayloadData());
}

audioGenerator.setPayloadData(payloadData);
audioGenerator.recordToFile();

...

After filling a temporary buffer of a certain size with this payload, I process the buffer: I extract the whole payload and encode it with ffmpeg so that it can be saved as an audio file in the Matroska format. But I have a problem. Since the RTP packets carry payload type 8, I have to save raw pcm_alaw audio data into an mka audio file. However, when saving the raw pcm_alaw data to the audio file, I get the following messages from the library:

...

[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time
[libopus @ 0x18eff60] Queue input is backward in time

...

When you open the audio file in VLC, nothing is played (the audio track timestamps are lost).
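If I understand the message correctly, the encoder expects the pts of each submitted frame to advance by nb_samples in the codec time base (which is 1/sample_rate), so a pts that is just a frame counter makes every frame after the first appear to go backward in time. A minimal illustration of the progression that seems to be expected (haveMoreSamples() and makeNextFrame() are placeholder helpers, not code from my project):

// Expected pts progression when feeding frames to an audio encoder;
// the codec time base is 1/sample_rate, so pts is counted in samples.
int64_t nextPts = 0;
while (haveMoreSamples()) {               // placeholder condition
    AVFrame *frame = makeNextFrame();     // placeholder: fills frame->nb_samples samples
    frame->pts = nextPts;                 // not a plain frame counter
    nextPts += frame->nb_samples;         // advance by the samples in this frame
    avcodec_send_frame(m_AudioCodecContext, frame);
    // ... drain packets with avcodec_receive_packet() as usual ...
}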

The task in my project is simply to extract the pcm_alaw data and wrap it in an MKA container. The best way to determine the codec is the av_guess_codec() function, which automatically selects the required codec ID. However, I don't know how to correctly pack the raw data into the container.
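To make the goal concrete, this is roughly the kind of packing I am after: writing the A-law bytes straight into a Matroska stream without re-encoding them. The following is only a minimal sketch under my assumptions (8 kHz mono, 20 ms chunks; writeAlawToMka is an illustrative name), not working code from the project:

extern "C" {
#include "libavformat/avformat.h"
#include "libavutil/channel_layout.h"
#include "libavutil/mathematics.h"
}
#include <algorithm>
#include <cstdint>
#include <cstring>

// Sketch: mux raw G.711 A-law bytes into an MKA file without re-encoding.
bool writeAlawToMka(const char *fileName, const uint8_t *alawData, int size)
{
    AVFormatContext *oc = nullptr;
    if (avformat_alloc_output_context2(&oc, nullptr, nullptr, fileName) < 0)
        return false;

    AVStream *st = avformat_new_stream(oc, nullptr);
    st->codecpar->codec_type     = AVMEDIA_TYPE_AUDIO;
    st->codecpar->codec_id       = AV_CODEC_ID_PCM_ALAW;    // G.711 A-law, 1 byte per sample
    st->codecpar->sample_rate    = 8000;
    st->codecpar->channels       = 1;
    st->codecpar->channel_layout = AV_CH_LAYOUT_MONO;
    st->time_base = { 1, 8000 };

    if (avio_open(&oc->pb, fileName, AVIO_FLAG_WRITE) < 0)
        return false;
    if (avformat_write_header(oc, nullptr) < 0)
        return false;

    const int samplesPerPacket = 160;                        // 20 ms at 8 kHz, mono
    int64_t samplesWritten = 0;
    for (int offset = 0; offset < size; offset += samplesPerPacket) {
        const int chunk = std::min(samplesPerPacket, size - offset);

        AVPacket *pkt = av_packet_alloc();
        if (!pkt || av_new_packet(pkt, chunk) < 0) {
            av_packet_free(&pkt);
            break;
        }
        memcpy(pkt->data, alawData + offset, chunk);

        // pts/dts in samples, rescaled to whatever time base the muxer chose
        pkt->pts = pkt->dts = av_rescale_q(samplesWritten, AVRational{ 1, 8000 }, st->time_base);
        pkt->duration       = av_rescale_q(chunk, AVRational{ 1, 8000 }, st->time_base);
        pkt->stream_index   = st->index;

        if (av_interleaved_write_frame(oc, pkt) < 0) {
            av_packet_free(&pkt);
            break;
        }
        av_packet_free(&pkt);
        samplesWritten += chunk;
    }

    av_write_trailer(oc);
    avio_closep(&oc->pb);
    avformat_free_context(oc);
    return true;
}

As far as I understand, the key points are that the stream's codecpar carries AV_CODEC_ID_PCM_ALAW directly (no encoder is opened at all) and that pts advances by the number of samples contained in each packet.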

Note that I can receive RTP packets with other payload types as well (all of the RTP packet payloads are audio). What I do know is that, in any case, I have to pack the audio data into an MKA container.
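For reference, the mapping I have in mind between the static RTP payload types (RFC 3551) and FFmpeg codec IDs looks roughly like this (only the types relevant to me are shown; codecIdForRtpPayloadType is just an illustrative helper name):

// Illustrative mapping from static RTP payload types (RFC 3551) to FFmpeg codec IDs.
static AVCodecID codecIdForRtpPayloadType(int payloadType)
{
    switch (payloadType) {
    case 0:  return AV_CODEC_ID_PCM_MULAW;  // PCMU (G.711 mu-law), 8 kHz
    case 8:  return AV_CODEC_ID_PCM_ALAW;   // PCMA (G.711 A-law), 8 kHz
    default: return AV_CODEC_ID_NONE;       // other payload types need their own handling
    }
}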

I am also attaching the code I use (borrowed from this resource):

audiogenerater.h

extern "C"
{
#include "libavformat/avformat.h"
#include "libavcodec/avcodec.h"
#include "libswresample/swresample.h"
}

class AudioGenerater
{
public:

    AudioGenerater();
   ~AudioGenerater() = default;

    void generateAudioFileWithOptions(
            QString        fileName,
            QByteArray     pcmData,
            int            channel,
            int            bitRate,
            int            sampleRate,
            AVSampleFormat format);
            
private:

    // init Format
    bool initFormat(QString audioFileName);

private:

    AVCodec         *m_AudioCodec        = nullptr;
    AVCodecContext  *m_AudioCodecContext = nullptr;
    AVFormatContext *m_FormatContext     = nullptr;
    AVOutputFormat  *m_OutputFormat      = nullptr;
};

audiogenerater.cpp

AudioGenerater::AudioGenerater()
{
    av_register_all();
    avcodec_register_all();
}

AudioGenerater::~AudioGenerater()
{
    // ... 
}

bool AudioGenerater::initFormat(QString audioFileName)
{
    // Create an output Format context
    int result = avformat_alloc_output_context2(&m_FormatContext, nullptr, nullptr, audioFileName.toLocal8Bit().data());
    if (result < 0) {
        return false;
    }

    m_OutputFormat = m_FormatContext->oformat;

    // Create an audio stream
    AVStream* audioStream = avformat_new_stream(m_FormatContext, m_AudioCodec);
    if (audioStream == nullptr) {
        avformat_free_context(m_FormatContext);
        return false;
    }

    // Set the parameters in the stream
    audioStream->id = m_FormatContext->nb_streams - 1;
    audioStream->time_base = { 1, 8000 };
    result = avcodec_parameters_from_context(audioStream->codecpar, m_AudioCodecContext);
    if (result < 0) {
        avformat_free_context(m_FormatContext);
        return false;
    }

    // Print FormatContext information
    av_dump_format(m_FormatContext, 0, audioFileName.toLocal8Bit().data(), 1);

    // Open file IO
    if (!(m_OutputFormat->flags & AVFMT_NOFILE)) {
        result = avio_open(&m_FormatContext->pb, audioFileName.toLocal8Bit().data(), AVIO_FLAG_WRITE);
        if (result < 0) {
            avformat_free_context(m_FormatContext);
            return false;
        }
    }

    return true;
}

void AudioGenerater::generateAudioFileWithOptions(
    QString _fileName, QByteArray _pcmData, int _channel, int _bitRate, int _sampleRate, AVSampleFormat _format)
{
    AVFormatContext* oc;
    if (avformat_alloc_output_context2(
            &oc, nullptr, nullptr, _fileName.toStdString().c_str())
        < 0) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }
    if (!oc) {
        printf("Could not deduce output format from file extension: using mka.\n");
        avformat_alloc_output_context2(
            &oc, nullptr, "mka", _fileName.toStdString().c_str());
    }
    if (!oc) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }
    AVOutputFormat* fmt = oc->oformat;
    if (fmt->audio_codec == AV_CODEC_ID_NONE) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }

    AVCodecID codecID = av_guess_codec(
        fmt, nullptr, _fileName.toStdString().c_str(), nullptr, AVMEDIA_TYPE_AUDIO);
    // Find Codec
    m_AudioCodec = avcodec_find_encoder(codecID);
    if (m_AudioCodec == nullptr) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }
    // Create an encoder context
    m_AudioCodecContext = avcodec_alloc_context3(m_AudioCodec);
    if (m_AudioCodecContext == nullptr) {
        qDebug() << "Error in line: " << __LINE__;
        return;
    }

    // Setting parameters
    m_AudioCodecContext->bit_rate = _bitRate;
    m_AudioCodecContext->sample_rate = _sampleRate;
    m_AudioCodecContext->sample_fmt = _format;
    m_AudioCodecContext->channels = _channel;

    m_AudioCodecContext->channel_layout = av_get_default_channel_layout(_channel);
    m_AudioCodecContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;

    // Turn on the encoder
    int result = avcodec_open2(m_AudioCodecContext, m_AudioCodec, nullptr);
    if (result < 0) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    // Create a package
    if (!initFormat(_fileName)) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    // write to the file header
    result = avformat_write_header(m_FormatContext, nullptr);
    if (result < 0) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    // Create Frame
    AVFrame* frame = av_frame_alloc();
    if (frame == nullptr) {
        avcodec_free_context(&m_AudioCodecContext);
        if (m_FormatContext != nullptr)
            avformat_free_context(m_FormatContext);
        return;
    }

    int nb_samples = 0;
    if (m_AudioCodecContext->codec->capabilities & AV_CODEC_CAP_VARIABLE_FRAME_SIZE) {
        nb_samples = 10000;
    }
    else {
        nb_samples = m_AudioCodecContext->frame_size;
    }

    // Set the parameters of the Frame
    frame->nb_samples = nb_samples;
    frame->format = m_AudioCodecContext->sample_fmt;
    frame->channel_layout = m_AudioCodecContext->channel_layout;

    // Apply for data memory
    result = av_frame_get_buffer(frame, 0);
    if (result < 0) {
        av_frame_free(&frame);
        {
            avcodec_free_context(&m_AudioCodecContext);
            if (m_FormatContext != nullptr)
                avformat_free_context(m_FormatContext);
            return;
        }
    }

    // Set the Frame to be writable
    result = av_frame_make_writable(frame);
    if (result < 0) {
        av_frame_free(&frame);
        {
            avcodec_free_context(&m_AudioCodecContext);
            if (m_FormatContext != nullptr)
                avformat_free_context(m_FormatContext);
            return;
        }
    }

    int perFrameDataSize = frame->linesize[0];
    int count = _pcmData.size() / perFrameDataSize;
    bool needAddOne = false;
    if (_pcmData.size() % perFrameDataSize != 0) {
        count++;
        needAddOne = true;
    }

    int frameCount = 0;
    for (int i = 0; i < count; ++i) {
        // Create a Packet
        AVPacket* pkt = av_packet_alloc();
        if (pkt == nullptr) {
            avcodec_free_context(&m_AudioCodecContext);
            if (m_FormatContext != nullptr)
                avformat_free_context(m_FormatContext);
            return;
        }
        av_init_packet(pkt);

        if (i == count - 1)
            perFrameDataSize = _pcmData.size() % perFrameDataSize;

        // Fill the frame with the next chunk of raw PCM data
        memset(frame->data[0], 0, perFrameDataSize);
        memcpy(frame->data[0], &(_pcmData.data()[perFrameDataSize * i]), perFrameDataSize);

        frame->pts = frameCount++;
        // send Frame
        result = avcodec_send_frame(m_AudioCodecContext, frame);
        if (result < 0)
            continue;

        // Receive the encoded Packet
        result = avcodec_receive_packet(m_AudioCodecContext, pkt);
        if (result < 0) {
            av_packet_free(&pkt);
            continue;
        }

        // write to file
        av_packet_rescale_ts(pkt, m_AudioCodecContext->time_base, m_FormatContext->streams[0]->time_base);
        pkt->stream_index = 0;
        result = av_interleaved_write_frame(m_FormatContext,pkt);
        if (result < 0)
            continue;

        av_packet_free(&pkt);
    }

    // write to the end of the file
    av_write_trailer(m_FormatContext);
    // Close file IO
    avio_closep(&m_FormatContext->pb);
    // Release Frame memory
    av_frame_free(&frame);

    avcodec_free_context(&m_AudioCodecContext);
    if (m_FormatContext != nullptr)
        avformat_free_context(m_FormatContext);
}

main.cpp

int main(int argc,char **argv)
{
    av_log_set_level(AV_LOG_TRACE);

    QFile file("rawDataOfPcmAlawType.bin");
    if (!file.open(QIODevice::ReadOnly)) {
        return EXIT_FAILURE;
    }
    QByteArray rawData(file.readAll());

    AudioGenerater generator;
    generator.generateAudioFileWithOptions(
               "test.mka",rawData,1,64000,8000,AV_SAMPLE_FMT_S16);

    return 0;
}

It is important that you help me find the most suitable way to record pcm_alaw data into an MKA audio file (or another data format).

I am asking anyone who knows anything that could help (there is very little time left to implement this project).

Solution

These useful links will help you: