Problem description
Well, since the FFMPEG documentation and code examples are absolute garbage, I guess my only option is to come here and ask.
What I'm trying to do is simply record audio from the microphone and write it to a file. So I initialize the input and output formats, grab an audio packet, decode it, resample it, encode it and write it. But every time I try to play the audio back, there is only a tiny bit of data. For some reason it seems to write only a single initial packet, which is still strange. Let me explain why:
if((response = swr_config_frame(resampleContext,audioOutputFrame,frame)) < 0) qDebug() << "can't configure frame!" << av_make_error(response);
if((response = swr_convert_frame(resampleContext,audioOutputFrame,frame)) < 0) qDebug() << "can't resample frame!" << av_make_error(response);
This is the code I use for resampling. My frame contains data, but swr_convert_frame writes empty data into audioOutputFrame. How do I fix this? FFMPEG is really driving me crazy.
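For context, this is how I set up audioOutputFrame, condensed from configOutput in the full code below; every parameter comes from the encoder context:
// Condensed from configureAudioFrame/prepareOutputContext in VideoReader.cpp below:
audioOutputFrame = av_frame_alloc();
audioOutputFrame->nb_samples = audioEncoderContext->frame_size;
audioOutputFrame->format = audioEncoderContext->sample_fmt;
audioOutputFrame->sample_rate = audioEncoderContext->sample_rate;
audioOutputFrame->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
av_frame_get_buffer(audioOutputFrame,0);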
Here is the complete code of my class:
VideoReader.h
#ifndef VIDEOREADER_H
#define VIDEOREADER_H
extern "C"
{
#include <libavcodec/avcodec.h>
#include <libavformat/avformat.h>
#include <libswscale/swscale.h>
#include <libavdevice/avdevice.h>
#include "libavutil/audio_fifo.h"
#include "libavformat/avio.h"
#include "libswresample/swresample.h"
#include <inttypes.h>
}
#include <QString>
#include <QElapsedTimer>
class VideoReader
{
public:
VideoReader();
bool open(const char* filename);
bool fillFrame();
bool readFrame(uint8_t *&frameData);
void close();
int width,height;
private:
bool configinput();
bool configOutput(const char *filename);
bool configResampler();
bool encode(AVFrame *frame,AVCodecContext *encoderContext,AVPacket *outputPacket,int streamIndex,QString type);
int audioStreamIndex = -1;
int videoStreamIndex = -1;
int64_t videoStartPts = 0;
int64_t audioStartPts = 0;
AVFormatContext* inputFormatContext = nullptr;
AVFormatContext* outputFormatContext = nullptr;
AVCodecContext* videoDecoderContext = nullptr;
AVCodecContext* videoEncoderContext = nullptr;
AVCodecContext* audioDecoderContext = nullptr;
AVCodecContext* audioEncoderContext = nullptr;
AVFrame* videoInputFrame = nullptr;
AVFrame* audioInputFrame = nullptr;
AVFrame* videoOutputFrame = nullptr;
AVFrame* audioOutputFrame = nullptr;
AVPacket* inputPacket = nullptr;
AVPacket* videoOutputPacket = nullptr;
AVPacket* audioOutputPacket = nullptr;
SwsContext* innerScaleContext = nullptr;
SwsContext* outerScaleContext = nullptr;
SwrContext *resampleContext = nullptr;
};
#endif // VIDEOREADER_H
VideoReader.cpp
#include "VideoReader.h"
#include <QDebug>
static const char* av_make_error(int errnum)
{
static char str[AV_ERROR_MAX_STRING_SIZE];
memset(str,0,sizeof(str));
return av_make_error_string(str,AV_ERROR_MAX_STRING_SIZE,errnum);
}
VideoReader::VideoReader()
{
}
bool VideoReader::open(const char *filename)
{
if(!configinput()) return false;
if(!configOutput(filename)) return false;
if(!configResampler()) return false;
return true;
}
bool VideoReader::fillFrame()
{
auto convertToYUV = [=](AVFrame* frame)
{
int response = 0;
if((response = sws_scale(outerScaleContext,frame->data,frame->linesize,0,videoEncoderContext->height,videoOutputFrame->data,videoOutputFrame->linesize)) < 0) qDebug() << "can't rescale" << av_make_error(response);
};
auto convertAudio = [this](AVFrame* frame)
{
int response = 0;
auto& out = audioOutputFrame;
qDebug() << out->linesize[0] << out->nb_samples;
if((response = swr_convert_frame(resampleContext,out,frame)) < 0) qDebug() << "can't resample frame!" << av_make_error(response);
qDebug() << "poop";
};
auto decodeEncode = [=](AVPacket* inputPacket,AVFrame* inputFrame,AVCodecContext* decoderContext,AVPacket* outputPacket,AVFrame* outputFrame,AVCodecContext* encoderContext,std::function<void (AVFrame*)> convertFunc,int streamIndex,int64_t startPts,QString type)
{
int response = avcodec_send_packet(decoderContext,inputPacket);
if(response < 0) { qDebug() << "Failed to send" << type << "packet!" << av_make_error(response); return false; }
response = avcodec_receive_frame(decoderContext,inputFrame);
if(response == AVERROR(EAGAIN) || response == AVERROR_EOF) { av_packet_unref(inputPacket); return false; }
else if (response < 0) { qDebug() << "Failed to decode" << type << "frame!" << response << av_make_error(response); return false; }
if(encoderContext)
{
outputFrame->pts = inputPacket->pts - startPts;
convertFunc(inputFrame);
if(!encode(outputFrame,encoderContext,outputPacket,streamIndex,type)) return false;
}
av_packet_unref(inputPacket);
return true;
};
while(av_read_frame(inputFormatContext,inputPacket) >= 0) //actually read packet
{
if(inputPacket->stream_index == videoStreamIndex)
{
if(!videoStartPts) videoStartPts = inputPacket->pts;
if(decodeEncode(inputPacket,videoInputFrame,videoDecoderContext,videoOutputPacket,videoOutputFrame,videoEncoderContext,convertToYUV,videoStreamIndex,videoStartPts,"video")) break;
}
else if(inputPacket->stream_index == audioStreamIndex)
{
if(!audioStartPts) audioStartPts = inputPacket->pts;
if(decodeEncode(inputPacket,audioInputFrame,audioDecoderContext,audioOutputPacket,audioOutputFrame,audioEncoderContext,convertAudio,audioStreamIndex,audioStartPts,"audio")) break;
}
}
return true;
}
bool VideoReader::readFrame(uint8_t *&frameData)
{
if(!fillFrame()) { qDebug() << "readFrame method Failed!"; return false; };
const int bytesPerPixel = 4;
uint8_t* destination[bytesPerPixel] = {frameData,NULL,NULL,NULL};
int destinationLinesize[bytesPerPixel] = {videoInputFrame->width * bytesPerPixel,0,0,0};
sws_scale(innerScaleContext,videoInputFrame->data,videoInputFrame->linesize,0,videoInputFrame->height,destination,destinationLinesize);
return true;
}
void VideoReader::close()
{
encode(NULL,"video");
encode(NULL,"audio");
if(av_write_trailer(outputFormatContext) < 0) { qDebug() << "Failed to write trailer"; };
avio_closep(&outputFormatContext->pb);
avformat_free_context(outputFormatContext);
avformat_close_input(&inputFormatContext);
avformat_free_context(inputFormatContext);
av_frame_free(&videoInputFrame);
av_frame_free(&audioInputFrame);
av_frame_free(&videoOutputFrame);
av_frame_free(&audioOutputFrame);
av_packet_free(&inputPacket);
av_packet_free(&videoOutputPacket);
av_packet_free(&audioOutputPacket);
avcodec_free_context(&videoDecoderContext);
avcodec_free_context(&videoEncoderContext);
avcodec_free_context(&audioDecoderContext);
avcodec_free_context(&audioEncoderContext);
sws_freeContext(innerScaleContext);
sws_freeContext(outerScaleContext);
swr_free(&resampleContext);
}
bool VideoReader::configinput()
{
avdevice_register_all();
inputFormatContext = avformat_alloc_context();
if(!inputFormatContext) { qDebug() << "can't create context!"; return false; }
const char* inputFormatName = "dshow";/*"gdigrab"*/
AVInputFormat* inputFormat = av_find_input_format(inputFormatName);
if(!inputFormat){ qDebug() << "Can't find" << inputFormatName; return false; }
AVDictionary* options = NULL;
av_dict_set(&options,"framerate","30",0);
av_dict_set(&options,"video_size","1920x1080",0);
if(avformat_open_input(&inputFormatContext,"video=HD USB Camera:audio=Microphone (High DeFinition Audio Device)" /*"desktop"*/,inputFormat,&options) != 0) { qDebug() << "can't open video file!"; return false; }
AVCodecParameters* videoCodecParams = nullptr;
AVCodecParameters* audioCodecParams = nullptr;
AVCodec* videoDecoder = nullptr;
AVCodec* audioDecoder = nullptr;
for (uint i = 0; i < inputFormatContext->nb_streams; ++i)
{
auto stream = inputFormatContext->streams[i];
auto codecParams = stream->codecpar;
if(codecParams->codec_type == AVMEDIA_TYPE_AUDIO) { audioStreamIndex = i; audioDecoder = avcodec_find_decoder(codecParams->codec_id); audioCodecParams = codecParams; }
if(codecParams->codec_type == AVMEDIA_TYPE_VIDEO) { videoStreamIndex = i; videoDecoder = avcodec_find_decoder(codecParams->codec_id); videoCodecParams = codecParams; }
if(audioStreamIndex != -1 && videoStreamIndex != -1) break;
}
if(audioStreamIndex == -1) { qDebug() << "Failed to find audio stream inside file"; return false; }
if(videoStreamIndex == -1) { qDebug() << "Failed to find video stream inside file"; return false; }
auto configureCodecContext = [=](AVCodecContext*& context,AVCodec* decoder,AVCodecParameters* params,AVFrame*& frame,QString type)
{
context = avcodec_alloc_context3(decoder);
if(!context) { qDebug() << "Failed to create" << type << "decoder context!"; return false; }
if(avcodec_parameters_to_context(context,params) < 0) { qDebug() << "can't initialize input" << type << "decoder context"; return false; }
if(avcodec_open2(context,decoder,NULL) < 0) { qDebug() << "can't open" << type << "decoder"; return false; }
frame = av_frame_alloc();
if(!frame) { qDebug() << "can't allocate" << type << "frame"; return false; }
return true;
};
if(!configureCodecContext(videoDecoderContext,videoDecoder,videoCodecParams,videoInputFrame,"video")) return false;
if(!configureCodecContext(audioDecoderContext,audioDecoder,audioCodecParams,audioInputFrame,"audio")) return false;
audioDecoderContext->channel_layout = AV_CH_LAYOUT_STEREO;
audioInputFrame->channel_layout = audioDecoderContext->channel_layout;
inputPacket = av_packet_alloc();
if(!inputPacket) { qDebug() << "can't allocate input packet!"; return false; }
//first frame, needed for initialization
if(!fillFrame()) { qDebug() << "Failed to fill frame on init!"; return false; };
width = videoDecoderContext->width;
height = videoDecoderContext->height;
innerScaleContext = sws_getContext(width,height,videoDecoderContext->pix_fmt,width,height,AV_PIX_FMT_RGB0,SWS_FAST_BILINEAR,NULL,NULL,NULL);
outerScaleContext = sws_getContext(width,height,videoDecoderContext->pix_fmt,width,height,AV_PIX_FMT_YUV420P,SWS_FAST_BILINEAR,NULL,NULL,NULL);
if(!innerScaleContext || !outerScaleContext) { qDebug() << "Failed to initialize scaler context"; return false; }
return true;
}
bool VideoReader::configOutput(const char *filename)
{
avformat_alloc_output_context2(&outputFormatContext,NULL,NULL,filename);
if(!outputFormatContext) { qDebug() << "Failed to create output context"; return false; }
AVOutputFormat* outputFormat = outputFormatContext->oformat;
auto prepareOutputContext = [=](AVCodecContext*& encoderContext,std::function<void (AVCodecContext*,AVCodec*)> configureContextFunc,std::function<void (AVFrame*)> configureFrameFunc,AVCodecID codecId,AVPacket*& packet,AVFrame*& frame,QString type)
{
auto stream = avformat_new_stream(outputFormatContext,NULL);
if(!stream) { qDebug() << "Failed to allocate output" << type << "stream"; return false; }
AVCodec* encoder = avcodec_find_encoder(codecId);
if(!encoder) { qDebug() << "Failed to find" << type << "encoder!"; return false; }
encoderContext = avcodec_alloc_context3(encoder);
if(!encoderContext) { qDebug() << "Failed to create" << type << "encoder context!"; return false; }
configureContextFunc(encoderContext,encoder);
int result = avcodec_open2(encoderContext,encoder,NULL);
if(result < 0) { qDebug() << "Failed to open" << type << "encoder" << av_make_error(result); return false; }
if(avcodec_parameters_from_context(stream->codecpar,encoderContext) < 0) { qDebug() << "Failed to copy parameters to" << type << "output stream"; return false; }
packet = av_packet_alloc();
if(!packet) {qDebug() << "Failed allocate output" << type << "packet"; return false;}
frame = av_frame_alloc();
if(!frame) { qDebug() << "can't allocate output" << type << "frame"; return false; }
configureFrameFunc(frame);
av_frame_get_buffer(frame,0);
return true;
};
auto configureAudioFrame = [=](AVFrame* frame)
{
frame->nb_samples = audioEncoderContext->frame_size;
frame->format = audioEncoderContext->sample_fmt;
frame->sample_rate = audioEncoderContext->sample_rate;
frame->channel_layout = av_get_default_channel_layout(audioDecoderContext->channels);
};
auto configureAudioEncoderContext = [=](AVCodecContext* encoderContext,AVCodec* encoder)
{
encoderContext->bit_rate = 64000;
encoderContext->sample_fmt = encoder->sample_fmts[0];
encoderContext->sample_rate = 44100;
encoderContext->codec_type = AVMEDIA_TYPE_AUDIO;
encoderContext->channel_layout = AV_CH_LAYOUT_STEREO;
encoderContext->channels = av_get_channel_layout_nb_channels(encoderContext->channel_layout);
};
auto configureVideoFrame = [=](AVFrame* frame)
{
frame->format = videoEncoderContext->pix_fmt;
frame->width = videoEncoderContext->width;
frame->height = videoEncoderContext->height;
};
auto configureVideoEncoderContext = [=](AVCodecContext* encoderContext,AVCodec* encoder)
{
encoderContext->width = videoDecoderContext->width;
encoderContext->height = videoDecoderContext->height;
encoderContext->pix_fmt = encoder->pix_fmts[0];
encoderContext->gop_size = 10;
encoderContext->max_b_frames = 1;
encoderContext->framerate = AVRational{30,1};
encoderContext->time_base = AVRational{1,30};
av_opt_set(encoderContext->priv_data,"preset","ultrafast",0);
av_opt_set(encoderContext->priv_data,"tune","zerolatency",0);
};
if(!prepareOutputContext(videoEncoderContext,configureVideoEncoderContext,configureVideoFrame,outputFormat->video_codec,videoOutputPacket,videoOutputFrame,"video")) return false;
if(!prepareOutputContext(audioEncoderContext,configureAudioEncoderContext,configureAudioFrame,outputFormat->audio_codec,audioOutputPacket,audioOutputFrame,"audio")) return false;
if(outputFormat->flags & AVFMT_GLOBALHEADER) { videoEncoderContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; audioEncoderContext->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; }
int result = 0;
if(!(outputFormat->flags & AVFMT_NOFILE))
if((result = avio_open(&outputFormatContext->pb,filename,AVIO_FLAG_WRITE)) < 0)
{ qDebug() << "Failed to open file" << av_make_error(result); return false; }
result = avformat_write_header(outputFormatContext,NULL);
if(result < 0) {qDebug() << "Failed to write header!" << av_make_error(result); return false; }
return true;
}
bool VideoReader::configResampler()
{
resampleContext = swr_alloc_set_opts(NULL,av_get_default_channel_layout(audioEncoderContext->channels),audioEncoderContext->sample_fmt,audioEncoderContext->sample_rate,av_get_default_channel_layout(audioDecoderContext->channels),audioDecoderContext->sample_fmt,audioDecoderContext->sample_rate,0,NULL);
if (!resampleContext) { qDebug() << "Could not allocate resample context"; return false; }
int error;
if ((error = swr_init(resampleContext)) < 0) { qDebug() << "Could not open resample context"; swr_free(&resampleContext); return false; }
return true;
}
bool VideoReader::encode(AVFrame* frame,AVCodecContext* encoderContext,AVPacket* outputPacket,int streamIndex,QString type)
{
int response;
response = avcodec_send_frame(encoderContext,frame);
if(response < 0) { qDebug() << "Failed to send" << type << "frame" << av_make_error(response); return false; }
while(response >= 0)
{
response = avcodec_receive_packet(encoderContext,outputPacket);
if(response == AVERROR(EAGAIN) || response == AVERROR_EOF) { av_packet_unref(outputPacket); continue; }
else if (response < 0) { qDebug() << "Failed to encode" << type << "frame!" << response << av_make_error(response); return false; }
outputPacket->stream_index = streamIndex;
AVStream *inStream = inputFormatContext->streams[streamIndex];
AVStream *outStream = outputFormatContext->streams[streamIndex];
av_packet_rescale_ts(outputPacket,inStream->time_base,outStream->time_base);
if((response = av_interleaved_write_frame(outputFormatContext,outputPacket)) != 0) { qDebug() << "Failed to write" << type << "packet!" << av_make_error(response); av_packet_unref(outputPacket); return false; }
av_packet_unref(outputPacket);
}
return true;
}
If needed, I can try to write a shorter example.
Solution
As far as I know, there are a few cases in which swr_convert_frame may write nothing:
- You did not initialize the output frame correctly. If so, check the following snippet:
audioFrame = av_frame_alloc();
if (audioFrame == NULL) {
// error handling
}
audioFrame->format = /* the sample format you'd like to use */;
audioFrame->channel_layout = audioCodecContext->channel_layout;
audioFrame->nb_samples = audioCodecContext->frame_size;
if (av_frame_get_buffer(audioFrame,0) < 0) {
// error handling
}
- The input frame does not contain enough samples to produce a complete output frame. If so, you need swr_get_delay (a combined sketch follows this list):
if (swr_convert(swrContext,audioFrame->data,audioFrame->nb_samples,(uint8_t const**)frame->data,frame->nb_samples) < 0) {
// handle error
}
// do stuff with your audioFrame
...
while (swr_get_delay(swrContext,audioCodecContext->sample_rate) > audioFrame->nb_samples) {
if (swr_convert(swrContext,audioFrame->data,audioFrame->nb_samples,NULL,0) < 0) {
// handle error
}
// do stuff with your audioFrame
}
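Putting the two cases together, here is a minimal sketch built around swr_convert_frame itself, since that is what the question uses. It borrows the names from the question (resampleContext, audioOutputFrame) plus a hypothetical decoded inputFrame, and assumes audioOutputFrame was fully initialized as shown above; error handling is trimmed:
// Minimal sketch, assuming resampleContext was created with swr_alloc_set_opts()
// and audioOutputFrame has format, channel_layout, sample_rate and nb_samples
// set before av_frame_get_buffer() was called on it.
const int outSamples = audioOutputFrame->nb_samples; // one full encoder frame
if (swr_convert_frame(resampleContext, audioOutputFrame, inputFrame) < 0) {
    // handle error
}
// encode/write audioOutputFrame here
// swr_convert_frame buffers leftover samples internally; keep draining while
// at least one full output frame of delayed samples remains.
while (swr_get_delay(resampleContext, audioOutputFrame->sample_rate) >= outSamples) {
    if (swr_convert_frame(resampleContext, audioOutputFrame, NULL) < 0) { // NULL input drains the FIFO
        // handle error
    }
    // encode/write audioOutputFrame here
}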
In any case, you should provide more information, at the very least a minimal reproducible example, for further diagnosis.
I have to agree that the libav documentation is poor; it drives me crazy too. But cursing the libav authors won't help, and besides, open-source contributors don't owe you anything.