FFMPEG在Android libstagefright上的扩展
1.文章介绍
FFMPEG作为传统音视频编解码处理的开源项目被广泛使用,各种音视频终端应用如VLC,暴风影音等,又如各种手机终端播放音视频的应用等,都会多多少少使用FFMPEG某个特性,如decode,encode,filter,mux等,这篇文章就来分析在Android libstagefright框架中是如何扩展和使用FFMPEG的。
2.干货
本文以FFMPEG如何在Android源码中扩展并使用为路线来分析。
以Amlogic Android SDK源码为例:
\frameworks\av\media\libstagefright中使用了FFMPEG组件
AmFFmpegAdapter是FFMPEG组件扩展的入口,MediaPlayer通过StagefrightPlayer的调用至libstagefright,libstagefright组件中则包含libamffmpegadapter组件。
例如:在libamffmpegadapter组件中AmSimpleMediaExtractorPlugin的MediaExtractor扩展:
在MediaExtractor中声明了几个可能使用的情况,例如hevc/h.265:
/**MediaExtractor.cpp*/
/**
 * Creates an extractor for |dataSource|, choosing between the extractor
 * selected through DataSource::sniff() and the Amlogic FFmpeg extractor
 * selected through sniffAmExtFormat().
 *
 * The FFmpeg extractor is used when its confidence is strictly higher than
 * the regular one, or unconditionally when |isHEVC| is true. If it cannot be
 * created, MediaExtractor::Create() is used with the current mime type.
 */
sp<MediaExtractor> MediaExtractor::CreateEx(const sp<DataSource> &dataSource, bool isHEVC)
{
    // Result of the regular sniffers; a failed sniff counts as confidence 0.
    String8 mime("");
    float confidence = 0;
    sp<AMessage> meta(NULL);
    const bool regularSniffOk = dataSource->sniff(&mime, &confidence, &meta);
    if (!regularSniffOk) {
        confidence = 0;
    }

    // Result of the FFmpeg based sniffer, normalised the same way.
    String8 amMime("");
    float amConfidence = 0;
    sp<AMessage> amMeta(NULL);
    const bool amSniffOk =
            sniffAmExtFormat(dataSource, &amMime, &amConfidence, &amMeta);
    if (!amSniffOk) {
        amConfidence = 0;
    }

    // WMA / WMA Pro inside ASF: drop the regular confidence so the FFmpeg
    // extractor wins the comparison below (see BUG#94436).
    const bool regularSaysWma =
            !strcmp(mime.string(), MEDIA_MIMETYPE_AUDIO_WMA)
            || !strcmp(mime.string(), MEDIA_MIMETYPE_AUDIO_WMAPRO);
    if (regularSaysWma
            && confidence > 0
            && !strcmp(amMime.string(), MEDIA_MIMETYPE_CONTAINER_ASF)
            && amConfidence > 0) {
        ALOGI("[%s %d]for WMA: force useing Amffmpeg extractor[am_confidence/%f confidence/%f ammine/%s mime/%s]\n",
                __FUNCTION__, __LINE__, amConfidence, confidence, amMime.string(), mime.string());
        confidence = 0;
    }

    sp<MediaExtractor> extractor;
    // HEVC/H.265 always goes through the FFmpeg extractor.
    const bool preferAmExtractor = isHEVC || (amConfidence > confidence);
    if (preferAmExtractor) {
        mime = amMime;
        extractor = createAmMediaExtractor(dataSource, mime.string());
    }

    // Fall back to the regular factory when no FFmpeg extractor was produced.
    if (extractor.get() == NULL) {
        extractor = MediaExtractor::Create(dataSource, mime.string());
    }
    return extractor;
}
/**AmSimpleMediaExtractorPlugin.cpp*/
// Plugin entry point for format sniffing: forwards every argument unchanged
// to android::SniffAmFFmpeg() and returns its verdict.
bool sniffAmExtFormat(
        const android::sp<android::DataSource> &source,
        android::String8 *mimeType, float *confidence,
        android::sp<android::AMessage> *msg) {
    const bool recognized =
            android::SniffAmFFmpeg(source, mimeType, confidence, msg);
    return recognized;
}
// Plugin entry point that builds an AmFFmpegExtractor on top of |source|.
// |mime| is accepted for interface compatibility but is not consulted here.
android::sp<android::MediaExtractor> createAmMediaExtractor(
        const android::sp<android::DataSource> &source, const char *mime) {
    android::MediaExtractor *const ffmpegExtractor =
            new android::AmFFmpegExtractor(source);
    return ffmpegExtractor;
}
// Plugin entry point that builds an AmFFmpegExtractor from the file
// descriptor |fd|, logging the descriptor first.
android::sp<android::MediaExtractor> createAmMediaExtractorFromFd(int fd) {
    ALOGD("[%s:%d], fd:%d", __FUNCTION__, __LINE__, fd);
    android::MediaExtractor *const ffmpegExtractor =
            new android::AmFFmpegExtractor(fd);
    return ffmpegExtractor;
}
/**AmFFmpegExtractor.cpp*/
bool SniffAmFFmpeg(
const sp<DataSource> &source, String8 *mimeType, float *confidence,
sp<AMessage> *) {
av_register_all();
AVInputFormat *inputFormat = probeFormat(source);
if (NULL != inputFormat) {
const char *mimeDetected = convertInputFormatToMimeType(inputFormat);
if (NULL != mimeDetected) {
if(!strcmp(mimeDetected,MEDIA_MIMETYPE_CONTAINER_MATROSKA)){
if(get_codec_id(source, inputFormat)==1){
ALOGI("using MatroskaExtractor\n");
return false;
}
}
*mimeType = mimeDetected;
// only available when stagefright not support
*confidence = 0.05f;
if(!strcmp(mimeDetected,MEDIA_MIMETYPE_VIDEO_RM10) || !strcmp(mimeDetected,MEDIA_MIMETYPE_VIDEO_RM20) || !strcmp(mimeDetected,MEDIA_MIMETYPE_VIDEO_RM40)){
*confidence = 0.21f;
ALOGV("[%s %d] confidence 0.21", __FUNCTION__, __LINE__);
}
return true;
}
}
return false;
}
可以看到在使用libstagefright组件解复用h265的视频时,就会使用ffmpeg组件,在AmFFmpegExtractor::feedMore总算看到了熟悉的libavformat接口(比如说av_read_frame):
/**AmFFmpegExtractor.cpp*/
/**
 * Reads packets from FFmpeg until one belongs to an active source, then
 * queues that single packet on the source and returns OK.
 *
 * Packets for unmapped or inactive streams, and packets with size <= 0, are
 * freed and skipped. Any negative result from av_read_frame() is reported as
 * ERROR_END_OF_STREAM. Runs with mLock held.
 */
status_t AmFFmpegExtractor::feedMore() {
    Mutex::Autolock autoLock(mLock);

    AVPacket *pkt = new AVPacket();
    for (;;) {
        const int readResult = av_read_frame(mFFmpegContext, pkt);
        if (readResult < 0) {
            delete pkt;
            ALOGV("No more packets from ffmpeg.");
            return ERROR_END_OF_STREAM;
        }

        // Map the FFmpeg stream index onto one of our sources, if any.
        uint32_t sourceIdx = kInvalidSourceIdx;
        if (static_cast<size_t>(pkt->stream_index) < mStreamIdxToSourceIdx.size()) {
            sourceIdx = mStreamIdxToSourceIdx[pkt->stream_index];
        }

        // The order matters: mSources is only indexed once sourceIdx is valid.
        const bool deliverable = sourceIdx != kInvalidSourceIdx
                && mSources[sourceIdx].mIsActive
                && pkt->size > 0/* && pkt->pts >= 0*/;
        if (!deliverable) {
            av_free_packet(pkt);
            continue;
        }

        av_dup_packet(pkt);
        // The packet is handed to the source's queue and not freed here.
        mSources[sourceIdx].mSource->queuePacket(pkt);
        return OK;
    }
}
另外在AmFFmpegUtils组件中可以看到把libstagefright组件的数据类型转换为FFmpeg组件的数据类型:
/**
 * Probes the container format of |source| with FFmpeg.
 *
 * Reads from offset 0 with a buffer that doubles from kProbeStartBufSize up to
 * kProbeMaxBufSize and stops as soon as a format scores above
 * AVPROBE_SCORE_MAX / 4. If no attempt reaches that score, the result of the
 * last attempt (possibly NULL, possibly a low-score guess) is returned.
 */
AVInputFormat *probeFormat(const sp<DataSource> &source) {
    AVInputFormat *format = NULL;
    for (uint32_t bufSize = kProbeStartBufSize;
            bufSize <= kProbeMaxBufSize; bufSize *= 2) {
        // TODO: use av_probe_input_buffer() once we upgrade FFmpeg library
        // instead of having a loop here.

        // Zero every field so members this code does not set explicitly are
        // never read uninitialized by the probers.
        AVProbeData probe_data = AVProbeData();
        probe_data.filename = kDummyFileName;

        // FFmpeg requires AVPROBE_PADDING_SIZE extra zeroed bytes after the
        // probed data; "()" value-initializes the whole allocation to zero,
        // which also covers the tail left by a short read.
        unsigned char *probeBuf =
                new unsigned char[bufSize + AVPROBE_PADDING_SIZE]();
        if (NULL == probeBuf) {
            ALOGE("failed to allocate memory for probing file format.");
            return NULL;
        }
        probe_data.buf = probeBuf;

        // A negative result signals a read error; probe it as "no data"
        // instead of passing a negative size to FFmpeg.
        const int32_t amountRead = source->readAt(0, probeBuf, bufSize);
        probe_data.buf_size = (amountRead > 0) ? amountRead : 0;

        int32_t score = 0;
        format = av_probe_input_format2(&probe_data, 1, &score);
        delete[] probeBuf;
        if (format != NULL && score > AVPROBE_SCORE_MAX / 4) {
            break;
        }
    }
    return format;
}
其中source->readAt()就是libstagefright组件读取码流数据(具体如何读取可以参考我的文章MediaExtractor源码分析),把读取的buffer传递到av_probe_input_format2,实现了FFMPEG API的调用,在这个地方是根据输入的数据利用FFMPEG分析文件格式,至于AVInputFormat有什么用,那就看每个人对FFMPEG的熟悉程度。
/**
 * Allocates an AVFormatContext, attaches |adapter|'s I/O context to it and
 * opens it as |inputFormat|, then runs avformat_find_stream_info().
 *
 * Returns the opened context (the caller is presumably responsible for
 * closing it - confirm against the call sites), or NULL when allocation or
 * opening fails, or when stream info cannot be found for any format other
 * than "hevc". instance_counter is incremented on success.
 */
AVFormatContext* openAVFormatContext(
        AVInputFormat *inputFormat, AmFFmpegByteIOAdapter *adapter) {
    CHECK(inputFormat != NULL);
    CHECK(adapter != NULL);
    ALOGD("[%s:%d]", __FUNCTION__, __LINE__);

    AVFormatContext* context = avformat_alloc_context();
    // Check the allocation before the first dereference of |context|.
    if (context == NULL) {
        ALOGE("Failed to allocate AVFormatContext.");
        return NULL;
    }
    ALOGD("[%s:%d]", __FUNCTION__, __LINE__);

    context->interrupt_callback.callback = interrupt_cb;
    context->interrupt_callback.opaque = context;
    // start_time is declared outside this block; presumably interrupt_cb
    // measures elapsed time from it - confirm.
    start_time = ALooper::GetNowUs();
    context->flags |= AVFMT_FLAG_NONBLOCK;
    // NOTE(review): AVFMT_NOFILE is documented as an AVInputFormat /
    // AVOutputFormat flag, not an AVFormatContext flag. Kept as-is to preserve
    // behaviour; confirm whether AVFMT_FLAG_CUSTOM_IO was intended.
    context->flags |= AVFMT_NOFILE;
    ALOGD("[%s:%d]", __FUNCTION__, __LINE__);

    context->pb = adapter->getContext();
    ALOGD("try avformat_open_input");
    int res = avformat_open_input(
            &context,
            kDummyFileName,  // need to pass a filename
            inputFormat,     // probe the container format.
            NULL);           // no special parameters
    if (res < 0) {
        // avformat_open_input() frees a user-supplied context on failure and
        // resets the pointer, so it must not be freed again here.
        ALOGE("Failed to open the input stream.");
        return NULL;
    }
    ALOGD("avformat_open_input success");

    //context->probesize = 512 * 1024;
    ALOGD("call avformat_find_stream_info");
    res = avformat_find_stream_info(context, NULL);
    // A stream-info failure is tolerated only for the "hevc" input format.
    if (res < 0 && strcmp(inputFormat->name, "hevc")) {
        ALOGE("Failed to find stream information.");
        ALOGD("inputFormat->name: %s",inputFormat->name);
        avformat_close_input(&context);
        return NULL;
    }
    ALOGD("openAVFormatContext completes with: %p", context);

    instance_counter++;
    return context;
}
openAVFormatContext是很重要的函数,根据输入流创建了一个有效的AVFormatContext,AVFormatContext在FFMPEG组件中的重要性就不用多说了(反正大大的好,不清楚AVFormatContext有什么用的,建议先参考我的文章FFMPEG Android移植进阶里面关于转码的代码部分)。
status_t AmFFmpegSource::read(
MediaBuffer **out, const ReadOptions *options) {
ALOGV("%s %d", __FUNCTION__, __LINE__);
*out = NULL;
sp<AmFFmpegExtractor> extractor = mExtractor.promote();
if (NULL == extractor.get()) {
// The client should hold AmFFmpegExtractor while it is using source.
ALOGE("AmFFmpegExtractor has been released before stop using sources.");
return UNKNOWN_ERROR;
}
int64_t seekTimeUs;
ReadOptions::SeekMode seekMode;
AVPacket *packet = NULL;
if (mSeekable && options && options->getSeekTo(&seekTimeUs, &seekMode)) {
// hevc decoder may fail because of no extradata when seek instantly after start.
if(!strcmp(mMime, MEDIA_MIMETYPE_VIDEO_HEVC) && mStartRead == false
&& mStream->codec->extradata_size == 0) {
packet = dequeuePacket();
while (packet == NULL) {
if (ERROR_END_OF_STREAM == extractor->feedMore()) {
return ERROR_END_OF_STREAM;
}
packet = dequeuePacket();
}
int32_t cast_size = castHEVCSpecificData(packet->data, packet->size);
if(cast_size > 0) {
av_shrink_packet(packet, cast_size);
}
ALOGI("Need send hevc specific data first, size : %d", packet->size);
}
extractor->seekTo(seekTimeUs + mStartTimeUs, seekMode);
}
mStartRead = true;
if(packet == NULL) {
packet = dequeuePacket();
while (packet == NULL) {
if (ERROR_END_OF_STREAM == extractor->feedMore()) {
return ERROR_END_OF_STREAM;
}
packet = dequeuePacket();
}
}
MediaBuffer *buffer = NULL;
status_t ret = mGroup->acquire_buffer(&buffer);
if (ret != OK) {
return ret;
}
uint32_t requiredLen =
mFormatter->computeNewESLen(packet->data, packet->size);
int32_t hevc_header_size = 0;
if(mFirstPacket && !strcmp(mMime, MEDIA_MIMETYPE_VIDEO_HEVC) && mStream->codec->extradata_size > 0) {
hevc_header_size = 10 + mStream->codec->extradata_size;
requiredLen += hevc_header_size;
}
if (buffer->size() < requiredLen) {
size_t newSize = buffer->size();
while (newSize < requiredLen) {
newSize = 2 * newSize;
if (newSize > kMaxFrameBufferSize) {
break;
}
}
buffer->release();
buffer = NULL;
if (newSize > kMaxFrameBufferSize) {
return ERROR_BUFFER_TOO_SMALL;
}
resetBufferGroup(newSize);
status_t ret = mGroup->acquire_buffer(&buffer);
if (ret != OK) {
return ret;
}
}
int32_t filledLength = 0;
if(mFirstPacket && !strcmp(mMime, MEDIA_MIMETYPE_VIDEO_HEVC) && hevc_header_size > 0) {
const char * tag = "extradata";
memcpy(static_cast<uint8_t *>(buffer->data()), tag, 9);
static_cast<uint8_t *>(buffer->data())[9] = mStream->codec->extradata_size;
memcpy(static_cast<uint8_t *>(buffer->data()) + 10, static_cast<uint8_t *>(mStream->codec->extradata), mStream->codec->extradata_size);
filledLength = mFormatter->formatES(
packet->data, packet->size,
static_cast<uint8_t *>(buffer->data()) + hevc_header_size, buffer->size());
filledLength += hevc_header_size;
} else {
filledLength = mFormatter->formatES(
packet->data, packet->size,
static_cast<uint8_t *>(buffer->data()), buffer->size());
}
mFirstPacket = false;
if (filledLength <= 0) {
ALOGE("Failed to format packet data.");
buffer->release();
buffer = NULL;
return ERROR_MALFORMED;
}
if(AV_NOPTS_VALUE == packet->pts) {
packet->pts = mLastValidPts + 1;
packet->dts = mLastValidDts + 1;
mLastValidPts = packet->pts;
mLastValidDts = packet->dts;
ALOGE("meet invalid pts, set last pts to current frame pts:%lld dts:%lld",
mLastValidPts, mLastValidDts);
} else {
mLastValidPts = packet->pts;
mLastValidDts = packet->dts;
}
buffer->set_range(0, filledLength);
const bool isKeyFrame = (packet->flags & AV_PKT_FLAG_KEY) != 0;
const int64_t ptsFromFFmpeg =
(packet->pts == static_cast<int64_t>(AV_NOPTS_VALUE))
? kUnknownPTS : convertStreamTimeToUs(packet->pts);
const int64_t dtsFromFFmpeg =
(packet->dts == static_cast<int64_t>(AV_NOPTS_VALUE))
? kUnknownPTS : convertStreamTimeToUs(packet->dts);
const int64_t predictedPTSInUs = mPTSPopulator->computePTS(
packet->stream_index, ptsFromFFmpeg, dtsFromFFmpeg, isKeyFrame);
const int64_t normalizedPTSInUs = (predictedPTSInUs == kUnknownPTS)?
dtsFromFFmpeg - mStartTimeUs : ((predictedPTSInUs - mStartTimeUs < 0
&& predictedPTSInUs - mStartTimeUs > -10) ? 0 : predictedPTSInUs - mStartTimeUs); // starttime may exceed pts a little in some ugly streams.
buffer->meta_data()->setInt64(kKeyPTSFromContainer, ptsFromFFmpeg);
buffer->meta_data()->setInt64(kKeyDTSFromContainer, dtsFromFFmpeg);
buffer->meta_data()->setInt64(kKeyMediaTimeOffset, -mStartTimeUs);
// TODO: Make decoder know that this sample has no timestamp by setting
// OMX_BUFFERFLAG_TIMESTAMPINVALID flag once we move to OpenMax IL 1.2.
buffer->meta_data()->setInt64(kKeyTime, normalizedPTSInUs);
buffer->meta_data()->setInt32(kKeyIsSyncFrame, isKeyFrame ? 1 : 0);
*out = buffer;
av_free_packet(packet);
delete packet;
return OK;
}
再来看一看对Formatter的实现:
/**AmFFmpegSource.cpp*/
// Excerpt of init(): the "..." line stands for code that is not shown. The
// visible tail selects the formatter for this stream.
status_t AmFFmpegSource::init(
AVStream *stream, AVInputFormat *inputFormat,
AmFFmpegExtractor *extractor) {
...
// Choose a formatter from the stream's codec context and the container format.
mFormatter = StreamFormatter::Create(stream->codec, inputFormat);
// Let the chosen formatter add its codec-specific entries to mMeta.
mFormatter->addCodecMeta(mMeta);
return OK;
}
//static
sp<StreamFormatter> StreamFormatter::Create(
AVCodecContext *codec, AVInputFormat *format) {
ALOGI("Creating formatter for codec id : %u extradata size : %d",
codec->codec_id, codec->extradata_size);
const char *codecMime = convertCodecIdToMimeType(codec);
if (!strcmp(codecMime, MEDIA_MIMETYPE_VIDEO_AVC)
&& (format == av_find_input_format("mp4")
|| format == av_find_input_format("flv")
|| format == av_find_input_format("matroska"))) {
// Double check the extradata really includes AVCC (14496-15) structure
// because some matroska streams are already Annex-B framed and does not
// have AVCC. In this case, we fall back to the default formatter.
if (codec->extradata_size >= 7
&& reinterpret_cast<uint8_t *>(codec->extradata)[0] == 0x01) {
return new AVCCFormatter(codec);
}
} else if (!strcmp(codecMime, MEDIA_MIMETYPE_VIDEO_HEVC)
&& (format == av_find_input_format("mp4")
|| format == av_find_input_format("flv")
|| format == av_find_input_format("matroska"))) {
if (codec->extradata_size >= 22) {
return new HVCCFormatter(codec);
}
} else if (!strcmp(codecMime, MEDIA_MIMETYPE_AUDIO_AAC)
&& (format == av_find_input_format("mp4")
|| format == av_find_input_format("avi")
|| format == av_find_input_format("flv")
|| format == av_find_input_format("matroska"))
&& codec->extradata_size > 0) {
return new AACFormatter(codec);
} else if (!strcmp(codecMime, MEDIA_MIMETYPE_AUDIO_WMA)) {
return new WMAFormatter(codec);
} else if (!strcmp(codecMime, MEDIA_MIMETYPE_AUDIO_VORBIS)) {
return new VorbisFormatter(codec);
} else if (codec->codec_id == AV_CODEC_ID_PCM_BLURAY) {
return new PCMBlurayFormatter(codec);
} else if(!strcmp(codecMime, MEDIA_MIMETYPE_AUDIO_APE)){
return new APEFormatter(codec);
}
return new PassthruFormatter(codec);
}
比如拿WMAFormatter来说,WMAFormatter就是基于FFMPEG的对WMA音频的扩展:
/**WMAFormatter.cpp*/
/**
 * Captures the WMA parameters of |codec| when its codec_tag is one of
 * 0x0160..0x0163. For any other tag a warning is logged and the formatter
 * stays uninitialised (mInitCheck remains false).
 */
WMAFormatter::WMAFormatter(AVCodecContext *codec)
    : PassthruFormatter(codec),
      mBlockAlign(0),
      mBitsPerSample(0),
      mFormatTag(0),
      mInitCheck(false) {
    switch (codec->codec_tag) {
        case 0x0160:
        case 0x0161:  // WMA
        case 0x0162:  // WMA Pro
        case 0x0163:  // WMA Lossless
            mBlockAlign = codec->block_align;
            mBitsPerSample = codec->bits_per_coded_sample;
            mFormatTag = codec->codec_tag;
            mInitCheck = true;
            break;
        default:
            ALOGW("Unsupported format tag %x", codec->codec_tag);
            break;
    }
}
// Writes the WMA block align, bits per sample, format tag and the codec
// specific data into |meta|. Returns false without touching |meta| when the
// constructor did not accept the stream's format tag.
bool WMAFormatter::addCodecMeta(const sp<MetaData> &meta) const {
    if (!mInitCheck) {
        return false;
    }

    meta->setInt32(kKeyWMABlockAlign, mBlockAlign);
    meta->setInt32(kKeyWMABitsPerSample, mBitsPerSample);
    meta->setInt32(kKeyWMAFormatTag, mFormatTag);
    meta->setData(kKeyCodecSpecific, 0, mExtraData, mExtraSize);
    return true;
}
在AmFFmpegSource的read方法中会调用computeNewESLen和formatES等方法,而WMAFormatter是继承于PassthruFormatter:
/**AmFFmpegExtractor.cpp*/
// Excerpt of read(): each "..." line stands for code that is not shown. The
// visible parts are the two places where the stream formatter is used.
status_t AmFFmpegSource::read(
MediaBuffer **out, const ReadOptions *options) {
...
// Ask the formatter how many bytes the formatted packet will need.
uint32_t requiredLen =
mFormatter->computeNewESLen(packet->data, packet->size);
...
int32_t filledLength = 0;
if(mFirstPacket && !strcmp(mMime, MEDIA_MIMETYPE_VIDEO_HEVC) && hevc_header_size > 0) {
// First HEVC packet: write the 9-byte tag "extradata", one size byte and the
// codec extradata in front of the formatted payload.
const char * tag = "extradata";
memcpy(static_cast<uint8_t *>(buffer->data()), tag, 9);
static_cast<uint8_t *>(buffer->data())[9] = mStream->codec->extradata_size;
memcpy(static_cast<uint8_t *>(buffer->data()) + 10, static_cast<uint8_t *>(mStream->codec->extradata), mStream->codec->extradata_size);
// NOTE(review): the output pointer is advanced by hevc_header_size, but the
// capacity passed is still the full buffer->size() - confirm whether
// buffer->size() - hevc_header_size is what should be passed.
filledLength = mFormatter->formatES(
packet->data, packet->size,
static_cast<uint8_t *>(buffer->data()) + hevc_header_size, buffer->size());
filledLength += hevc_header_size;
} else {
// Every other packet is formatted straight into the start of the buffer.
filledLength = mFormatter->formatES(
packet->data, packet->size,
static_cast<uint8_t *>(buffer->data()), buffer->size());
}
...
return OK;
}
/**PassthruFormatter.cpp*/
// Size needed for the formatted output of an input of |inAllocLen| bytes.
// Pass-through formatting copies the data unchanged (see formatES), so the
// answer is |inAllocLen| itself; |in| is not inspected.
uint32_t PassthruFormatter::computeNewESLen(
        const uint8_t* in, uint32_t inAllocLen) const {
    const uint32_t outputLen = inAllocLen;
    return outputLen;
}
// Copies |inAllocLen| bytes from |in| to |out| unchanged and returns the
// number of bytes written. Returns -1 when the input is empty or larger than
// |outAllocLen|; null or aliasing pointers trip a CHECK.
int32_t PassthruFormatter::formatES(
        const uint8_t* in, uint32_t inAllocLen, uint8_t* out,
        uint32_t outAllocLen) const {
    // Nothing to copy.
    if (inAllocLen == 0) {
        return -1;
    }
    // Output buffer cannot hold the input.
    if (inAllocLen > outAllocLen) {
        return -1;
    }

    // The pointers are validated only after the size checks have passed.
    CHECK(in);
    CHECK(out);
    CHECK(in != out);

    memcpy(out, in, inAllocLen);
    return inAllocLen;
}
这就把FFMPEG解复用出的buffer转换成了我们平时开发需要的数据类型并使用,比如:
// Example: enumerate the data streams (tracks); the loop body is elided.
for (size_t i = 0; i < extractor->countTracks(); ++i) {
...
}
// Example: read sample data through a thin wrapper around read().
err = extractor->readSampleData(buffer);
// Example: fetch the metadata of the sample; a failure trips the CHECK_EQ.
sp<MetaData> meta;
err = extractor->getSampleMeta(&meta);
CHECK_EQ(err, (status_t)OK);
实现了这些接口就可以通过FFMPEG来解复用,这就实现了FFMPEG在Android SDK上对应的音视频解复用扩展。
总结下自己的理解:
音视频数据在不同的封装方式时数据格式是不同的,比如TS流:
每188个字节对字节流作解析。
在payload负载中的PES层,stream id就可以区分是音频(音频取值0xc0-0xdf,通常为0xc0)还是视频(视频取值0xe0-0xef,通常为0xe0)负载。
每个TS包是188个字节的数据流,那么要通过Extractor区分哪个包是音频流,哪个包是视频流,那么就要按这个规范去解析,所以在Android Extractor不支持某些特殊格式的解复用时,使用FFMPEG扩展的Extractor是为了能把那些特殊格式的音视频流解析出哪个流是音频,哪个流是视频,并且获取音视频的信息,这样才能在编解码时作出区别。这么解释不知道读者能不能清楚 ╮(╯▽╰)╭
3.结束语
整个框架调用的流程其实就是MediaCodec ----> ACodec ----> Open OMX Layer ----> ObjectCodec。在其他芯片SDK方案中,比如Hisi,AML,Mstar也会依赖FFMPEG来扩展某些编码,解码,音视频合成等,毕竟每个平台不一样,建议根据条件允许时根据个人经验选择性来学习。
下一篇打算从编解码器扩展来分析FFMPEG是如何使用在我们的框架之中的,谢谢关注。