前言 Android 平台的视频播放器,从系统的 MediaPlayer 到开源的 ExoPlayer、ijkplayer,核心引擎几乎无一例外地构建在 FFmpeg 之上。FFmpeg 提供了从解封装(demux)、解码(decode)、格式转换(scale/resample)到编码(encode)的全套音视频处理原语,但在 Android NDK 环境下,将这些原语组装为一个稳定、低延迟、A/V 精确同步 的播放器内核,仍然是一项涉及多线程并发、实时时钟同步、Native 窗口渲染的复杂系统工程。
本文从零构建一个基于 FFmpeg 5.x 的 C/C++ 播放器内核,覆盖解封装线程、音视频解码线程、A/V 同步策略、ANativeWindow 渲染、AAudio 音频输出、Seek 实现和状态机设计,提供全部可编译运行的核心代码。
环境 : Android NDK r26, FFmpeg 5.1 (mobile-ffmpeg 或自编译), API 26+
一、播放器架构全景 1.1 多线程流水线模型 一个视频播放器的核心工作流由三类线程(解封装、解码、渲染)组成流水线,其中解码和渲染阶段各有视频、音频两条线程:
┌──────────┐ AVPacket Queue ┌──────────┐ AVFrame Queue ┌──────────┐ │ Demuxer │ ─────────────────→│ Video │ ────────────────→ │ Renderer │ │ Thread │ (blocking Q) │ Decoder │ (blocking Q) │ Thread │ │ │ │ Thread │ │ (ANative │ │ read → ──┤ │ decode │ │ Window) │ │ av_read │ │ │ │ │ │ _frame │ └───────────┘ └───────────┘ │ │ │ │ AVPacket Queue ┌──────────┐ AVFrame Queue ┌──────────┐ │ │ ─────────────────→│ Audio │ ────────────────→ │ Audio │ │ │ (blocking Q) │ Decoder │ (blocking Q) │ Renderer │ │ │ │ Thread │ │ (AAudio) │ └───────────┘ └───────────┘ └───────────┘
三类线程各自独立运行,通过线程安全的阻塞队列 进行数据交换:
Demuxer Thread :生产者,产出 AVPacket
Decoder Threads :消费者(消耗 Packet)+ 生产者(产出 AVFrame)
Renderer Thread :消费者(消耗 Frame),驱动音画同步时钟
1.2 状态机设计 ┌─────────┐ │ IDLE │ ← 初始状态 └────┬────┘ │ prepare() ┌────▼────┐ │PREPARING│ 加载文件→打开流→查找解码器→启动线程 └────┬────┘ │ onPrepared ┌────▼────┐ ┌─────│PREPARED│ │ └────┬────┘ │ │ play() │ ┌────▼────┐ pause()│ │ PLAYING │──┐ │ └────┬────┘ │ │ │pause()│ │ ┌────▼────┐ │ └────→│ PAUSED │◄─┘ └────┬────┘ │ stop() ┌────▼────┐ │ STOPPED │ └────┬────┘ │ release() ┌────▼────┐ │RELEASED │ 释放所有 Native 资源 └─────────┘
特殊中间状态 SEEKING :从 PLAYING 或 PAUSED 状态进入 SEEKING,完成 seek 操作后返回原状态。
二、线程安全阻塞队列 2.1 数据结构 播放器中所有线程间数据传递都依赖阻塞队列。以下是基于 pthread_mutex + pthread_cond 的通用模板实现:
#include <pthread.h> #include <stdint.h> #include <stdlib.h> #define QUEUE_CAPACITY 60 #define AUDIO_QUEUE_CAPACITY 120 typedef struct PacketNode { AVPacket *pkt; struct PacketNode *next ; } PacketNode; typedef struct { PacketNode *head; PacketNode *tail; int size; int capacity; int abort_request; pthread_mutex_t mutex; pthread_cond_t cond; } PacketQueue; int packet_queue_init (PacketQueue *q, int capacity) { memset (q, 0 , sizeof (PacketQueue)); q->capacity = capacity; pthread_mutex_init(&q->mutex, NULL ); pthread_cond_init(&q->cond, NULL ); return 0 ; } int packet_queue_put (PacketQueue *q, AVPacket *pkt) { pthread_mutex_lock(&q->mutex); while (q->size >= q->capacity && !q->abort_request) { pthread_cond_wait(&q->cond, &q->mutex); } if (q->abort_request) { pthread_mutex_unlock(&q->mutex); return -1 ; } PacketNode *node = malloc (sizeof (PacketNode)); node->pkt = av_packet_alloc(); av_packet_ref(node->pkt, pkt); node->next = NULL ; if (q->tail) q->tail->next = node; else q->head = node; q->tail = node; q->size++; pthread_cond_signal(&q->cond); pthread_mutex_unlock(&q->mutex); return 0 ; } int packet_queue_get (PacketQueue *q, AVPacket *pkt) { pthread_mutex_lock(&q->mutex); while (q->size == 0 && !q->abort_request) { pthread_cond_wait(&q->cond, &q->mutex); } if (q->abort_request) { pthread_mutex_unlock(&q->mutex); return -1 ; } PacketNode *node = q->head; q->head = node->next; if (!q->head) q->tail = NULL ; q->size--; av_packet_move_ref(pkt, node->pkt); av_packet_free(&node->pkt); free (node); pthread_cond_signal(&q->cond); pthread_mutex_unlock(&q->mutex); return 0 ; } void packet_queue_abort (PacketQueue *q) { pthread_mutex_lock(&q->mutex); q->abort_request = 1 ; pthread_cond_broadcast(&q->cond); pthread_mutex_unlock(&q->mutex); } void packet_queue_flush (PacketQueue *q) { pthread_mutex_lock(&q->mutex); PacketNode *node = q->head; while (node) { PacketNode *next = node->next; av_packet_free(&node->pkt); free (node); node = next; } q->head = NULL ; q->tail = NULL ; 
q->size = 0 ; pthread_cond_broadcast(&q->cond); pthread_mutex_unlock(&q->mutex); } void packet_queue_destroy (PacketQueue *q) { packet_queue_flush(q); pthread_mutex_destroy(&q->mutex); pthread_cond_destroy(&q->cond); }
关键设计点 :
引用计数传输 :av_packet_ref() / av_packet_move_ref() 利用 FFmpeg 内部的引用计数机制,避免大块数据的深拷贝。
有界队列 :capacity 限制防止内存暴涨(视频包队列 60 个 packet,在每个 packet 对应一帧的情况下约等于 2 秒@30fps 的缓冲)。
abort_request 机制 :当播放器需要停止/释放时,通过设置此标志唤醒所有正在阻塞的消费者,避免死锁。
三、解封装线程(Demuxer Thread) 3.1 文件打开与流信息探测
/* Result of opening a media source and selecting one video and one audio stream. */
typedef struct {
    AVFormatContext *fmt_ctx;
    int video_stream_idx;  /* negative when no video stream was selected */
    int audio_stream_idx;  /* negative when no audio stream was selected */
    AVStream *video_stream; /* NULL when video_stream_idx is negative */
    AVStream *audio_stream; /* NULL when audio_stream_idx is negative */
    int64_t duration_us;   /* container duration in AV_TIME_BASE units, 0 if unknown */
} DemuxerContext;

/*
 * Open `url`, probe its streams and pick the best video and audio stream.
 *
 * Every field of `ctx` is overwritten. On failure the negative FFmpeg error
 * code is returned and `ctx->fmt_ctx` is NULL, so the caller has nothing to
 * release. Returns 0 on success, even if only one (or neither) stream type
 * was found; check the stream indices.
 */
int demuxer_open(DemuxerContext *ctx, const char *url)
{
    ctx->fmt_ctx = NULL;
    ctx->video_stream_idx = -1;
    ctx->audio_stream_idx = -1;
    ctx->video_stream = NULL;
    ctx->audio_stream = NULL;
    ctx->duration_us = 0;

    int ret = avformat_open_input(&ctx->fmt_ctx, url, NULL, NULL);
    if (ret < 0) {
        char errbuf[256];
        av_strerror(ret, errbuf, sizeof(errbuf));
        LOGE("avformat_open_input failed: %s", errbuf);
        return ret;
    }

    ret = avformat_find_stream_info(ctx->fmt_ctx, NULL);
    if (ret < 0) {
        LOGE("avformat_find_stream_info failed");
        /* The context was opened successfully above; close it so it does not leak. */
        avformat_close_input(&ctx->fmt_ctx);
        return ret;
    }

    ctx->video_stream_idx = av_find_best_stream(
        ctx->fmt_ctx, AVMEDIA_TYPE_VIDEO, -1, -1, NULL, 0);
    ctx->audio_stream_idx = av_find_best_stream(
        ctx->fmt_ctx, AVMEDIA_TYPE_AUDIO, -1, -1, NULL, 0);

    if (ctx->video_stream_idx >= 0) {
        ctx->video_stream = ctx->fmt_ctx->streams[ctx->video_stream_idx];
    }
    if (ctx->audio_stream_idx >= 0) {
        ctx->audio_stream = ctx->fmt_ctx->streams[ctx->audio_stream_idx];
    }
    if (ctx->fmt_ctx->duration != AV_NOPTS_VALUE) {
        ctx->duration_us = ctx->fmt_ctx->duration;
    }
    return 0;
}
3.2 解封装主循环 void *demuxer_thread (void *arg) { PlayerContext *player = (PlayerContext *)arg; AVPacket *pkt = av_packet_alloc(); while (!player->abort_request) { if (player->video_stream_idx >= 0 && player->video_pkt_queue.size > player->video_pkt_queue.capacity - 5 ) { av_usleep(10000 ); continue ; } if (player->audio_stream_idx >= 0 && player->audio_pkt_queue.size > player->audio_pkt_queue.capacity - 5 ) { av_usleep(10000 ); continue ; } int ret = av_read_frame(player->fmt_ctx, pkt); if (ret < 0 ) { if (ret == AVERROR_EOF || avio_feof(player->fmt_ctx->pb)) { break ; } if (player->fmt_ctx->pb && player->fmt_ctx->pb->error) { LOGE("IO error" ); break ; } continue ; } if (pkt->stream_index == player->video_stream_idx) { packet_queue_put(&player->video_pkt_queue, pkt); } else if (pkt->stream_index == player->audio_stream_idx) { packet_queue_put(&player->audio_pkt_queue, pkt); } av_packet_unref(pkt); } AVPacket null_pkt = {0 }; packet_queue_put(&player->video_pkt_queue, &null_pkt); packet_queue_put(&player->audio_pkt_queue, &null_pkt); av_packet_free(&pkt); LOGI("Demuxer thread exited" ); return NULL ; }
av_find_best_stream 的选流逻辑 :该函数先按 codec_type(以及调用者指定的 wanted_stream_nb / related_stream,若有)过滤候选流,再在候选流之间依次比较:disposition(带 default 标记、且不是听障/视障辅助轨的流优先)、探测阶段已解析到的帧数、码率。这意味着对于包含多音轨的文件,它通常会选择带默认标记的音轨。
四、视频解码线程 4.1 解码器初始化
/* An opened decoder together with the stream it decodes. */
typedef struct {
    AVCodecContext *codec_ctx; /* owned; free with avcodec_free_context() */
    const AVCodec *codec;      /* FFmpeg 5 returns codecs as const */
    int stream_idx;
    AVStream *stream;
    AVRational time_base;      /* copy of stream->time_base */
} DecoderContext;

/*
 * Find and open a decoder for stream `stream_idx` of `fmt_ctx`.
 *
 * Video decoders are configured for 4 frame-level threads.
 * Returns 0 on success. On failure returns a negative value and leaves
 * `ctx->codec_ctx` NULL, so nothing has to be freed by the caller.
 */
int decoder_open(DecoderContext *ctx, AVFormatContext *fmt_ctx, int stream_idx)
{
    ctx->codec_ctx = NULL;
    ctx->stream_idx = stream_idx;
    ctx->stream = fmt_ctx->streams[stream_idx];
    ctx->time_base = ctx->stream->time_base;

    ctx->codec = avcodec_find_decoder(ctx->stream->codecpar->codec_id);
    if (!ctx->codec) {
        LOGE("Codec not found for stream %d", stream_idx);
        return -1;
    }

    ctx->codec_ctx = avcodec_alloc_context3(ctx->codec);
    if (!ctx->codec_ctx) {
        LOGE("avcodec_alloc_context3 failed");
        return AVERROR(ENOMEM);
    }

    int ret = avcodec_parameters_to_context(ctx->codec_ctx, ctx->stream->codecpar);
    if (ret < 0) {
        LOGE("avcodec_parameters_to_context failed");
        avcodec_free_context(&ctx->codec_ctx);
        return ret;
    }

    if (ctx->codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
        ctx->codec_ctx->thread_count = 4;
        ctx->codec_ctx->thread_type = FF_THREAD_FRAME;
    }

    ret = avcodec_open2(ctx->codec_ctx, ctx->codec, NULL);
    if (ret < 0) {
        LOGE("avcodec_open2 failed");
        avcodec_free_context(&ctx->codec_ctx);
        return ret;
    }
    return 0;
}
帧级并行(FF_THREAD_FRAME)原理 :FF_THREAD_FRAME 让多个解码线程同时解码不同的帧(帧间的参考依赖由解码器内部同步),因此软件解码吞吐率随线程数提升,但会引入约 thread_count − 1 帧的额外输出延迟。把同一帧划分为多个 slice 并在不同线程上并行解码的是另一种模式 FF_THREAD_SLICE:它不增加延迟,但要求码流本身按多 slice 编码。
4.2 解码主循环 void *video_decode_thread (void *arg) { PlayerContext *player = (PlayerContext *)arg; AVPacket pkt; AVFrame *frame = av_frame_alloc(); while (!player->abort_request) { int ret = packet_queue_get(&player->video_pkt_queue, &pkt); if (ret < 0 ) break ; if (pkt.data == NULL ) { avcodec_send_packet(player->video_dec_ctx, NULL ); av_packet_unref(&pkt); break ; } ret = avcodec_send_packet(player->video_dec_ctx, &pkt); av_packet_unref(&pkt); if (ret < 0 ) { LOGE("video avcodec_send_packet error: %d" , ret); continue ; } while (1 ) { ret = avcodec_receive_frame(player->video_dec_ctx, frame); if (ret == AVERROR(EAGAIN)) { break ; } if (ret == AVERROR_EOF) { goto decode_exit; } if (ret < 0 ) { LOGE("video avcodec_receive_frame error: %d" , ret); break ; } if (frame->pts != AV_NOPTS_VALUE) { frame->pts = av_rescale_q(frame->pts, player->video_stream->time_base, AV_TIME_BASE_Q); } frame_queue_put(&player->video_frame_queue, frame); } } decode_exit: av_frame_free(&frame); LOGI("Video decode thread exited" ); return NULL ; }
五、音频解码线程与重采样 5.1 SwrContext 配置 音频解码后的格式(采样率、声道布局、采样格式)因源文件而异,必须通过 libswresample 转换为 AAudio 期望的格式:
/*
 * Create and initialise a resampler that converts the decoder's native audio
 * format to `out_channels` channels (default layout for that count),
 * `out_sample_rate` Hz, sample format `out_fmt`.
 *
 * Returns the ready-to-use context, or NULL on any failure. The caller frees
 * it with swr_free().
 */
SwrContext *init_swr(AVCodecContext *codec_ctx, int out_sample_rate,
                     int out_channels, enum AVSampleFormat out_fmt)
{
    if (!codec_ctx || out_sample_rate <= 0 || out_channels <= 0) {
        return NULL;
    }

    SwrContext *swr = NULL; /* must be NULL so swr_alloc_set_opts2() allocates */
    AVChannelLayout out_layout;
    av_channel_layout_default(&out_layout, out_channels);

    /* One call sets all six options; the individual av_opt_set_* calls need a
     * trailing search_flags argument and are easy to get wrong. */
    int ret = swr_alloc_set_opts2(&swr,
                                  &out_layout, out_fmt, out_sample_rate,
                                  &codec_ctx->ch_layout, codec_ctx->sample_fmt,
                                  codec_ctx->sample_rate,
                                  0, NULL);
    av_channel_layout_uninit(&out_layout);
    if (ret < 0 || !swr) {
        LOGE("swr_alloc_set_opts2 failed: %d", ret);
        swr_free(&swr);
        return NULL;
    }

    ret = swr_init(swr);
    if (ret < 0) {
        LOGE("swr_init failed: %d", ret);
        swr_free(&swr);
        return NULL;
    }
    return swr;
}

/*
 * Convert one decoded frame into the single output plane `dst_buf`, which
 * must have room for `dst_samples` samples per channel.
 * Returns the number of samples per channel written, or a negative error code.
 */
int convert_audio_frame(SwrContext *swr, AVFrame *src,
                        uint8_t *dst_buf, int dst_samples)
{
    if (!swr || !src || !dst_buf || dst_samples <= 0) {
        return AVERROR(EINVAL);
    }
    uint8_t *out_data[1] = { dst_buf };
    /* extended_data covers planar audio with more planes than data[] holds. */
    return swr_convert(swr, out_data, dst_samples,
                       (const uint8_t **)src->extended_data, src->nb_samples);
}
Planar vs Interleaved 格式转换 :FFmpeg 内部使用 planar 格式(如 AV_SAMPLE_FMT_FLTP:左声道和右声道数据分开存储)。而 Android AAudio 期望 interleaved 格式(LRLRLR…)。swr_convert() 自动完成此转换。
5.2 音频解码循环(含重采样) void *audio_decode_thread (void *arg) { PlayerContext *player = (PlayerContext *)arg; AVPacket pkt; AVFrame *frame = av_frame_alloc(); while (!player->abort_request) { int ret = packet_queue_get(&player->audio_pkt_queue, &pkt); if (ret < 0 ) break ; if (pkt.data == NULL ) { av_packet_unref(&pkt); break ; } ret = avcodec_send_packet(player->audio_dec_ctx, &pkt); av_packet_unref(&pkt); if (ret < 0 ) continue ; while (1 ) { ret = avcodec_receive_frame(player->audio_dec_ctx, frame); if (ret == AVERROR(EAGAIN)) break ; if (ret < 0 ) break ; if (frame->pts != AV_NOPTS_VALUE) { frame->pts = av_rescale_q(frame->pts, player->audio_stream->time_base, AV_TIME_BASE_Q); } int out_samples = swr_get_out_samples(player->swr_ctx, frame->nb_samples); int buf_size = av_samples_get_buffer_size( NULL , 2 , out_samples, AV_SAMPLE_FMT_S16, 0 ); uint8_t *out_buf = malloc (buf_size); int actual_samples = swr_convert( player->swr_ctx, &out_buf, out_samples, (const uint8_t **)frame->data, frame->nb_samples); audio_output_enqueue(player, out_buf, actual_samples); } } av_frame_free(&frame); return NULL ; }
六、A/V 同步策略 6.1 主时钟选择 同步策略的核心问题是:当视频帧和音频帧的 PTS 不一致时,以谁为准?
选择音频时钟作为主时钟的理由: ┌──────────────────────────────────────────────────────────┐ │ 人耳对音频中断(pop/crackle/gap)的敏感度远高于眼睛 │ │ 对丢帧的感知。音频时钟由硬件 DMA 驱动,无外部扰动。 │ │ 视频可以通过丢帧/重复来追赶,音频则不可。 │ └──────────────────────────────────────────────────────────┘
外部时钟(系统时钟) 的使用场景:当没有音频流时(如纯视频文件、静音视频),使用系统时钟 av_gettime_relative() 作为主时钟。
6.2 时钟实现
/*
 * A media clock. It stores the last reported media time together with its
 * offset from the monotonic system clock, so the current media time can be
 * extrapolated between updates. All times are in seconds.
 */
typedef struct {
    double pts;          /* media time passed to the last clock_set() */
    double pts_drift;    /* pts minus the system time of that update */
    double last_updated; /* system time of that update */
    pthread_mutex_t mutex;
} Clock;

/* Zero the clock and create its mutex. Call exactly once per Clock. */
void clock_init(Clock *c)
{
    c->pts = 0.0;
    c->pts_drift = 0.0;
    c->last_updated = 0.0;
    pthread_mutex_init(&c->mutex, NULL);
}

/* Record that media time `pts` corresponds to system time `system_time`. */
void clock_set(Clock *c, double pts, double system_time)
{
    pthread_mutex_lock(&c->mutex);
    c->pts = pts;
    c->pts_drift = pts - system_time;
    c->last_updated = system_time;
    pthread_mutex_unlock(&c->mutex);
}

/* Current media time: the stored drift applied to the current system time. */
double clock_get(Clock *c)
{
    pthread_mutex_lock(&c->mutex);
    double now = av_gettime_relative() / 1000000.0;
    double pts = c->pts_drift + now;
    pthread_mutex_unlock(&c->mutex);
    return pts;
}

/*
 * Time of the clock playback is synchronised to: the audio clock when an
 * audio stream exists, the external clock otherwise.
 */
double get_master_clock(PlayerContext *player)
{
    if (player->audio_stream_idx >= 0) {
        /* Both clocks are plain Clock objects, so both are read with clock_get(). */
        return clock_get(&player->audio_clock);
    }
    return clock_get(&player->ext_clock);
}
6.3 视频帧调度(丢帧/等待)
/*
 * Decide how to present `frame` relative to the master clock.
 *
 * frame->pts is expected in AV_TIME_BASE units.
 *
 * Returns:
 *   -1.0  the frame is more than 50 ms late and should be dropped;
 *    0.0  the frame is due (or has no timestamp) and should be shown now;
 *   > 0   the frame was early; the function slept this many seconds and the
 *         frame should be shown now.
 */
double video_refresh(PlayerContext *player, AVFrame *frame)
{
    /* Later than this behind the clock: drop instead of showing stale video. */
    const double drop_threshold_s = 0.05;
    /* Upper bound for one sleep, so a bogus timestamp cannot stall rendering. */
    const double max_wait_s = 1.0;

    if (frame->pts == AV_NOPTS_VALUE) {
        return 0.0;
    }

    double pts = frame->pts * av_q2d(AV_TIME_BASE_Q);
    double delay = pts - get_master_clock(player);

    if (delay < -drop_threshold_s) {
        return -1.0;
    }
    if (delay <= 0.0) {
        return 0.0;
    }

    /* The frame is early: wait until it is due. Showing early frames at once
     * would let video run ahead of the master clock. */
    double wait_s = fmin(delay, max_wait_s);
    av_usleep((unsigned)(wait_s * 1000000.0));
    return wait_s;
}
丢帧阈值 50ms 的依据 :在 60fps 显示器上,每帧间隔约 16.67ms。50ms 约等于 3 帧的延迟。过低的阈值会导致频繁丢帧(用户感知卡顿),过高的阈值会导致 A/V 不同步。
6.4 音频时钟更新
/*
 * Advance the audio clock by the duration of `bytes_written` bytes of 16-bit
 * PCM with `channels` channels at `sample_rate` Hz.
 *
 * The stored media time (Clock.pts) is advanced, not the extrapolated value
 * from clock_get(): that value already includes the wall-clock time elapsed
 * since the previous update, so adding the duration to it would count the
 * played audio twice.
 */
void audio_callback_update_clock(PlayerContext *player, int bytes_written,
                                 int sample_rate, int channels)
{
    if (bytes_written <= 0 || sample_rate <= 0 || channels <= 0) {
        return;
    }

    int samples_written = bytes_written / (2 * channels); /* 2 bytes per S16 sample */
    double duration = (double)samples_written / sample_rate;

    Clock *clk = &player->audio_clock;
    pthread_mutex_lock(&clk->mutex);
    double now = av_gettime_relative() / 1000000.0;
    clk->pts += duration;
    clk->pts_drift = clk->pts - now;
    clk->last_updated = now;
    pthread_mutex_unlock(&clk->mutex);
}
七、视频渲染:ANativeWindow 7.1 初始化 #include <android/native_window_jni.h> #include <android/native_window.h> ANativeWindow *native_window = NULL ; void set_surface (JNIEnv *env, jobject surface) { if (native_window) { ANativeWindow_release(native_window); native_window = NULL ; } if (surface) { native_window = ANativeWindow_fromSurface(env, surface); } } void configure_window (ANativeWindow *window, int width, int height) { native_window_set_buffers_geometry( window, width, height, WINDOW_FORMAT_RGBA_8888 ); }
7.2 YUV → RGBA 转换并渲染
/*
 * Convert `frame` to RGBA and post it to the player's native window.
 * Does nothing when no window is attached or the frame has no picture.
 *
 * The scaler is obtained with sws_getCachedContext(), which creates it on
 * first use and rebuilds it if the frame size or pixel format changes.
 */
void render_frame(PlayerContext *player, AVFrame *frame)
{
    ANativeWindow *window = player->native_window;
    if (!window || !frame || frame->width <= 0 || frame->height <= 0) {
        return;
    }

    int video_w = frame->width;
    int video_h = frame->height;

    player->sws_ctx = sws_getCachedContext(
        player->sws_ctx,
        video_w, video_h, (enum AVPixelFormat)frame->format,
        video_w, video_h, AV_PIX_FMT_RGBA,
        SWS_FAST_BILINEAR, NULL, NULL, NULL);
    if (!player->sws_ctx) {
        LOGE("sws_getCachedContext failed");
        return;
    }

    ANativeWindow_setBuffersGeometry(window, video_w, video_h,
                                     WINDOW_FORMAT_RGBA_8888);

    ANativeWindow_Buffer buf;
    if (ANativeWindow_lock(window, &buf, NULL) < 0) {
        LOGE("ANativeWindow_lock failed");
        return;
    }

    /* Only write when the locked buffer can hold the whole picture. */
    if (buf.bits && buf.width >= video_w && buf.height >= video_h) {
        uint8_t *dst_data[1] = { (uint8_t *)buf.bits };
        /* stride is in pixels; sws_scale wants bytes (4 per RGBA pixel). */
        int dst_linesize[1] = { buf.stride * 4 };
        sws_scale(player->sws_ctx,
                  (const uint8_t *const *)frame->data, frame->linesize,
                  0, video_h, dst_data, dst_linesize);
    }

    /* A locked buffer must always be posted back. */
    ANativeWindow_unlockAndPost(window);
}
注意 buf.stride * 4:ANativeWindow 的 stride 是按 像素 计算的,而 sws_scale 的 linesize 需要按字节 计算。对于 RGBA_8888 格式,每个像素 4 字节,因此 linesize = stride × 4。
7.3 SwsContext 初始化(YUV→RGBA)
/*
 * Create a scaler converting src_w x src_h frames in `src_fmt` to
 * dst_w x dst_h frames in `dst_fmt` using fast bilinear filtering.
 * Returns NULL on failure; free the result with sws_freeContext().
 *
 * The return type is spelled `struct SwsContext` because libswscale in
 * FFmpeg 5.x declares only the struct tag, with no typedef, so a bare
 * `SwsContext` does not compile as C.
 */
struct SwsContext *init_sws(int src_w, int src_h, enum AVPixelFormat src_fmt,
                            int dst_w, int dst_h, enum AVPixelFormat dst_fmt)
{
    return sws_getContext(src_w, src_h, src_fmt,
                          dst_w, dst_h, dst_fmt,
                          SWS_FAST_BILINEAR, NULL, NULL, NULL);
}
八、音频输出:AAudio 8.1 AAudioStream 初始化 #include <aaudio/AAudio.h> AAudioStream *create_audio_stream (int sample_rate, int channel_count, int format, int frames_per_burst) { AAudioStreamBuilder *builder; AAudio_createStreamBuilder(&builder); AAudioStreamBuilder_setDirection(builder, AAUDIO_DIRECTION_OUTPUT); AAudioStreamBuilder_setPerformanceMode(builder, AAUDIO_PERFORMANCE_MODE_LOW_LATENCY); AAudioStreamBuilder_setSharingMode(builder, AAUDIO_SHARING_MODE_EXCLUSIVE); AAudioStreamBuilder_setFormat(builder, format); AAudioStreamBuilder_setChannelCount(builder, channel_count); AAudioStreamBuilder_setSampleRate(builder, sample_rate); AAudioStreamBuilder_setFramesPerDataCallback(builder, frames_per_burst); AAudioStreamBuilder_setDataCallback(builder, audio_data_callback, player); AAudioStream *stream; aaudio_result_t result = AAudioStreamBuilder_openStream(builder, &stream); AAudioStreamBuilder_delete(builder); if (result != AAUDIO_OK) { LOGE("AAudioStreamBuilder_openStream failed: %d" , result); return NULL ; } int32_t actual_rate = AAudioStream_getSampleRate(stream); int32_t actual_channels = AAudioStream_getChannelCount(stream); LOGI("AAudio opened: rate=%d, channels=%d, format=%d, burst=%d" , actual_rate, actual_channels, format, AAudioStream_getFramesPerBurst(stream)); return stream; }
8.2 数据回调(AAudio 驱动的音频输出)
/*
 * AAudio data callback: fill `audio_data` with `num_frames` frames of PCM
 * taken from the player's audio output queue and advance the audio clock by
 * the amount of real audio delivered.
 *
 * Any shortfall is filled with silence and flagged in player->audio_underrun.
 * Always asks AAudio to keep calling.
 */
aaudio_data_callback_result_t audio_data_callback(
    AAudioStream *stream, void *user_data, void *audio_data, int32_t num_frames)
{
    PlayerContext *player = (PlayerContext *)user_data;

    /* NOTE(review): assumes the stream was opened as 16-bit PCM - confirm.
     * The channel count comes from the stream so the byte count matches the
     * buffer AAudio actually handed over. */
    int channels = AAudioStream_getChannelCount(stream);
    if (channels <= 0) {
        channels = 2;
    }
    int bytes_per_frame = channels * (int)sizeof(int16_t);
    int bytes_needed = num_frames * bytes_per_frame;

    int bytes_read = audio_output_dequeue(player, (uint8_t *)audio_data,
                                          bytes_needed);
    if (bytes_read < 0) {
        bytes_read = 0;
    } else if (bytes_read > bytes_needed) {
        bytes_read = bytes_needed;
    }

    if (bytes_read < bytes_needed) {
        memset((uint8_t *)audio_data + bytes_read, 0,
               (size_t)(bytes_needed - bytes_read));
        player->audio_underrun = 1;
    } else {
        player->audio_underrun = 0;
    }

    int frames_written = bytes_read / bytes_per_frame;
    int32_t rate = AAudioStream_getSampleRate(stream);
    if (frames_written > 0 && rate > 0) {
        double duration = (double)frames_written / rate;

        /* Advance the stored media time, not the extrapolated clock value:
         * the latter already contains the elapsed wall time, so adding the
         * duration to it would make the clock run roughly twice too fast. */
        Clock *clk = &player->audio_clock;
        pthread_mutex_lock(&clk->mutex);
        double now = av_gettime_relative() / 1000000.0;
        clk->pts += duration;
        clk->pts_drift = clk->pts - now;
        clk->last_updated = now;
        pthread_mutex_unlock(&clk->mutex);
    }

    return AAUDIO_CALLBACK_RESULT_CONTINUE;
}
8.3 Underrun 检测
/*
 * Poll the output stream for new underruns (xruns). When the count has grown
 * since the last call, log it and enlarge the stream buffer by one burst,
 * as long as that still fits within the buffer capacity.
 *
 * AAudioStream_getBufferSizeInFrames() reports the configured buffer size,
 * not how full the buffer is, so its ratio to the capacity is not used as a
 * "buffer low" signal.
 */
void check_underrun(PlayerContext *player)
{
    if (!player->audio_stream) {
        return;
    }

    int32_t xruns = AAudioStream_getXRunCount(player->audio_stream);
    if (xruns <= player->last_xrun_count) {
        return;
    }
    LOGW("Audio underrun detected! xruns=%d", xruns);
    player->last_xrun_count = xruns;

    int32_t frames = AAudioStream_getBufferSizeInFrames(player->audio_stream);
    int32_t capacity = AAudioStream_getBufferCapacityInFrames(
        player->audio_stream);
    int32_t burst = AAudioStream_getFramesPerBurst(player->audio_stream);
    if (burst <= 0 || frames < 0 || frames + burst > capacity) {
        return; /* already at the maximum, or the sizes are unavailable */
    }

    /* Returns the size actually applied, or a negative error code. */
    aaudio_result_t applied = AAudioStream_setBufferSizeInFrames(
        player->audio_stream, frames + burst);
    if (applied < 0) {
        LOGW("Audio buffer resize failed: %d", applied);
    } else {
        LOGI("Audio buffer grown: %d/%d frames", applied, capacity);
    }
}
九、Seek 实现 9.1 Seek 操作流程 1. 设置 seek 目标位置(微秒) 2. 标记状态为 SEEKING 3. av_seek_frame() 跳到指定位置(使用 AVSEEK_FLAG_BACKWARD 定位到前一关键帧) 4. avcodec_flush_buffers() 清空音视频解码器内部缓冲 5. 清空 packet queue 和 frame queue 6. 标记状态为 SEEKED,恢复为原状态(PLAYING/PAUSED) 7. 解码线程自动从新的关键帧位置开始解码
9.2 实现代码
/*
 * Seek to `seek_pos_us` (microseconds). The demuxer is positioned on the
 * key frame at or before the target, both decoders are flushed, the queues
 * are emptied and both clocks are set to the target time.
 *
 * Returns 0 on success, -1 if nothing is opened, -2 if there is no stream to
 * seek on, or the negative av_seek_frame() error.
 *
 * NOTE(review): seek_mutex is taken only here. The demuxer and decode threads
 * use fmt_ctx and the codec contexts without it, so this call is not yet
 * synchronised against them; those threads need to be parked, or take the
 * same lock, for the seek to be race-free.
 */
int player_seek(PlayerContext *player, int64_t seek_pos_us)
{
    if (!player->fmt_ctx) {
        return -1;
    }

    /* Prefer the video stream so the seek lands on a video key frame. */
    int stream_idx = player->video_stream_idx >= 0
                         ? player->video_stream_idx
                         : player->audio_stream_idx;
    if (stream_idx < 0) {
        return -2;
    }

    pthread_mutex_lock(&player->seek_mutex);
    player->seek_request = 1;
    player->seek_pos = seek_pos_us;

    int64_t seek_target = av_rescale_q(
        seek_pos_us, AV_TIME_BASE_Q,
        player->fmt_ctx->streams[stream_idx]->time_base);

    int ret = av_seek_frame(player->fmt_ctx, stream_idx, seek_target,
                            AVSEEK_FLAG_BACKWARD);
    if (ret < 0) {
        LOGE("av_seek_frame failed: %d", ret);
        player->seek_request = 0;
        pthread_mutex_unlock(&player->seek_mutex);
        return ret;
    }

    if (player->video_dec_ctx) {
        avcodec_flush_buffers(player->video_dec_ctx);
    }
    if (player->audio_dec_ctx) {
        avcodec_flush_buffers(player->audio_dec_ctx);
    }

    packet_queue_flush(&player->video_pkt_queue);
    packet_queue_flush(&player->audio_pkt_queue);
    frame_queue_flush(&player->video_frame_queue);

    /* Move the clocks with clock_set(). clock_init() would re-initialise
     * mutexes that are already in use, and would also reset the time to 0
     * instead of the seek target. */
    double now = av_gettime_relative() / 1000000.0;
    double target_s = (double)seek_pos_us / 1000000.0;
    clock_set(&player->audio_clock, target_s, now);
    clock_set(&player->ext_clock, target_s, now);

    player->seek_request = 0;
    pthread_mutex_unlock(&player->seek_mutex);

    LOGI("Seek completed: target=%lld us", (long long)seek_pos_us);
    return 0;
}
AVSEEK_FLAG_BACKWARD 的必要性 :大多数视频编码格式(H.264/H.265)中,只有关键帧(I 帧)可以独立解码。seek 到任意 PTS 位置时,解码器必须从目标 PTS 之前最近的 I 帧 开始解码,直到到达目标帧。AVSEEK_FLAG_BACKWARD 让 av_seek_frame 定位到目标 PTS 处或之前的关键帧,因此 seek 之后 av_read_frame 读到的第一个视频 packet 位于目标 PTS 或之前。
十、播放控制与生命周期管理 10.1 完整 PlayerContext 结构
/*
 * All state of one player instance, shared by the control, demuxer, decode
 * and output code.
 */
typedef struct PlayerContext {
    /* Lifecycle. NOTE(review): `volatile` gives no inter-thread ordering
     * guarantees; consider C11 atomics for these flags. */
    volatile int state;          /* one of the PLAYER_STATE_* values */
    volatile int abort_request;  /* non-zero asks every worker thread to exit */

    /* Demuxer */
    AVFormatContext *fmt_ctx;
    int video_stream_idx;        /* negative when there is no video stream */
    int audio_stream_idx;        /* negative when there is no audio stream */
    AVStream *video_stream;
    AVStream *audio_stream;

    /* Decoders */
    AVCodecContext *video_dec_ctx;
    AVCodecContext *audio_dec_ctx;

    /* Inter-thread queues */
    PacketQueue video_pkt_queue;
    PacketQueue audio_pkt_queue;
    FrameQueue video_frame_queue;

    /* Synchronisation clocks */
    Clock audio_clock;
    Clock ext_clock;

    /* Output */
    ANativeWindow *native_window;
    struct SwsContext *sws_ctx;  /* libswscale in FFmpeg 5.x has no typedef for this */
    SwrContext *swr_ctx;
    /* NOTE(review): this member has the same name as the AVStream pointer
     * above, which C does not allow. One of the two must be renamed, together
     * with every user of it, before this struct can compile. */
    AAudioStream *audio_stream;
    volatile int audio_underrun; /* written by the audio data callback */
    int32_t last_xrun_count;     /* last xrun count seen by check_underrun() */

    /* Worker threads */
    pthread_t demux_thread;
    pthread_t video_dec_thread;
    pthread_t audio_dec_thread;
    pthread_t video_render_thread;

    /* Seek */
    volatile int seek_request;
    int64_t seek_pos;            /* requested position in microseconds */
    pthread_mutex_t seek_mutex;

    /* Callbacks; each receives `user_data` as its first argument. */
    void (*on_prepared)(void *user_data);
    void (*on_completed)(void *user_data);
    void (*on_error)(void *user_data, int code, const char *msg);
    void *user_data;
} PlayerContext;
10.2 状态转换实现
/*
 * Ask the worker threads to exit and join the ones that were started.
 * Each flag says whether the corresponding thread was created.
 */
static void player_stop_threads(PlayerContext *player, int demux_started,
                                int video_started, int audio_started)
{
    player->abort_request = 1;
    packet_queue_abort(&player->video_pkt_queue);
    packet_queue_abort(&player->audio_pkt_queue);
    frame_queue_abort(&player->video_frame_queue);

    if (demux_started) {
        pthread_join(player->demux_thread, NULL);
    }
    if (video_started) {
        pthread_join(player->video_dec_thread, NULL);
    }
    if (audio_started) {
        pthread_join(player->audio_dec_thread, NULL);
    }
}

/*
 * Free everything player_prepare() creates except the threads, which must
 * already be joined. Every FFmpeg free function used here accepts NULL.
 */
static void player_free_pipeline(PlayerContext *player)
{
    avformat_close_input(&player->fmt_ctx);
    avcodec_free_context(&player->video_dec_ctx);
    avcodec_free_context(&player->audio_dec_ctx);
    sws_freeContext(player->sws_ctx);
    player->sws_ctx = NULL;
    swr_free(&player->swr_ctx);

    packet_queue_destroy(&player->video_pkt_queue);
    packet_queue_destroy(&player->audio_pkt_queue);
    frame_queue_destroy(&player->video_frame_queue);

    pthread_mutex_destroy(&player->seek_mutex);
    pthread_mutex_destroy(&player->audio_clock.mutex);
    pthread_mutex_destroy(&player->ext_clock.mutex);
}

/*
 * Open `url`, set up decoders, resampler, queues and clocks, and start the
 * demuxer and decode threads. A decode thread is started only for a stream
 * that exists.
 *
 * On success the state becomes PREPARED, on_prepared is invoked and 0 is
 * returned. On failure everything created here is torn down again, the state
 * becomes ERROR, on_error is invoked and the negative error code is returned.
 *
 * NOTE(review): no video render thread is started here although the context
 * has a slot for one.
 */
int player_prepare(PlayerContext *player, const char *url)
{
    DemuxerContext demux = {0};
    DecoderContext dec = {0};
    int demux_started = 0;
    int video_started = 0;
    int audio_started = 0;
    int ret;
    int err;

    player->state = PLAYER_STATE_PREPARING;
    player->abort_request = 0;

    /* Create the synchronisation objects first so the error path can always
     * destroy them, whichever step fails. */
    packet_queue_init(&player->video_pkt_queue, QUEUE_CAPACITY);
    packet_queue_init(&player->audio_pkt_queue, AUDIO_QUEUE_CAPACITY);
    frame_queue_init(&player->video_frame_queue, FRAME_QUEUE_CAPACITY);
    pthread_mutex_init(&player->seek_mutex, NULL);
    clock_init(&player->audio_clock);
    clock_init(&player->ext_clock);

    /* demuxer_open() fills a DemuxerContext, whose layout differs from
     * PlayerContext, so go through a local and copy the results. */
    ret = demuxer_open(&demux, url);
    player->fmt_ctx = demux.fmt_ctx; /* adopted even on failure so it is closed */
    if (ret < 0) {
        goto error;
    }
    player->video_stream_idx = demux.video_stream_idx;
    player->audio_stream_idx = demux.audio_stream_idx;
    player->video_stream = demux.video_stream;
    /* NOTE(review): PlayerContext declares `audio_stream` twice (AVStream and
     * AAudioStream); this stores the AVStream the audio decode thread reads. */
    player->audio_stream = demux.audio_stream;

    if (player->video_stream_idx < 0 && player->audio_stream_idx < 0) {
        ret = AVERROR_STREAM_NOT_FOUND;
        goto error;
    }

    if (player->video_stream_idx >= 0) {
        dec.codec_ctx = NULL;
        ret = decoder_open(&dec, player->fmt_ctx, player->video_stream_idx);
        player->video_dec_ctx = dec.codec_ctx;
        if (ret < 0) {
            goto error;
        }
    }

    if (player->audio_stream_idx >= 0) {
        dec.codec_ctx = NULL;
        ret = decoder_open(&dec, player->fmt_ctx, player->audio_stream_idx);
        player->audio_dec_ctx = dec.codec_ctx;
        if (ret < 0) {
            goto error;
        }
        player->swr_ctx = init_swr(player->audio_dec_ctx, 44100, 2,
                                   AV_SAMPLE_FMT_S16);
        if (!player->swr_ctx) {
            ret = AVERROR(ENOMEM);
            goto error;
        }
    }

    err = pthread_create(&player->demux_thread, NULL, demuxer_thread, player);
    if (err != 0) {
        ret = AVERROR(err);
        goto error;
    }
    demux_started = 1;

    if (player->video_dec_ctx) {
        err = pthread_create(&player->video_dec_thread, NULL,
                             video_decode_thread, player);
        if (err != 0) {
            ret = AVERROR(err);
            goto error;
        }
        video_started = 1;
    }

    if (player->audio_dec_ctx) {
        err = pthread_create(&player->audio_dec_thread, NULL,
                             audio_decode_thread, player);
        if (err != 0) {
            ret = AVERROR(err);
            goto error;
        }
        audio_started = 1;
    }

    player->state = PLAYER_STATE_PREPARED;
    if (player->on_prepared) {
        player->on_prepared(player->user_data);
    }
    return 0;

error:
    player_stop_threads(player, demux_started, video_started, audio_started);
    player_free_pipeline(player);
    player->state = PLAYER_STATE_ERROR;
    if (player->on_error) {
        char errbuf[256];
        av_strerror(ret, errbuf, sizeof(errbuf));
        player->on_error(player->user_data, ret, errbuf);
    }
    return ret;
}

/* PREPARED or PAUSED -> PLAYING. Returns 0, or -1 from any other state. */
int player_play(PlayerContext *player)
{
    if (player->state != PLAYER_STATE_PREPARED &&
        player->state != PLAYER_STATE_PAUSED) {
        return -1;
    }
    player->state = PLAYER_STATE_PLAYING;
    if (player->audio_stream) {
        AAudioStream_requestStart(player->audio_stream);
    }
    return 0;
}

/* PLAYING -> PAUSED. Returns 0, or -1 from any other state. */
int player_pause(PlayerContext *player)
{
    if (player->state != PLAYER_STATE_PLAYING) {
        return -1;
    }
    player->state = PLAYER_STATE_PAUSED;
    if (player->audio_stream) {
        AAudioStream_requestPause(player->audio_stream);
    }
    return 0;
}

/*
 * Stop all threads and free every native resource. Safe to call from any
 * state, including after a failed player_prepare(); calling it again on a
 * released player does nothing.
 */
void player_release(PlayerContext *player)
{
    if (player->state == PLAYER_STATE_RELEASED) {
        return;
    }

    /* Only a successfully prepared player owns threads and a pipeline;
     * a failed prepare has already cleaned up after itself. */
    int prepared = player->state == PLAYER_STATE_PREPARED ||
                   player->state == PLAYER_STATE_PLAYING ||
                   player->state == PLAYER_STATE_PAUSED;

    /* Close the audio stream first so its callback stops before the data it
     * reads is torn down. */
    if (player->audio_stream) {
        AAudioStream_requestStop(player->audio_stream);
        AAudioStream_close(player->audio_stream);
        player->audio_stream = NULL;
    }

    if (prepared) {
        player_stop_threads(player, 1,
                            player->video_dec_ctx != NULL,
                            player->audio_dec_ctx != NULL);
        player_free_pipeline(player);
    }

    if (player->native_window) {
        ANativeWindow_release(player->native_window);
        player->native_window = NULL;
    }

    player->state = PLAYER_STATE_RELEASED;
}
十一、JNI 桥接 JNIEXPORT jlong JNICALL Java_com_example_player_NativePlayer_nativeCreate (JNIEnv *env, jobject thiz) { PlayerContext *player = calloc (1 , sizeof (PlayerContext)); player->state = PLAYER_STATE_IDLE; return (jlong)(uintptr_t )player; } JNIEXPORT void JNICALL Java_com_example_player_NativePlayer_nativeSetSurface ( JNIEnv *env, jobject thiz, jlong handle, jobject surface) { PlayerContext *player = (PlayerContext *)(uintptr_t )handle; player->native_window = surface ? ANativeWindow_fromSurface(env, surface) : NULL ; } JNIEXPORT jint JNICALL Java_com_example_player_NativePlayer_nativePrepare ( JNIEnv *env, jobject thiz, jlong handle, jstring url) { PlayerContext *player = (PlayerContext *)(uintptr_t )handle; const char *url_c = (*env)->GetStringUTFChars(env, url, NULL ); int ret = player_prepare(player, url_c); (*env)->ReleaseStringUTFChars(env, url, url_c); return ret; } JNIEXPORT void JNICALL Java_com_example_player_NativePlayer_nativePlay ( JNIEnv *env, jobject thiz, jlong handle) { player_play((PlayerContext *)(uintptr_t )handle); } JNIEXPORT void JNICALL Java_com_example_player_NativePlayer_nativePause ( JNIEnv *env, jobject thiz, jlong handle) { player_pause((PlayerContext *)(uintptr_t )handle); } JNIEXPORT void JNICALL Java_com_example_player_NativePlayer_nativeSeek ( JNIEnv *env, jobject thiz, jlong handle, jlong pos_us) { player_seek((PlayerContext *)(uintptr_t )handle, pos_us); } JNIEXPORT void JNICALL Java_com_example_player_NativePlayer_nativeRelease ( JNIEnv *env, jobject thiz, jlong handle) { player_release((PlayerContext *)(uintptr_t )handle); free ((PlayerContext *)(uintptr_t )handle); }
十二、结语 本文构建了一个完整的 NDK 视频播放器内核,覆盖了以下关键设计:
多线程流水线 :解封装→解码→渲染,通过阻塞队列解耦各阶段。
A/V 同步 :以音频时钟为主时钟,通过丢帧/等待保持画面与声音的同步。
Seek 实现 :利用 AVSEEK_FLAG_BACKWARD 跳转到关键帧,清空缓冲后无缝恢复播放。
ANativeWindow 渲染 :通过 sws_scale 将 YUV 帧转为 RGBA 后,直接在 Native 层面提交到 Surface。
AAudio 低延迟输出 :通过 data callback 模式驱动音频时钟更新,实现亚 10ms 级的音画同步精度。
状态机管理 :清晰的 IDLE→PREPARING→…→RELEASED 生命周期,确保资源的正确创建和释放。
实际工程中,还需考虑网络流媒体播放 (使用 avformat_open_input 的 AVFormatContext->interrupt_callback 实现超时取消)、硬件解码 (通过 MediaCodec 的 AVCodecContext->hw_device_ctx 配置)、缓冲策略 (根据网络带宽动态调整队列容量)等进阶话题,但核心架构保持不变。
参考资料