zhuang-xd

庄晓栋2025/12/26大约 7 分钟

上周回顾

主要就是

yuv/rgb转换
重新写了xvideoview库，还有些小问题并不影响
qt写的多路yuv/rgb播放器
h264编码，没弄完大概50%

本周计划

封装ffmpeg
264解码
解封装mp4
解封装rtsp

本周记录

封装ffmpeg的编码

#daily/25/10/20

具体参考 015test_xencode

用了静态工厂方法创建编码上下文对象，不会自动析构，需要手动管理创建的资源

加锁

#pragma once
#include <mutex>
struct AVCodecContext;

// Wrapper around an FFmpeg encoder context (excerpt: only the data members are shown).
class XEncode
{
public:
	std::mutex mut_;                     // locked by the member functions before they touch context_
	AVCodecContext* context_ = nullptr;  // encoder context; nullptr until one is assigned
};

// Excerpt of XEncode::Encode: only the locking is shown, the rest of the body
// is omitted in these notes.
AVPacket* XEncode::Encode(AVFrame* frame)
{
	unique_lock<mutex> lock(mut_); // hold mut_ for the remainder of the call
	// ... (remaining logic omitted in the notes)
}

封装context的配置项

bool XEncode::SetContextOption(const char* key, int val)
{
	unique_lock<mutex> lock(mut_);

	if (!context_) return false;
	
	int re = av_opt_set_int(context_->priv_data,key,val,0);
	if (re < 0) {
		cout << "SetContextOption failed ";
		PrintErr(re);
		return false;
	}

	return true;
}

取出缓冲区中的数据

std::vector<AVPacket*> XEncode::End()
{
	unique_lock<mutex> lock(mut_);

	vector<AVPacket*> pkt_vector;
	if (!context_) return pkt_vector;

	int re = avcodec_send_frame(context_,NULL);
	if (re != 0) {
		return pkt_vector;
	}
	
	while (true)
	{
		AVPacket* pkt = av_packet_alloc();

		int re = avcodec_receive_packet(context_, pkt);
		if (re == 0) {
			pkt_vector.push_back(pkt);
		}
		else if (re == AVERROR_EOF || re == AVERROR(EINVAL))
		{
			break;
		}
		else {
			PrintErr(re);
			break;
		}
	}

	return pkt_vector;
}

vs制作动态链接库

#daily/25/10/21

制作

新建项目选择动态库，然后删除自动生成的.h和.cpp文件
禁用预编译头

把要暴露的添加 __declspec(dllexport) ，可以用下面这种宏的方式

#define XVIDEO_VIEW_EXPORTS

#ifdef XVIDEO_VIEW_EXPORTS
#define XVIDEO_VIEW_API __declspec(dllexport) 
#else
#define XVIDEO_VIEW_API __declspec(dllimport) 
#endif

// FFmpeg declares AVFrame as a struct; the forward declaration must use the
// same class-key (MSVC warns with C4099 on a class/struct mismatch).
struct AVFrame;

// Exported from the DLL through XVIDEO_VIEW_API (members omitted in these notes).
class XVIDEO_VIEW_API XVideoView
{
};

vs中这里可以在代码之外添加宏定义

使用

把库需要用到的头文件复制到项目

代码中添加lib的位置，引入头文件就可以正常使用了

#include "xvideoview.h"

#pragma comment(lib,"../../bin/Win32/Release/xvideoview.lib")

ffmpeg软解码

具体参考 016test_decode_h264

ffmpeg解码接口

从h264中提取avpacket

这个init是静态的吗？ #question

效果

代码

基本和编码差不多，多了一步通过ffmpeg提供的接口去初始化parser，切割文件中的 avpacket。

// Read the file into buf
while(true)
{
	// Split buf into AVPackets; a single buf may yield several AVPackets
	while(true) 
	{
		// Receive AVFrames; several AVFrames may be received at once
		while(true) 
		{
		}
	}
}

// Fetch the frames that are still held in the buffer

多线程解码

// Create the codec context
auto context = avcodec_alloc_context3(dec);

context->thread_count = 16; // enable multi-threading

硬解码 DXVA2

具体参考 016test_decode_h264

#daily/25/10/22

支持的硬件加速类型

// 打印支持的硬件加速
for (int i = 0;;i++)
{
	auto config = avcodec_get_hw_config(dec,i);
	if (!config) break;
	const char* type = av_hwdevice_get_type_name(config->device_type);
	if (type) {
		cout << type << endl;
	}
	else {
		cout << config->device_type << endl;
	}
}

硬件加速接口

// Create a DXVA2 hardware device context
AVBufferRef* hw_context = nullptr;
AVHWDeviceType hw_type = AV_HWDEVICE_TYPE_DXVA2;
av_hwdevice_ctx_create(&hw_context, hw_type,NULL,NULL,0); // NOTE(review): the return value is not checked

context->hw_device_ctx = hw_context; // set on the decoder/encoder context to enable hardware acceleration


// av_hwframe_transfer_data(dst, src, flags): copies the data of `frame` into `hw_frame`.
// NOTE(review): presumably `frame` is the hardware-decoded surface and `hw_frame`
// the system-memory destination — confirm against the full sample.
av_hwframe_transfer_data(hw_frame,frame,0);

nv12格式

#daily/25/10/23

具体参考 016test_decode_h264

Y0 Y1 Y2 Y3
Y4 Y5 Y6 Y7
Y8 Y9 Y10 Y11
Y12 Y13 Y14 Y15
U0 V0 U1 V1
U2 V2 U3 V3

nv12只有两个平面，uv平面是交错

y平面
uv平面

和yuv格式不同

y平面
u平面
v平面

SDL的两个接口

SDL_UpdateTexture 这个可以接收整个数组，和步长
SDL_UpdateYUVTexture 这个可以接收yuv3个平面的数据，和步长

nv12格式只有y平面和uv平面，uv平面中uv的数据又是交错的，所以手动把y平面和uv平面做一个拼接后，就可以使用SDL_UpdateTexture 这个接口去渲染nv12格式了

本质是字节没对齐，也就是说每一行的宽度不一致，需要手动调整

// If the transfer succeeded, use the hardware-accelerated frame
/*	
	NV12 layout
	Y0  Y1  Y2  Y3
	Y4  Y5  Y6  Y7
	Y8  Y9  Y10 Y11
	Y12 Y13 Y14 Y15
	U0	V0	U1	V1
	U2	V2	U3	V3
*/

// NOTE(review): the buffer size is hard-coded instead of derived from the frame
auto cache = make_unique<unsigned char[]>(1920 * 1080 * 4 * 3 / 2);
int y_size = p_frame->width * p_frame->height;
int uv_size = p_frame->width * p_frame->height / 2;
// Each plane is copied with a single memcpy, which treats rows as exactly
// width bytes apart (no per-row padding).
memcpy(cache.get(), p_frame->data[0], y_size);				// Y plane
memcpy(cache.get() + y_size, p_frame->data[1], uv_size);	// interleaved UV plane

逐行扫描的方式，让每一行的width强行一致

这里有点问题的，可以参考[[2025-w43#字节对齐问题|字节对齐问题]]

// Copy row by row so that every row in cache is exactly width bytes wide.
// Source rows are linesize bytes apart (linesize may include padding), so the
// source offset is computed from linesize and the destination offset from width.
for (int i = 0; i < p_frame->height; i++) // Y plane
{
	memcpy(cache.get() + p_frame->width * i,
		p_frame->data[0] + p_frame->linesize[0] * i,
		p_frame->width);
}
// Interleaved UV plane of NV12: each row is also width bytes wide, but there
// are only height / 2 rows. It is stored right after the Y plane in cache.
for (int i = 0; i < p_frame->height / 2; i++)
{
	memcpy(cache.get() + p_frame->width * p_frame->height + p_frame->width * i,
		p_frame->data[1] + p_frame->linesize[1] * i,
		p_frame->width
	);
}

色彩产生偏差说明是SDL渲染的格式没选对，需要改成SDL_PIXELFORMAT_NV12

// Create a streaming texture whose pixel format is NV12, matching the frame data that is uploaded to it
auto texture_ = SDL_CreateTexture(render_, SDL_PIXELFORMAT_NV12, SDL_TEXTUREACCESS_STREAMING,width,height);

封装xdecode

具体参考 017test_xcode

由于 encode 和 decode 的流程是一样的，所以可以提取到共同的xcode上，然后分别继承就好了

xdecode

#pragma once
#include "xcode.h"

// Decoder wrapper; inherits the shared members from XCode (declared in xcode.h).
class XVIDEO_VIEW_API XDecode : public XCode
{
public:
    // Hands one packet to the decoder; presumably returns false on failure — confirm in the .cpp.
    bool SendPkt(AVPacket* pkt);
    // Receives one decoded frame into `frame`; presumably returns false when none is available — confirm in the .cpp.
    bool RecvFrame(AVFrame* frame);
    // Returns the frames obtained at end of stream.
    // NOTE(review): ownership of the returned frames is not visible here — confirm who frees them.
    std::vector<AVFrame*> End();
};

xcode

#pragma once
#include <mutex>
#include <vector>

#ifdef XVIDEO_VIEW_EXPORTS
#define XVIDEO_VIEW_API __declspec(dllexport) 
#else
#define XVIDEO_VIEW_API __declspec(dllimport) 
#endif

// Reports an error code; defined elsewhere (presumably prints the FFmpeg error text — confirm).
void PrintErr(int re);

struct AVCodecContext;
struct AVPacket;
struct AVFrame;
// Base class holding what the encoder and decoder wrappers have in common:
// the codec context, a frame pointer and the mutex guarding them.
class XVIDEO_VIEW_API XCode
{
public:
	// Static factory for a codec context; is_enc presumably selects encoder vs. decoder — confirm.
	// The raw pointer is returned to the caller, who has to manage its lifetime.
	static AVCodecContext* Create(int id, bool is_enc);
	bool Open();
	// Set a codec option by name, with a string or an integer value.
	bool SetContextOption(const char* key, const char* val);
	bool SetContextOption(const char* key, int val);
	// NOTE(review): presumably stores `context` in context_ — confirm in the .cpp.
	bool SetContext(AVCodecContext* context);
	AVFrame* CreateFrame();
	bool Close();

public:
	std::mutex mut_;                     // guards the members below
	AVCodecContext* context_ = nullptr;  // codec context; nullptr until one is set
	AVFrame* frame_ = nullptr;
};

DXVA不复制直接用显存渲染

这个因为他是翻源码自己去定义的结构体，而且只针对dxva能用，感觉实际没有太大的参考价值，需要了再来查 #todo

ffmpeg硬解码时需要把数据从内存拷贝到显存

取出后再用SDL渲染会把数据从显存拿到内存，再渲染的时候又拷贝到显存

编解码综合练习

将h264文件解码，改变像素尺寸400x300，重新编码成h264

具体参考 018exe_encode_decode

把这个用到的接口也去封装一下 #todo

效果

字节对齐问题

通过发送h264的avpacket，返回的avframe 640x360直接写入文件是可以正常播放的

// Write the Y, U and V planes back to back. Each plane is written as one
// contiguous run, i.e. rows are treated as having no padding.
ofs.write((char *)frame->data[0], frame->width * frame->height);
ofs.write((char *)frame->data[1], frame->width * frame->height / 4);
ofs.write((char *)frame->data[2], frame->width * frame->height / 4);

assets>ffplay -video_size 640x360 ./resize.yuv

但是转换成400x300后出现字节不对齐问题

context_sws = sws_getCachedContext(					// get the frame conversion context; passing null creates one
	context_sws,
	frame->width, frame->height, (AVPixelFormat)frame->format,	// source width, height and pixel format
	width_dst, height_dst, (AVPixelFormat)frame->format,		// destination width, height and pixel format
	SWS_BILINEAR,												// scaling algorithm flag
	0, 0, 0
);

sws_scale(											// resize the frame
	context_sws,
	frame->data,frame->linesize,0,frame->height,
	frame_dst->data,frame_dst->linesize
);

// NOTE(review): these writes take width * height bytes straight from each
// plane and do not account for frame_dst->linesize (row padding).
ofs.write((char *)frame_dst->data[0], frame_dst->width * frame_dst->height);
ofs.write((char *)frame_dst->data[1], frame_dst->width * frame_dst->height / 4);
ofs.write((char *)frame_dst->data[2], frame_dst->width * frame_dst->height / 4);

这里手动把 linesize[0] 对齐到 frame_dst->width

// 较大的数组创建在堆上，否则运行时栈会报错
auto cache = make_unique<unsigned char[]>(1920 * 1080 * 8); 
auto size_y = frame_dst->width * frame_dst->height;
auto size_u = frame_dst->width / 2 * frame_dst->height / 2;

for (int i = 0; i < frame_dst->height; i++) // y
{
	int src_offset = i * frame_dst->linesize[0];		// 源：带padding的偏移
	int dst_offset = i * frame_dst->width;				// 目标：连续存储的偏移
	memcpy(cache.get() + dst_offset,
		frame_dst->data[0] + src_offset,
		frame_dst->width);
}
for (int i = 0; i < frame_dst->height / 2; i++) // u
{
	int src_offset = i * frame_dst->linesize[1];		// 源：带padding的偏移
	int dst_offset = i * frame_dst->width / 2;          // 目标：连续存储的偏移
	memcpy(cache.get() + dst_offset + size_y,
		frame_dst->data[1] + src_offset,
		frame_dst->width / 2);
}
for (int i = 0; i < frame_dst->height / 2; i++) // v
{
	int src_offset = i * frame_dst->linesize[2];		// 源：带padding的偏移
	int dst_offset = i * frame_dst->width / 2;			// 目标：连续存储的偏移
	memcpy(cache.get() + dst_offset + size_y + size_u,
		frame_dst->data[2] + src_offset,
		frame_dst->width / 2);
}

ofs.write((char *)cache.get(), frame_dst->width * frame_dst->height * 3 / 2);

时间戳pts

需要手动地给 frame_dst 添加时间戳

int pts = 0;

while(true)
{
	// Stamp the frame BEFORE handing it to the encoder: without an explicit
	// value pts is arbitrary and the FFmpeg libraries report an error.
	frame_dst->pts = pts++;

	re = avcodec_send_frame(context_enc,frame_dst); // send the raw frame
	if (re != 0) break;
}