NV12转YUV444加速

问题描述

我有一个将图像从 nv12 转换为 yuv444 的代码

# Per-pixel NV12 -> YUV444 conversion in pure Python (slow reference version).
# Every pixel gets its own Y sample; the U and V samples are looked up with
# halved row/column indices, so each chroma sample is reused for a 2x2 cell.
img_h, img_w = self.img_shape[0], self.img_shape[1]
chroma_w = int(img_w / 2)
for h in range(img_h):
    # centralize yuv 444 data for inference framework
    y_row_start = h * img_w
    chroma_row_start = int(h / 2) * chroma_w
    for w in range(img_w):
        chroma_idx = chroma_row_start + int(w / 2)
        yuv444_res[h][w][0] = (nv12_y_data[y_row_start + w]).astype(np.int8)
        yuv444_res[h][w][1] = (nv12_u_data[chroma_idx]).astype(np.int8)
        yuv444_res[h][w][2] = (nv12_v_data[chroma_idx]).astype(np.int8)

由于 for 循环在 Python 中很慢，比 NumPy 慢很多，我想知道这个转换是否可以完全用 NumPy 计算来完成。

2021 年 6 月 15 日更新：

我从这个页面（外部链接）得到了下面这段使用花式索引（fancy indexing）的代码：

    # Output image: one (height, width) plane per channel, stored as uint8.
    yuv444 = np.empty([self.height,self.width,3],dtype=np.uint8)
    # Y plane: the first width*height samples of the one-dimensional buffer.
    yuv444[:,:,0] = nv12_data[:self.width * self.height].reshape(
        self.height,self.width)
    # U samples sit at the even offsets of the interleaved chroma area that
    # follows the Y plane; they form a half-height, half-width plane.
    u = nv12_data[self.width * self.height::2].reshape(
        self.height // 2,self.width // 2)
    # Pin the filter to NEAREST: Pillow's default resample filter differs
    # between releases, and only nearest-neighbour reproduces the result of
    # the per-pixel loop (each chroma sample repeated over a 2x2 cell).
    yuv444[:,:,1] = Image.fromarray(u).resize(
        (self.width,self.height),resample=Image.NEAREST)
    # V samples sit at the odd offsets of the same chroma area.
    v = nv12_data[self.width * self.height + 1::2].reshape(
        self.height // 2,self.width // 2)
    yuv444[:,:,2] = Image.fromarray(v).resize(
        (self.width,self.height),resample=Image.NEAREST)

    # Reinterpret as int8, as the per-pixel loop version does.
    data[0] = yuv444.astype(np.int8)

如果使用 PIL 替换已弃用的 imresize，则代码与旧代码 100% 匹配

2021 年 6 月 19 日更新：

仔细查看 Rotem 给出的答案后，我意识到他的方式更快。

    # nv12_data is reshaped to one dimension: the Y plane comes first,
    # followed by the interleaved U/V samples.
    y = nv12_data[:self.width * self.height].reshape(
        self.height,self.width)
    # Even offsets of the chroma area hold U, odd offsets hold V; each forms
    # a half-height, half-width plane.
    shrunk_u = nv12_data[self.width * self.height::2].reshape(
        self.height // 2,self.width // 2)
    shrunk_v = nv12_data[self.width * self.height + 1::2].reshape(
        self.height // 2,self.width // 2)
    # Upsample both chroma planes to full size. cv2.resize takes the target
    # size as (width, height); NEAREST repeats each sample over a 2x2 cell.
    u = cv2.resize(shrunk_u,(self.width,self.height),interpolation=cv2.INTER_NEAREST)
    v = cv2.resize(shrunk_v,(self.width,self.height),interpolation=cv2.INTER_NEAREST)
    # Stack the planes along a new last axis -> (height, width, 3).
    yuv444 = np.dstack((y,u,v))

另外，我对处理 1000 张图片的耗时进行了比较。结果表明 cv2.resize 更快，并且两者的结果完全相同。

cv time: 4.417593002319336,pil time: 5.395732164382935

2021 年 6 月 25 日更新：

Pillow resize 在不同版本有不同的默认重采样参数值。

5.1.0：

def resize(self,size,resample=NEAREST,box=None):

8.1.0：

def resize(self,size,resample=BICUBIC,box=None,reducing_gap=None):

最好指定使用的重采样策略。

解决方法

您可以按照相反的顺序使用我在下面post中描述的过程（没有 RGB 部分）。

插图：

首先使用 FFmpeg（命令行工具）创建 NV12 格式的合成示例图像。
示例图片用于测试。

使用子进程模块从 Python 执行：

import subprocess as sp
import shlex

# Step 1: render one frame of FFmpeg's synthetic "testsrc" pattern (192x108)
# and store it as raw NV12 bytes in nv12.yuv.
make_nv12_cmd = 'ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1:duration=1 -vcodec rawvideo -pix_fmt nv12 nv12.yuv'
sp.run(shlex.split(make_nv12_cmd))

# Step 2: re-read the same raw bytes as a 192x162 single-channel image
# (162 = 108 * 3 / 2) and save it as a PNG for easy loading.
nv12_to_png_cmd = 'ffmpeg -y -f rawvideo -video_size 192x162 -pixel_format gray -i nv12.yuv -pix_fmt gray nv12_gray.png'
sp.run(shlex.split(nv12_to_png_cmd))

读取示例图片，并执行您帖子中的代码（用作参考）：

import numpy as np
import cv2

# Load the NV12 buffer that was saved as a single-channel image.
nv12 = cv2.imread('nv12_gray.png',cv2.IMREAD_GRAYSCALE)
# The stored image is 3/2 times as tall as the picture: the top two thirds
# are the Y plane, the remaining rows hold the interleaved U/V samples.
rows = nv12.shape[0]*2//3
cols = nv12.shape[1]

# Reference implementation - using for-loops (the solution is in the part below):
################################################################################
nv12_y_data = nv12[:rows].flatten()
chroma_rows = nv12[rows:]
nv12_u_data = chroma_rows[:,0::2].flatten()  # even columns -> U
nv12_v_data = chroma_rows[:,1::2].flatten()  # odd columns  -> V

yuv444_res = np.zeros((rows,cols,3),np.uint8)

half_cols = int(cols / 2)
for h in range(rows):
    # centralize yuv 444 data for inference framework
    y_offset = h * cols
    chroma_offset = int(h / 2) * half_cols
    for w in range(cols):
        chroma_idx = chroma_offset + int(w / 2)
        yuv444_res[h][w][0] = (nv12_y_data[y_offset + w]).astype(np.int8)
        yuv444_res[h][w][1] = (nv12_u_data[chroma_idx]).astype(np.int8)
        yuv444_res[h][w][2] = (nv12_v_data[chroma_idx]).astype(np.int8)

################################################################################

我建议的解决方案包含以下几个步骤：

将 U 和 V 分成两个“半尺寸”矩阵 shrunk_u 和 shrunk_v。
使用 cv2.resize 将 shrunk_u 和 shrunk_v 调整为完整图像大小的矩阵。
在我的代码示例中，我使用最近邻插值来获得与您的结果相同的结果。
建议将其替换为线性插值以获得更好的质量。
使用 np.dstack 将 Y、U 和 V 合并为 YUV（3 个颜色通道）图像。

这是完整的代码示例：

import numpy as np
import subprocess as sp
import shlex
import cv2

# Build a synthetic 192x108 NV12 test frame with FFmpeg, then re-read the raw
# bytes as a 192x162 grayscale PNG (108 rows of Y followed by 54 rows of
# interleaved U/V samples) so it can be loaded with cv2.imread.
sp.run(shlex.split('ffmpeg -y -f lavfi -i testsrc=size=192x108:rate=1:duration=1 -vcodec rawvideo -pix_fmt nv12 nv12.yuv'))
sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x162 -pixel_format gray -i nv12.yuv -pix_fmt gray nv12_gray.png'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x108 -pixel_format nv12 -i nv12.yuv -vcodec rawvideo -pix_fmt yuv444p yuv444.yuv'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x324 -pixel_format gray -i yuv444.yuv -pix_fmt gray yuv444_gray.png'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x108 -pixel_format yuv444p -i yuv444.yuv -pix_fmt rgb24 rgb.png'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 192x108 -pixel_format gbrp -i yuv444.yuv -filter_complex "extractplanes=g+b+r[g][b][r],[r][g][b]mergeplanes=0x001020:gbrp[v]" -map "[v]" -vcodec rawvideo -pix_fmt rgb24 yuvyuv.yuv'))
#sp.run(shlex.split('ffmpeg -y -f rawvideo -video_size 576x108 -pixel_format gray -i yuvyuv.yuv -pix_fmt gray yuvyuv_gray.png'))

# Load the NV12 buffer; the picture height is two thirds of the stored height.
nv12 = cv2.imread('nv12_gray.png',cv2.IMREAD_GRAYSCALE)
cols,rows = nv12.shape[1],nv12.shape[0]*2//3

# Reference implementation - using for-loops (the solution is in the part below):
################################################################################
nv12_y_data = nv12[0:rows,:].flatten()
nv12_u_data = nv12[rows:,0::2].flatten()  # even columns of the chroma rows -> U
nv12_v_data = nv12[rows:,1::2].flatten()  # odd columns of the chroma rows  -> V

yuv444_res = np.zeros((rows,cols,3),np.uint8)

for h in range(rows):
    # centralize yuv 444 data for inference framework
    for w in range(cols):
        yuv444_res[h][w][0] = (nv12_y_data[h * cols + w]).astype(np.int8)
        yuv444_res[h][w][1] = (nv12_u_data[int(h / 2) * int(cols / 2) + int(w / 2)]).astype(np.int8)
        yuv444_res[h][w][2] = (nv12_v_data[int(h / 2) * int(cols / 2) + int(w / 2)]).astype(np.int8)

################################################################################

# Vectorized solution: split the planes, upsample chroma, stack the channels.
y = nv12[0:rows,:]
shrunk_u = nv12[rows:,0::2].copy()
shrunk_v = nv12[rows:,1::2].copy()

# cv2.resize takes the target size as (width, height).
u = cv2.resize(shrunk_u,(cols,rows),interpolation=cv2.INTER_NEAREST)  # Resize U channel (use NEAREST interpolation - fastest,but lowest quality).
v = cv2.resize(shrunk_v,(cols,rows),interpolation=cv2.INTER_NEAREST)  # Resize V channel

yuv444 = np.dstack((y,u,v))

# Check the vectorized result against the for-loop reference.
is_eqaul = np.all(yuv444 == yuv444_res)
print('is_eqaul = ' + str(is_eqaul))  # is_eqaul = True

# Convert to RGB for display
yvu = np.dstack((y,v,u))  # Channels ordered Y,V,U for COLOR_YCrCb2BGR, which is used because it applies the correct conversion coefficients.
rgb = cv2.cvtColor(yvu,cv2.COLOR_YCrCb2BGR)

# Show results:
cv2.imshow('nv12',nv12)
cv2.imshow('yuv444_res',yuv444_res)
cv2.imshow('yuv444',yuv444)
cv2.imshow('rgb',rgb)
cv2.waitKey()
cv2.destroyAllWindows()

输入（NV12 显示为灰度）：

输出（转换为RGB后）：

这看起来正是花式索引（高级索引）的典型应用场景。

这样的事情应该可以解决问题，尽管我没有在实际图像上验证它。我在开始时添加了一个部分来重建图像，因为将数组作为一个整体处理比分解成多个部分更容易。很可能，您可以重构它并避免一开始就拆分它。

# reconstruct the image planes
# The per-pixel loop indexes the chroma data with halved row/column indices,
# so U and V are half-height, half-width planes; only Y is full size.
# NOTE(review): assumes even height and width - confirm for your frames.
height, width = self.img_shape[0], self.img_shape[1]
y = nv12_y_data.reshape(height, width)
u = nv12_u_data.reshape(height // 2, width // 2)
v = nv12_v_data.reshape(height // 2, width // 2)

# take every index twice until half the range
# idx_h is a column vector and idx_w a row vector, so indexing with both
# broadcasts to a full (height, width) grid of chroma lookups.
idx_h = np.repeat(np.arange(height // 2), 2)[:, None]
idx_w = np.repeat(np.arange(width // 2), 2)[None, :]

# convert
yuv444 = np.empty((height, width, 3), dtype=np.uint8)
yuv444[..., 0] = y
yuv444[..., 1] = u[idx_h, idx_w]
yuv444[..., 2] = v[idx_h, idx_w]

如果这是您的关键路径，并且您想提高性能，您可以考虑先处理图像通道，这在现代 CPU（但不是 GPU）上会更快。

for-loop numpy yuv