说明
此代码仅限于 NV21 格式转 ARGB 格式。NV21 格式中,Y 分量单独存储,UV 分量交错存储。使用如下公式:R = Y + 1.402*(V-128);G = Y - 0.34414*(U-128) - 0.71414*(V-128);B = Y + 1.772*(U-128)。浮点乘法用 6 位定点精度处理(即 a*b ≈ ((a << 6) * b) >> 6,其中 a << 6 表示把浮点系数 a 放大 64 倍后取整)。
代码
#ifdef HAS_NEON
#include <arm_neon.h>
#endif
/* Clamp an intermediate channel value into the 0..255 byte range. */
static unsigned char clamp_to_byte(int value)
{
    if (value < 0) {
        return 0;
    }
    if (value > 255) {
        return 255;
    }
    return (unsigned char)value;
}

/*
 * Convert one pixel to RGB with 7-bit fixed-point coefficients and store it
 * at dst as the four bytes B, G, R, 0xFF.
 *
 *   R = Y + 1.402   * (V - 128)                          (179 / 128)
 *   G = Y - 0.34414 * (U - 128) - 0.71414 * (V - 128)    (43 / 128, 91 / 128)
 *   B = Y + 1.772   * (U - 128)                          (227 / 128)
 *
 * Both chroma terms of G are subtracted from Y, so they are summed before
 * the shift.
 */
static void store_bgra_pixel(unsigned char* dst, int luma, int u, int v)
{
    const int cu = u - 128;
    const int cv = v - 128;

    const int red   = luma + ((179 * cv) >> 7);
    const int green = luma - ((43 * cu + 91 * cv) >> 7);
    const int blue  = luma + ((227 * cu) >> 7);

    dst[0] = clamp_to_byte(blue);
    dst[1] = clamp_to_byte(green);
    dst[2] = clamp_to_byte(red);
    dst[3] = 0xFF;
}

/*
 * Convert a semi-planar YUV 4:2:0 image (a w*h luma plane followed by an
 * interleaved chroma plane, one chroma pair per 2x2 pixels) to 32-bit pixels.
 *
 * yuv  : input buffer; luma at yuv[0 .. w*h), chroma starting at yuv + w*h.
 *        In each chroma pair the first byte is used as U and the second as V.
 * w, h : image width and height in pixels. Chroma rows are addressed with a
 *        stride of w bytes, which matches the usual layout only for even w
 *        (NOTE(review): confirm callers never pass an odd width).
 * rgba : output buffer of w*h ints. Each pixel is written as the bytes
 *        B, G, R, 0xFF in memory order (0xFFRRGGBB when read back as a
 *        little-endian int).
 *
 * Does nothing when a pointer is null or a dimension is not positive.
 *
 * When HAS_NEON is defined, each row is processed 16 pixels at a time by the
 * NEON loop (6-bit fixed point); the remaining w % 16 pixels, or the whole
 * row otherwise, go through the scalar path.
 */
void convertToRGBA(unsigned char* yuv, int w, int h, int* rgba)
{
    if (!yuv || !rgba || w <= 0 || h <= 0) {
        return;
    }

    for (int i = 0; i < h; ++i)
    {
        unsigned char* dst = (unsigned char*)(rgba + w*i);
        unsigned char* y = yuv + w*i;
        /* Two consecutive luma rows share one chroma row. */
        unsigned char* uv = yuv + w*h + w*(i/2);
        int count = w;
#ifdef HAS_NEON
        int c = count/16;
        __asm__ __volatile__(
            /* movs (not mov) so that the beq below really tests c == 0. */
            "movs r4, %[c]\t\n"
            "beq 2f\t\n"
            /* Alpha lanes for both stores. */
            "vmov.u8 d7, #255\t\n"
            "vmov.u8 d3, #255\t\n"
            /* Coefficients scaled by 64, and the chroma bias 128. */
            "vmov.s16 q11, #90\t\n"
            "vmov.s16 q12, #128\t\n"
            "vmov.s16 q13, #21\t\n"
            "vmov.s16 q14, #46\t\n"
            "vmov.s16 q15, #113\t\n"
            "1:\t\n"
            /* 16 luma bytes: d8 = even-indexed pixels, d9 = odd-indexed. */
            "vld2.8 {d8, d9}, [%[y]]!\t\n"
            /* 8 chroma pairs: d0 = first byte of each pair (U), d1 = second (V). */
            "vld2.8 {d0, d1}, [%[uv]]!\t\n"
            "vmovl.u8 q5, d0\t\n"
            "vmovl.u8 q6, d1\t\n"
            "vsub.i16 q5,q5, q12\t\n"
            "vsub.i16 q6,q6, q12\t\n"
            /* Even pixels: q7 = R, q8 = G, q9 = B, all in 6-bit fixed point. */
            "vshll.u8 q7, d8, #6\t\n"
            "vshll.u8 q8, d8, #6\t\n"
            "vshll.u8 q9, d8, #6\t\n"
            "vmla.i16 q7, q6, q11\t\n"
            "vmls.i16 q8, q5, q13\t\n"
            "vmls.i16 q8, q6, q14\t\n"
            "vmla.i16 q9, q5, q15\t\n"
            "vshr.s16 q7, q7, #6\t\n"
            "vshr.s16 q8, q8, #6\t\n"
            "vshr.s16 q9, q9, #6\t\n"
            /* Clamp to 0..255, then narrow to bytes. */
            "vmov.s16 q10, #0\t\n"
            "vmax.s16 q7, q7, q10\t\n"
            "vmax.s16 q8, q8, q10\t\n"
            "vmax.s16 q9, q9, q10\t\n"
            "vmov.u16 q10, #255\t\n"
            "vmin.u16 q7, q7, q10\t\n"
            "vmin.u16 q8, q8, q10\t\n"
            "vmin.u16 q9, q9, q10\t\n"
            "vmovn.s16 d2, q7\t\n"
            "vmovn.s16 d1, q8\t\n"
            "vmovn.s16 d0, q9\t\n"
            /* Odd pixels: same computation, reusing the same chroma lanes. */
            "vshll.u8 q7, d9, #6\t\n"
            "vshll.u8 q8, d9, #6\t\n"
            "vshll.u8 q9, d9, #6\t\n"
            "vmla.i16 q7, q6, q11\t\n"
            "vmls.i16 q8, q5, q13\t\n"
            "vmls.i16 q8, q6, q14\t\n"
            "vmla.i16 q9, q5, q15\t\n"
            "vshr.s16 q7, q7, #6\t\n"
            "vshr.s16 q8, q8, #6\t\n"
            "vshr.s16 q9, q9, #6\t\n"
            "vmov.s16 q10, #0\t\n"
            "vmax.s16 q7, q7, q10\t\n"
            "vmax.s16 q8, q8, q10\t\n"
            "vmax.s16 q9, q9, q10\t\n"
            "vmov.u16 q10, #255\t\n"
            "vmin.u16 q7, q7, q10\t\n"
            "vmin.u16 q8, q8, q10\t\n"
            "vmin.u16 q9, q9, q10\t\n"
            "vmovn.s16 d6, q7\t\n"
            "vmovn.s16 d5, q8\t\n"
            "vmovn.s16 d4, q9\t\n"
            /*
             * Re-interleave even/odd results per channel: afterwards
             * d0-d2 hold pixels 0..7 and d4-d6 hold pixels 8..15.
             */
            "vtrn.8 d2,d6\t\n"
            "vtrn.16 d2,d6\t\n"
            "vtrn.32 d2,d6\t\n"
            "vtrn.8 d1,d5\t\n"
            "vtrn.16 d1,d5\t\n"
            "vtrn.32 d1,d5\t\n"
            "vtrn.8 d0,d4\t\n"
            "vtrn.16 d0,d4\t\n"
            "vtrn.32 d0,d4\t\n"
            /* Store B, G, R, A interleaved: 8 pixels per instruction. */
            "vst4.8 {d0-d3}, [%[dst]]!\t\n"
            "vst4.8 {d4-d7}, [%[dst]]!\t\n"
            "subs r4, r4, #1\t\n"
            "bne 1b\t\n"
            "2:\t\n"
            : [dst] "+r" (dst), [y] "+r" (y), [uv] "+r" (uv), [c] "+r" (c)
            :
            : "r4", "cc", "memory",
              "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7",
              "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15",
              "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23",
              "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"
        );
        /* The asm advanced y, uv and dst past the pixels it handled. */
        count %= 16;
#endif
        /* Scalar path: two horizontally adjacent pixels share one chroma pair. */
        while (count > 1)
        {
            store_bgra_pixel(dst, y[0], uv[0], uv[1]);
            store_bgra_pixel(dst + 4, y[1], uv[0], uv[1]);
            y += 2;
            uv += 2;
            dst += 8;
            count -= 2;
        }
        /* Odd trailing pixel. */
        if (count > 0)
        {
            store_bgra_pixel(dst, y[0], uv[0], uv[1]);
        }
    }
}
转载请注明原文地址: https://www.6miu.com/read-16445.html