二. 源码注释分析:
/*
=======Analysed by: yangxin
=======Date: 2018.10
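=======Function: encodeResAndCalcRdInterCU() encodes the residual of an inter CU (merge/skip as well as regular inter/AMVP), computes its RD cost, and runs the entropy coder to count the bits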
*/
/* Encode the residual of an inter-predicted CU and calculate its rate-distortion cost.
 *
 * Steps, in order:
 *   1. residual = source (fenc) - prediction;
 *   2. estimateResidualQT() evaluates the residual quadtree and fills `costs`;
 *   3. unless the CU is transquant-bypass coded, the cost of signalling no residual at all
 *      (root CBF = 0) is compared with costs.rdcost; if cheaper, the CBFs are cleared;
 *   4. the CU syntax is run through the entropy coder to count bits
 *      (skip flag + merge index, or skip flag + pred mode + part size + pred info + coefficients);
 *   5. the reconstruction is built and the final distortion, energy and bit counts are
 *      written into interMode, followed by updateModeCost() and checkDQP().
 *
 * Note: this function overwrites the RD cost variables of interMode, but leaves the sa8d cost unharmed.
 *
 * interMode  mode under evaluation; predYuv and fencYuv are read, while cu, reconYuv,
 *            contexts and the distortion/bit/energy fields are written
 * cuGeom     geometry of the CU; depth and log2CUSize are used here */
void Search::encodeResAndCalcRdInterCU(Mode& interMode, const CUGeom& cuGeom)
{
    ProfileCUScope(interMode.cu, interRDOElapsedTime[cuGeom.depth], countInterRDO[cuGeom.depth]);

    CUData& cu = interMode.cu;
    Yuv* reconYuv = &interMode.reconYuv;
    Yuv* predYuv = &interMode.predYuv;
    uint32_t depth = cuGeom.depth;
    ShortYuv* resiYuv = &m_rqt[depth].tmpResiYuv;
    const Yuv* fencYuv = interMode.fencYuv;

    X265_CHECK(!cu.isIntra(0), "intra CU not expected\n");

    uint32_t log2CUSize = cuGeom.log2CUSize;
    int sizeIdx = log2CUSize - 2;

    /* residual = source block - prediction block */
    resiYuv->subtract(*fencYuv, *predYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);

    /* TU quadtree range of this CU; [0] and [1] are used below as the min and max log2 TU size */
    uint32_t tuDepthRange[2];
    cu.getInterTUQtDepthRange(tuDepthRange, 0);

    m_entropyCoder.load(m_rqt[depth].cur);

    if ((m_limitTU & X265_TU_LIMIT_DFS) && !(m_limitTU & X265_TU_LIMIT_NEIGH))
        m_maxTUDepth = -1;
    else if (m_limitTU & X265_TU_LIMIT_BFS)
        memset(&m_cacheTU, 0, sizeof(TUInfoCache));

    /* Estimate the RD cost and bit cost of the residual quadtree (transform + quantization search) */
    Cost costs;
    if (m_limitTU & X265_TU_LIMIT_NEIGH)
    {
        /* Save and reload maxTUDepth to avoid changing of maxTUDepth between modes */
        int32_t tempDepth = m_maxTUDepth;
        if (m_maxTUDepth != -1)
        {
            uint32_t splitFlag = interMode.cu.m_partSize[0] != SIZE_2Nx2N;
            uint32_t minSize = tuDepthRange[0];
            uint32_t maxSize = tuDepthRange[1];
            maxSize = X265_MIN(maxSize, cuGeom.log2CUSize - splitFlag);
            m_maxTUDepth = x265_clip3(cuGeom.log2CUSize - maxSize, cuGeom.log2CUSize - minSize, (uint32_t)m_maxTUDepth);
        }
        estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);
        m_maxTUDepth = tempDepth;
    }
    else
        estimateResidualQT(interMode, cuGeom, 0, 0, *resiYuv, costs, tuDepthRange);

    /* transquant-bypass flag of the CU (lossless coding); the "no residual" shortcut is skipped when set */
    uint32_t tqBypass = cu.m_tqBypass[0];
    if (!tqBypass)
    {
        /* Distortion if no residual is coded: source vs. prediction */
        sse_t cbf0Dist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
        if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
        {
            /* Each buffer is paired with its own chroma stride, as in the final chroma distortion below */
            cbf0Dist += m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, predYuv->m_buf[1], predYuv->m_csize));
            cbf0Dist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, predYuv->m_buf[2], predYuv->m_csize));
        }

        /* Consider the RD cost of not signaling any residual */
        m_entropyCoder.load(m_rqt[depth].cur);
        m_entropyCoder.resetBits();
        m_entropyCoder.codeQtRootCbfZero();
        uint32_t cbf0Bits = m_entropyCoder.getNumberOfWrittenBits();

        uint32_t cbf0Energy; /* only assigned and read on the psy-rd / ssim-rd paths */
        uint64_t cbf0Cost;
        if (m_rdCost.m_psyRd)
        {
            cbf0Energy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
            cbf0Cost = m_rdCost.calcPsyRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
        }
        else if (m_rdCost.m_ssimRd)
        {
            cbf0Energy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size, log2CUSize, TEXT_LUMA, 0);
            cbf0Cost = m_rdCost.calcSsimRdCost(cbf0Dist, cbf0Bits, cbf0Energy);
        }
        else
            cbf0Cost = m_rdCost.calcRdCost(cbf0Dist, cbf0Bits);

        /* Dropping the residual is cheaper: clear all CBFs and reset the TU depth to 0 */
        if (cbf0Cost < costs.rdcost)
        {
            cu.clearCbf();
            cu.setTUDepthSubParts(0, 0, depth);
        }
    }

    /* Keep the residual quadtree data only when some residual is actually coded */
    if (cu.getQtRootCbf(0))
        saveResidualQTData(cu, *resiYuv, 0, 0);

    /* calculate signal bits for inter/merge/skip coded CU */
    m_entropyCoder.load(m_rqt[depth].cur);
    m_entropyCoder.resetBits(); /* start the bit count from zero */
    if (m_slice->m_pps->bTransquantBypassEnabled)
        m_entropyCoder.codeCUTransquantBypassFlag(tqBypass);

    uint32_t coeffBits, bits, mvBits;
    if (cu.m_mergeFlag[0] && cu.m_partSize[0] == SIZE_2Nx2N && !cu.getQtRootCbf(0))
    {
        /* Merge/Skip: 2Nx2N merge with no residual is coded as a skipped CU */
        cu.setPredModeSubParts(MODE_SKIP);

        coeffBits = mvBits = 0;
        m_entropyCoder.codeSkipFlag(cu, 0);
        int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
        m_entropyCoder.codeMergeIndex(cu, 0);
        mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;
        bits = mvBits + skipFlagBits;
    }
    else
    {
        /* Regular inter CU: skip flag, prediction mode, partition size, prediction info, then coefficients */
        m_entropyCoder.codeSkipFlag(cu, 0);
        int skipFlagBits = m_entropyCoder.getNumberOfWrittenBits();
        m_entropyCoder.codePredMode(cu.m_predMode[0]);
        m_entropyCoder.codePartSize(cu, 0, cuGeom.depth);
        m_entropyCoder.codePredInfo(cu, 0);
        mvBits = m_entropyCoder.getNumberOfWrittenBits() - skipFlagBits;

        bool bCodeDQP = m_slice->m_pps->bUseDQP;
        m_entropyCoder.codeCoeff(cu, 0, bCodeDQP, tuDepthRange);
        bits = m_entropyCoder.getNumberOfWrittenBits();

        coeffBits = bits - mvBits - skipFlagBits;
    }

    /* Remember the entropy coder state reached after coding this mode */
    m_entropyCoder.store(interMode.contexts);

    /* Reconstruction: prediction + residual when any CBF is set, otherwise a copy of the prediction */
    if (cu.getQtRootCbf(0))
        reconYuv->addClip(*predYuv, *resiYuv, log2CUSize, m_frame->m_fencPic->m_picCsp);
    else
        reconYuv->copyFromYuv(*predYuv);

    // update with clipped distortion and cost (qp estimation loop uses unclipped values)
    /* luma */
    sse_t bestLumaDist = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
    interMode.distortion = bestLumaDist;

    /* chroma */
    if (m_csp != X265_CSP_I400 && m_frame->m_fencPic->m_picCsp != X265_CSP_I400)
    {
        sse_t bestChromaDist = m_rdCost.scaleChromaDist(1, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[1], fencYuv->m_csize, reconYuv->m_buf[1], reconYuv->m_csize));
        bestChromaDist += m_rdCost.scaleChromaDist(2, primitives.chroma[m_csp].cu[sizeIdx].sse_pp(fencYuv->m_buf[2], fencYuv->m_csize, reconYuv->m_buf[2], reconYuv->m_csize));
        interMode.chromaDistortion = bestChromaDist;
        interMode.distortion += bestChromaDist; /* total distortion = luma + scaled chroma */
    }

    if (m_rdCost.m_psyRd)
        interMode.psyEnergy = m_rdCost.psyCost(sizeIdx, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size);
    else if (m_rdCost.m_ssimRd)
        interMode.ssimEnergy = m_quant.ssimDistortion(cu, fencYuv->m_buf[0], fencYuv->m_size, reconYuv->m_buf[0], reconYuv->m_size, cu.m_log2CUSize[0], TEXT_LUMA, 0);

    /* Luma SSE between source and prediction, i.e. the energy of the luma residual before coding */
    interMode.resEnergy = primitives.cu[sizeIdx].sse_pp(fencYuv->m_buf[0], fencYuv->m_size, predYuv->m_buf[0], predYuv->m_size);
    interMode.totalBits = bits;
    interMode.lumaDistortion = bestLumaDist;
    interMode.coeffBits = coeffBits;
    interMode.mvBits = mvBits;
    cu.m_distortion[0] = interMode.distortion; /* store the total distortion on the CU as well */

    updateModeCost(interMode); /* recompute the mode's overall cost from the fields set above */
    checkDQP(interMode, cuGeom);
}