利用投影法基于Opencv的文本定位

xiaoxiao2021-02-28 53

对于发票识别这类自然场景下的文本识别，对结果影响最大的环节应该是文本定位。

下面的实现参考了 BoyTNT 的文章《基于OpenCV进行文本分块切割》(C#)。

本人写了一份 C++ 版本的代码，仅供参考。

因为有一些干扰，所以文本定位并不是很完美，可以通过二值化的方法去掉一些干扰，以后再改进。

代码

#include <opencv2/opencv.hpp>

#include <iostream>
#include <string>
#include <vector>

using namespace cv;
using namespace std;

// Pixel value counted as "ink" in the inverted binary image.
static const uchar kForeground = 255;

// Row segmentation: a text line starts when a row holds more than this many
// ink pixels and may end when a row holds fewer.
static const int kRowInkThreshold = 10;
// A line is not allowed to close until it is taller than this many rows.
static const int kRowMinRun = 5;
// Lines that are not taller than this are discarded (e.g. separator rules).
static const int kRowMinHeight = 10;

// Column segmentation: a block starts when a column holds at least this many
// ink pixels and may end when a column holds fewer.
static const int kBlockInkThreshold = 2;
// A block is not allowed to close until it is wider than this many columns.
static const int kBlockMinWidth = 10;
// Blocks separated by at most this many columns are merged into one.
static const int kBlockMergeGap = 5;
// Width of the horizontal structuring element used to join nearby glyphs.
static const int kDilateWidth = 10;

// Builds the full-width rectangle for a text line covering rows
// [start, bottom). One row of margin is added above the line, clamped to the
// image so the rectangle is always a valid ROI.
static CvRect MakeRowRect(int start, int bottom, int width)
{
    const int top = start > 0 ? start - 1 : 0;
    return cvRect(0, top, width, bottom - top);
}

// Appends the block covering columns [start, end) to `blocks`. If the gap to
// the previously recorded block is at most kBlockMergeGap, the previous block
// is widened instead of adding a new one.
static void AppendBlock(vector<CvRect>& blocks, int start, int end, int rowY, int height)
{
    if (!blocks.empty()) {
        CvRect& last = blocks.back();
        if (start - last.x - last.width <= kBlockMergeGap) {
            last.width = end - last.x;
            return;
        }
    }
    blocks.push_back(cvRect(start, rowY, end - start, height));
}

// Splits a binary image into horizontal text lines using a row projection.
//
// gray:    single-channel 8-bit image; pixels equal to 255 are counted as ink.
// returns: one full-width rectangle per detected line, top to bottom.
//
// Also shows the input in a window named "d".
vector<CvRect> GetRowRects(Mat gray)
{
    // The projection below reads one byte per pixel.
    CV_Assert(gray.type() == CV_8UC1);
    imshow("d", gray);

    const int height = gray.rows;
    const int width = gray.cols;

    // projection[y] = number of ink pixels in row y.
    vector<int> projection(height, 0);
    for (int y = 0; y < height; ++y) {
        const uchar* pixels = gray.ptr<uchar>(y);
        for (int x = 0; x < width; ++x) {
            if (pixels[x] == kForeground)
                ++projection[y];
        }
    }

    vector<CvRect> rows;
    bool inLine = false;
    int start = 0;
    for (int i = 0; i < height; ++i) {
        if (!inLine && projection[i] > kRowInkThreshold) {
            // Blank area -> text area: remember where the line starts.
            inLine = true;
            start = i;
        } else if ((i - start > kRowMinRun) && projection[i] < kRowInkThreshold && inLine) {
            // Text area -> blank area.
            inLine = false;
            // Ignore lines that are too short, such as separator rules.
            if (i - start > kRowMinHeight)
                rows.push_back(MakeRowRect(start, i + 1, width));
        }
    }

    // A line that runs to the bottom edge never sees a blank row; close it here
    // so text touching the image border is not lost.
    if (inLine && height - start > kRowMinHeight)
        rows.push_back(MakeRowRect(start, height, width));

    return rows;
}

// Splits one text line into horizontal blocks using a column projection.
//
// gray:    single-channel 8-bit image of a single line (typically an ROI of
//          the full binary image); pixels equal to 255 are counted as ink.
// rowY:    y offset of this line in the full image; stored in every returned
//          rectangle so the result is expressed in full-image coordinates.
// returns: the detected blocks, left to right.
//
// The image is dilated in place, and cv::Mat copies share their pixel buffer,
// so the caller's pixels are modified as well. The dilated image is shown in a
// window named "fushi".
vector<CvRect> GetBlockRects(Mat gray, int rowY)
{
    CV_Assert(gray.type() == CV_8UC1);

    const int height = gray.rows;
    const int width = gray.cols;

    // Horizontal dilation so neighbouring characters fuse into one block.
    Mat element = getStructuringElement(MORPH_RECT, Size(kDilateWidth, 1));
    dilate(gray, gray, element);
    imshow("fushi", gray);

    // projection[x] = number of ink pixels in column x.
    // Iterating row by row keeps memory access sequential.
    vector<int> projection(width, 0);
    for (int y = 0; y < height; ++y) {
        const uchar* pixels = gray.ptr<uchar>(y);
        for (int x = 0; x < width; ++x) {
            if (pixels[x] == kForeground)
                ++projection[x];
        }
    }

    vector<CvRect> blocks;
    bool inBlock = false;
    int start = 0;
    // Find the split points from the projection values.
    for (int i = 0; i < width; ++i) {
        if (!inBlock && projection[i] >= kBlockInkThreshold) {
            // Blank area -> text area.
            inBlock = true;
            start = i;
        } else if ((i - start > kBlockMinWidth) && inBlock && projection[i] < kBlockInkThreshold) {
            // Text area -> blank area: record the block (or merge it into the
            // previous one when they are close together).
            inBlock = false;
            AppendBlock(blocks, start, i, rowY, height);
        }
    }

    // A block that runs to the right edge never sees a blank column; close it
    // here so text touching the image border is not lost.
    if (inBlock && width - start > kBlockMinWidth)
        AppendBlock(blocks, start, width, rowY, height);

    return blocks;
}

// Loads an image, binarizes it, locates text blocks by projection and shows
// the result with every block outlined.
//
// Usage: program [image_path]
// Without an argument the original hard-coded test image path is used.
int main(int argc, char* argv[])
{
    const string path = argc > 1 ? argv[1] : "D:/Project/FapiaoSystem/test/fapiao6.jpg";

    // Read the source image.
    Mat image = imread(path);
    if (image.empty()) {
        cerr << "Failed to load image: " << path << endl;
        return 1;
    }
    imshow("原图", image);

    // Shrink images larger than one megapixel, keeping the aspect ratio.
    if (image.rows * image.cols > 1000000) {
        const int targetWidth = image.cols > image.rows ? 1000 : 800;
        const int targetHeight = static_cast<int>(image.rows * 1.0 / image.cols * targetWidth);
        resize(image, image, Size(targetWidth, targetHeight), 0, 0, CV_INTER_LINEAR);
    }

    Mat gray;
    cvtColor(image, gray, CV_BGR2GRAY);

    // Inverted adaptive threshold: ink becomes 255, background becomes 0.
    // Alternative kept from the original for reference:
    //   threshold(gray, gray, 0, 255, THRESH_BINARY_INV | CV_THRESH_OTSU);
    const int blockSize = 25;
    const int constValue = 10;
    adaptiveThreshold(gray, gray, 255, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY_INV, blockSize, constValue);
    imshow("1", gray);

    const vector<CvRect> rows = GetRowRects(gray);
    cout << rows.size();

    // Split every line into blocks and collect them all.
    vector<CvRect> items;
    for (size_t r = 0; r < rows.size(); ++r) {
        const vector<CvRect> cols = GetBlockRects(gray(rows[r]), rows[r].y);
        items.insert(items.end(), cols.begin(), cols.end());
    }

    // Outline each block. The colour must be an explicit Scalar: the expression
    // (255,0,0) is a comma expression that evaluates to 0.
    for (size_t k = 0; k < items.size(); ++k) {
        rectangle(image, items[k], Scalar(255, 0, 0));
    }

    imshow("result", image);
    waitKey(0);
    return 0;
}

转载请注明原文地址: https://www.6miu.com/read-2612840.html

技术

最新回复(0)