针对发票识别之类的自然环境中文本识别,对结果影响最大的应该是文本定位的问题。
下面本人参考 BoyTNT 的《基于OpenCV进行文本分块切割》(C#),
写了一份 C++ 的代码,仅供参考。
因为有一些干扰,所以文本定位并不是很完美,可以通过二值化的方法去掉一些干扰,以后再改进。
代码
#include <opencv2\opencv.hpp>
#include <vector>
#include <iostream>
using namespace cv;
using namespace std;
// Finds horizontal text rows in a binarized image using a horizontal
// projection histogram.
//
// For every image row, the number of pixels whose first channel equals 255 is
// counted. A text row begins at the first image row with more than 10 such
// pixels and ends at the first image row, more than 5 rows later, with fewer
// than 10 such pixels. Spans of 10 rows or fewer are discarded.
//
// gray: image to scan; only the first channel of each pixel is examined.
// Returns one full-width rectangle per detected row, padded by one pixel row
// above and below. The top edge is clamped to 0 so the rectangle can always be
// used as a region of interest on `gray`.
//
// Side effect: displays `gray` in a window named "d".
vector<CvRect> GetRowRects(Mat gray)
{
    vector<CvRect> rows;
    const int height = gray.rows;

    // A std::vector owns the histogram, so it is released on every exit path.
    // (The earlier new[] buffer was freed with scalar delete, which is
    // undefined behavior.)
    vector<int> projection(height, 0);

    imshow("d", gray);

    IplImage src = gray;
    for (int y = 0; y < height; ++y)
    {
        for (int x = 0; x < gray.cols; ++x)
        {
            const CvScalar pixel = cvGet2D(&src, y, x);
            if (static_cast<int>(pixel.val[0]) == 255)
                projection[y]++;
        }
    }

    bool inLine = false;
    int start = 0;
    for (int i = 0; i < height; i++)
    {
        if (!inLine && projection[i] > 10)
        {
            inLine = true;
            start = i;
        }
        else if ((i - start > 5) && projection[i] < 10 && inLine)
        {
            inLine = false;
            if (i - start > 10)
            {
                // Pad by one row on each side. When the text row begins at
                // image row 0 the padded top would be -1, which is outside
                // the image, so clamp it. The bottom (i + 1) never exceeds
                // the image height because i < height.
                const int top = (start > 0) ? start - 1 : 0;
                const int bottom = i + 1;
                rows.push_back(cvRect(0, top, gray.cols, bottom - top));
            }
        }
    }
    return rows;
}
// Splits one text row into horizontal blocks using a vertical projection
// histogram.
//
// The row image is first dilated with a 10x1 rectangular element, then for
// every column the number of pixels whose first channel equals 255 is counted.
// A block begins at the first column with a count of at least 2 and ends at
// the first column, more than 10 columns later, with a count below 2. A block
// that starts within 5 columns of the previous block's right edge is merged
// into that previous block.
//
// gray: image of a single text row. It is dilated in place.
//       NOTE(review): cv::Mat copies presumably share pixel data, so the
//       dilation is likely visible in the caller's image too - confirm.
// rowY: value stored as the y coordinate of every returned rectangle.
// Returns the detected blocks; each has y == rowY and height == gray.rows.
//
// Side effect: displays the dilated row in a window named "fushi".
vector<CvRect> GetBlockRects(Mat gray,
                             int rowY)
{
    vector<CvRect> blocks;
    const int height = gray.rows;
    const int width = gray.cols;

    // A std::vector owns the histogram; the earlier new[] buffer was never
    // freed and leaked on every call.
    vector<int> projection(width, 0);

    Mat element = getStructuringElement(MORPH_RECT, Size(10, 1));
    dilate(gray, gray, element);
    imshow("fushi", gray);

    // Take the legacy header only after dilating so that it refers to the
    // pixel data that is actually going to be scanned.
    IplImage src = gray;
    for (int x = 0; x < width; ++x)
    {
        for (int y = 0; y < height; ++y)
        {
            const Scalar pixel = cvGet2D(&src, y, x);
            if (pixel.val[0] == 255)
                projection[x]++;
        }
    }

    bool inBlock = false;
    int start = 0;
    for (int i = 0; i < width; ++i)
    {
        if (!inBlock && projection[i] >= 2)
        {
            inBlock = true;
            start = i;
        }
        else if ((i - start > 10) && inBlock && projection[i] < 2)
        {
            inBlock = false;

            bool merged = false;
            if (!blocks.empty())
            {
                CvRect &last = blocks.back();
                if (start - last.x - last.width <= 5)
                {
                    // Small gap: stretch the previous block up to column i
                    // instead of starting a new one.
                    last = cvRect(last.x, rowY, i - last.x, height);
                    merged = true;
                }
            }
            if (!merged)
                blocks.push_back(cvRect(start, rowY, i - start, height));
        }
    }
    return blocks;
}
int main(
int argc,
char* argv[])
{
string path =
"D:/Project/FapiaoSystem/test/fapiao6.jpg";
Mat image = imread(path);
imshow(
"原图", image);
if (image.rows*image.cols>
1000000)
{
int t;
if (image.cols>image.rows)t =
1000;
else t =
800;
resize(image, image, Size(t, image.rows*
1.0 / image.cols * t),
0,
0, CV_INTER_LINEAR);
}
Mat gray, data;
cvtColor(image, gray, CV_BGR2GRAY);
data = image.clone();
int blockSize =
25;
int constValue =
10;
adaptiveThreshold(gray, gray,
255, CV_ADAPTIVE_THRESH_MEAN_C, CV_THRESH_BINARY_INV, blockSize, constValue);
imshow(
"1", gray);
vector<CvRect>rows = GetRowRects(gray);
cout << rows.size();
vector<CvRect>items;
for each(CvRect row in rows)
{
vector<CvRect> cols = GetBlockRects(gray(row), row.y);
items.insert(items.end(), cols.begin(), cols.end());
}
for each (CvRect rect in items)
{
rectangle(image, rect,(
255,
0,
0));
}
imshow(
"result", image);
waitKey(
0);
}