上一篇文章介绍了Tesseract在Mac平台上的使用,现在看看怎么实现在Android上进行OCR识别。这里基于个人之前自己做的一个APP上集成,首先,修改app/build.gradle,增加对tess-two的支持,见76行:
把上一篇文章最后训练过的normal.traineddata放到assets文件夹下:
编写测试界面如下:
编写页面对应java文件:
package com.linjk.ihouse.activity; import android.app.ActionBar; import android.app.Activity; import android.graphics.Bitmap; import android.graphics.BitmapFactory; import android.os.Bundle; import android.os.Environment; import android.util.Log; import android.view.MenuItem; import android.widget.TextView; import com.googlecode.tesseract.android.TessBaseAPI; import com.linjk.ihouse.R; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import java.io.OutputStream; import butterknife.Bind; import butterknife.ButterKnife; /** * Created by LinJK on 07/06/2017. */ public class ActivityTesseractOCR extends Activity { @Bind(R.id.ocr_result) TextView tvOCRResult; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); setContentView(R.layout.activity_tesseract_ocr); setTitle("TesseractOCR"); ButterKnife.bind(this); ActionBar actionBar = getActionBar(); actionBar.setDisplayHomeAsUpEnabled(true); try { copyLanguagePackageToSDCard(); TessBaseAPI lvBaseAPI = new TessBaseAPI(); lvBaseAPI.init(Environment.getExternalStorageDirectory().getPath(), "normal"); lvBaseAPI.setPageSegMode(TessBaseAPI.PageSegMode.PSM_AUTO); Bitmap lvBitmap = BitmapFactory.decodeResource(getResources(), R.drawable.name); lvBaseAPI.setImage(lvBitmap); String result = lvBaseAPI.getUTF8Text(); tvOCRResult.setText(result); lvBaseAPI.clear(); lvBaseAPI.end(); } catch (Exception e) { Log.e("OCR", e.getMessage()); } } // 把字库文件拷贝到SD卡,要求SD卡根目录有tessdata文件夹,字库在该文件夹下 private void copyLanguagePackageToSDCard() { try { String filePath = Environment.getExternalStorageDirectory().getPath() + "/tessdata/normal.traineddata"; File lvFile = new File(filePath); if (!lvFile.exists()) { Log.i("file", "创建路径"); if (lvFile.mkdir()) { Log.i("file", "创建成功"); } else { Log.i("file", "创建失败"); } } InputStream lvInputStream; OutputStream lvOutputStream = new FileOutputStream(filePath); // 拷贝文件 lvInputStream = this.getAssets().open("normal.traineddata"); byte[] 
buffer = new byte[1024]; int length = lvInputStream.read(buffer); while (length > 0) { lvOutputStream.write(buffer, 0, length); length = lvInputStream.read(buffer); } lvOutputStream.flush(); lvInputStream.close(); lvOutputStream.close(); } catch (Exception e) { Log.e("OCR<copy>", e.getMessage()); } } @Override public boolean onOptionsItemSelected(MenuItem item) { switch (item.getItemId()){ case android.R.id.home: finish(); return true; default: return super.onOptionsItemSelected(item); } } } 代码那里首先把assets文件的字库拷贝到sd卡根目录下的tessdata文件夹下,然后初始化OCR引擎,接着读取照片进行解析。这里还有很多可以改进的,识别应放到Handler来处理,这样就不会阻塞Ui线程,这只是测试功能而已就这么写了。