using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using Tesseract;
namespace OCRTest
{
/// <summary>
/// Settings that describe how an OCR engine is created and how it recognizes text.
/// </summary>
public class OCRConfig
{
    /// <summary>
    /// Recognition language code(s), e.g. "eng", "chi_sim" or "chi_sim+eng".
    /// Defaults to "eng".
    /// </summary>
    public string Language { get; set; } = "eng";

    /// <summary>
    /// Tesseract engine mode. Defaults to <c>EngineMode.Default</c>.
    /// </summary>
    public EngineMode EngineMode { get; set; } = EngineMode.Default;

    /// <summary>
    /// Optional character whitelist; when set, recognition is restricted to these
    /// characters. Left unset (null) by default.
    /// </summary>
    public string Whitelist { get; set; }

    /// <summary>
    /// Whether images are preprocessed before recognition. Defaults to true.
    /// </summary>
    public bool EnablePreprocessing { get; set; } = true;

    /// <summary>
    /// Path of the tessdata directory. Defaults to "./tessdata".
    /// </summary>
    public string TessDataPath { get; set; } = "./tessdata";

    /// <summary>
    /// Page segmentation mode (PSM, 0-13). Defaults to <c>PageSegMode.Auto</c>.
    /// </summary>
    public PageSegMode PageSegMode { get; set; } = PageSegMode.Auto;
}
/// <summary>
/// Outcome of a single OCR recognition call.
/// </summary>
public class OCRResult
{
    /// <summary>
    /// The recognized text.
    /// </summary>
    public string Text { get; set; }

    /// <summary>
    /// Recognition confidence on a 0-100 scale.
    /// </summary>
    public float Confidence { get; set; }

    /// <summary>
    /// Time spent on the recognition, in milliseconds.
    /// </summary>
    public long ElapsedMilliseconds { get; set; }

    /// <summary>
    /// True when the recognition completed successfully.
    /// </summary>
    public bool Success { get; set; }

    /// <summary>
    /// Error description for a failed recognition.
    /// </summary>
    public string Error { get; set; }
}
/// <summary>
/// OCR engine built on top of <c>TesseractEngine</c>. Instances are cached per
/// configuration and shared between callers; recognition calls on one instance are
/// serialized so a shared instance can be used from several threads.
/// </summary>
public class OCREngine : IDisposable
{
    // Guards the static instance cache.
    private static readonly object _lock = new object();

    // Cached engines, keyed by the full configuration (see BuildKey).
    private static readonly Dictionary<string, OCREngine> _instances = new Dictionary<string, OCREngine>();

    // Serializes access to _engine: instances are shared, and recognition and disposal
    // must not overlap on the same underlying engine.
    private readonly object _engineLock = new object();

    private TesseractEngine _engine;
    private readonly OCRConfig _config;

    // volatile: read without _engineLock by GetInstance to detect stale cache entries.
    private volatile bool _disposed = false;

    /// <summary>
    /// Gets the default shared instance (English recognition).
    /// </summary>
    public static OCREngine Instance
    {
        get
        {
            return GetInstance("eng");
        }
    }

    /// <summary>
    /// Gets the shared instance for the given language, using default settings for
    /// everything else.
    /// </summary>
    /// <param name="language">Language code, e.g. "eng" or "chi_sim+eng".</param>
    /// <returns>The cached engine for that language, created on first use.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="language"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The engine could not be initialized.</exception>
    public static OCREngine GetInstance(string language)
    {
        if (language == null)
        {
            throw new ArgumentNullException(nameof(language));
        }

        // Delegate so both overloads share one key scheme: a language-only request and
        // an equivalent default config resolve to the same cached engine.
        return GetInstance(new OCRConfig { Language = language });
    }

    /// <summary>
    /// Gets the shared instance matching the given configuration.
    /// </summary>
    /// <param name="config">Configuration describing the desired engine.</param>
    /// <returns>The cached engine for that configuration, created on first use.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The engine could not be initialized.</exception>
    public static OCREngine GetInstance(OCRConfig config)
    {
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        string key = BuildKey(config);
        lock (_lock)
        {
            OCREngine instance;
            // Replace entries that a caller disposed directly; otherwise the cache
            // would keep handing out an unusable engine.
            if (!_instances.TryGetValue(key, out instance) || instance._disposed)
            {
                instance = new OCREngine(config);
                _instances[key] = instance;
            }
            return instance;
        }
    }

    /// <summary>
    /// Builds the cache key from every setting that affects engine behavior, so two
    /// configurations that differ in any of them never share an engine.
    /// </summary>
    private static string BuildKey(OCRConfig config)
    {
        // U+001F (unit separator) is used as the delimiter because it is very unlikely
        // to occur in any field; the free-form whitelist goes last to limit ambiguity.
        return string.Join("\u001F", new[]
        {
            config.Language,
            config.EngineMode.ToString(),
            config.PageSegMode.ToString(),
            config.EnablePreprocessing.ToString(),
            config.TessDataPath,
            config.Whitelist
        });
    }

    /// <summary>
    /// Private constructor; instances are obtained through <c>GetInstance</c>.
    /// </summary>
    private OCREngine(OCRConfig config)
    {
        // Snapshot the settings so later changes to the caller's object cannot make
        // this engine disagree with the key it is cached under.
        _config = new OCRConfig
        {
            Language = config.Language,
            EngineMode = config.EngineMode,
            Whitelist = config.Whitelist,
            EnablePreprocessing = config.EnablePreprocessing,
            TessDataPath = config.TessDataPath,
            PageSegMode = config.PageSegMode
        };
        InitializeEngine();
    }

    /// <summary>
    /// Creates and configures the underlying Tesseract engine.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// The tessdata directory is missing or the engine failed to initialize; the
    /// original exception is attached as the inner exception.
    /// </exception>
    private void InitializeEngine()
    {
        try
        {
            // Fail early with a clear message when the tessdata directory is missing.
            if (!Directory.Exists(_config.TessDataPath))
            {
                throw new DirectoryNotFoundException($"Tessdata目录不存在:{_config.TessDataPath}");
            }

            _engine = new TesseractEngine(_config.TessDataPath, _config.Language, _config.EngineMode);
            _engine.DefaultPageSegMode = _config.PageSegMode;

            // Apply the character whitelist only when one was specified.
            if (!string.IsNullOrEmpty(_config.Whitelist))
            {
                _engine.SetVariable("tessedit_char_whitelist", _config.Whitelist);
            }

            _engine.SetVariable("tessedit_pageseg_mode", ((int)_config.PageSegMode).ToString());
            Console.WriteLine($"OCR引擎初始化成功 - 语言:{_config.Language}, 模式:{_config.EngineMode}");
        }
        catch (Exception ex)
        {
            // Release a partially configured engine so a failed initialization does
            // not leak it.
            if (_engine != null)
            {
                _engine.Dispose();
                _engine = null;
            }
            throw new InvalidOperationException($"OCR引擎初始化失败:{ex.Message}", ex);
        }
    }

    /// <summary>
    /// Recognizes the text in an image file.
    /// </summary>
    /// <param name="imagePath">Path of the image file.</param>
    /// <returns>
    /// The recognition result. Failures (empty path, missing file, recognition errors)
    /// are reported through <c>Success</c>/<c>Error</c> rather than thrown.
    /// </returns>
    public OCRResult RecognizeText(string imagePath)
    {
        if (string.IsNullOrEmpty(imagePath))
        {
            return new OCRResult { Success = false, Error = "图片路径不能为空" };
        }
        if (!File.Exists(imagePath))
        {
            return new OCRResult { Success = false, Error = $"文件不存在:{imagePath}" };
        }

        try
        {
            var sw = System.Diagnostics.Stopwatch.StartNew();
            using (var pix = Pix.LoadFromFile(imagePath))
            {
                return RecognizePix(pix, sw);
            }
        }
        catch (Exception ex)
        {
            return new OCRResult
            {
                Success = false,
                Error = $"识别失败:{ex.Message}"
            };
        }
    }

    /// <summary>
    /// Recognizes the text in a bitmap.
    /// </summary>
    /// <param name="bitmap">Bitmap to recognize; it is not disposed by this method.</param>
    /// <returns>
    /// The recognition result. Failures (null bitmap, recognition errors) are reported
    /// through <c>Success</c>/<c>Error</c> rather than thrown.
    /// </returns>
    public OCRResult RecognizeFromBitmap(Bitmap bitmap)
    {
        if (bitmap == null)
        {
            return new OCRResult { Success = false, Error = "Bitmap不能为空" };
        }

        try
        {
            var sw = System.Diagnostics.Stopwatch.StartNew();
            using (var pix = BitmapToPix(bitmap))
            {
                return RecognizePix(pix, sw);
            }
        }
        catch (Exception ex)
        {
            return new OCRResult
            {
                Success = false,
                Error = $"识别失败:{ex.Message}"
            };
        }
    }

    /// <summary>
    /// Recognizes a batch of image files one after another.
    /// </summary>
    /// <param name="imagePaths">Paths of the images; null is treated as an empty batch.</param>
    /// <returns>One result per input path, in input order.</returns>
    public List<OCRResult> RecognizeBatch(string[] imagePaths)
    {
        if (imagePaths == null)
        {
            return new List<OCRResult>();
        }

        var results = new List<OCRResult>(imagePaths.Length);
        foreach (var path in imagePaths)
        {
            results.Add(RecognizeText(path));
        }
        return results;
    }

    /// <summary>
    /// Shared recognition path: optionally preprocesses the image, runs the engine and
    /// packages the result. Exceptions propagate to the calling public method.
    /// </summary>
    /// <param name="pix">Source image; ownership stays with the caller.</param>
    /// <param name="sw">Stopwatch started by the caller; stopped once recognition finishes.</param>
    private OCRResult RecognizePix(Pix pix, System.Diagnostics.Stopwatch sw)
    {
        Pix processedPix = _config.EnablePreprocessing ? PreprocessImage(pix) : pix;
        try
        {
            // Only one recognition at a time per engine; instances are shared.
            lock (_engineLock)
            {
                if (_disposed)
                {
                    throw new ObjectDisposedException(nameof(OCREngine));
                }

                using (var page = _engine.Process(processedPix))
                {
                    var text = page.GetText();
                    var confidence = page.GetMeanConfidence();
                    sw.Stop();
                    return new OCRResult
                    {
                        Text = text?.Trim() ?? string.Empty,
                        // Scale the mean confidence to the 0-100 range of OCRResult.
                        Confidence = confidence * 100,
                        ElapsedMilliseconds = sw.ElapsedMilliseconds,
                        Success = true
                    };
                }
            }
        }
        finally
        {
            // The preprocessed image is a separate Pix owned by this method; the
            // caller's pix is disposed by the caller.
            if (!ReferenceEquals(processedPix, pix))
            {
                processedPix.Dispose();
            }
        }
    }

    /// <summary>
    /// Preprocesses an image for recognition: grayscale conversion followed by
    /// adaptive Otsu binarization.
    /// </summary>
    /// <param name="originalPix">Image to preprocess; it is not disposed here.</param>
    /// <returns>
    /// A new <c>Pix</c> owned by the caller, or <paramref name="originalPix"/> itself
    /// when preprocessing fails.
    /// </returns>
    private Pix PreprocessImage(Pix originalPix)
    {
        try
        {
            using (var grayPix = originalPix.ConvertRGBToGray())
            {
                return grayPix.BinarizeOtsuAdaptiveThreshold(200, 200, 10, 10, 0.1f);
            }
        }
        catch (Exception)
        {
            // Deliberate best effort: fall back to recognizing the unprocessed image.
            return originalPix;
        }
    }

    /// <summary>
    /// Converts a bitmap to a <c>Pix</c> by round-tripping through a temporary PNG file.
    /// </summary>
    /// <param name="bitmap">Source bitmap; it is not disposed here.</param>
    /// <returns>A new <c>Pix</c> owned by the caller.</returns>
    private Pix BitmapToPix(Bitmap bitmap)
    {
        Bitmap converted = null;

        // Build a unique name instead of Path.GetTempFileName() + ".png": that call
        // creates a zero-byte .tmp file which would be left behind on every call.
        string tempFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".png");
        try
        {
            Bitmap source = bitmap;

            // Normalize other pixel formats to 24-bit RGB before saving.
            if (bitmap.PixelFormat != PixelFormat.Format24bppRgb &&
                bitmap.PixelFormat != PixelFormat.Format32bppArgb)
            {
                converted = new Bitmap(bitmap.Width, bitmap.Height, PixelFormat.Format24bppRgb);
                using (var g = Graphics.FromImage(converted))
                {
                    // Explicit size: the (x, y) overload scales by the image's DPI,
                    // which can crop or shrink the copy.
                    g.DrawImage(bitmap, 0, 0, bitmap.Width, bitmap.Height);
                }
                source = converted;
            }

            source.Save(tempFile, ImageFormat.Png);
            return Pix.LoadFromFile(tempFile);
        }
        finally
        {
            converted?.Dispose();
            try
            {
                File.Delete(tempFile);
            }
            catch (IOException)
            {
                // Best-effort cleanup: a failed delete must not mask the real outcome.
            }
            catch (UnauthorizedAccessException)
            {
                // Best-effort cleanup, as above.
            }
        }
    }

    /// <summary>
    /// Releases the underlying Tesseract engine. Safe to call more than once.
    /// </summary>
    public void Dispose()
    {
        // Take the engine lock so the engine is not torn down mid-recognition.
        lock (_engineLock)
        {
            if (!_disposed)
            {
                _engine?.Dispose();
                _disposed = true;
            }
        }
    }

    /// <summary>
    /// Disposes every cached instance and empties the cache.
    /// </summary>
    public static void Cleanup()
    {
        lock (_lock)
        {
            foreach (var instance in _instances.Values)
            {
                instance.Dispose();
            }
            _instances.Clear();
        }
    }
}
}