using System;
using System.Collections.Generic;
using System.Drawing;
using System.Drawing.Imaging;
using System.IO;
using System.Linq;
using Tesseract;

namespace OCRTest
{
    /// <summary>
    /// Settings used to create an <see cref="OCREngine"/>.
    /// </summary>
    public class OCRConfig
    {
        /// <summary>
        /// Recognition language(s), e.g. "eng", "chi_sim", "chi_sim+eng".
        /// </summary>
        public string Language { get; set; } = "eng";

        /// <summary>
        /// Tesseract engine mode.
        /// </summary>
        public EngineMode EngineMode { get; set; } = EngineMode.Default;

        /// <summary>
        /// Optional character whitelist; when non-empty it is passed to the engine as
        /// <c>tessedit_char_whitelist</c> so that only these characters are recognized.
        /// </summary>
        public string Whitelist { get; set; } = null;

        /// <summary>
        /// Whether images are preprocessed (grayscale + binarization) before recognition.
        /// </summary>
        public bool EnablePreprocessing { get; set; } = true;

        /// <summary>
        /// Path of the tessdata directory.
        /// </summary>
        public string TessDataPath { get; set; } = "./tessdata";

        /// <summary>
        /// Page segmentation mode (PSM, 0-13).
        /// </summary>
        public PageSegMode PageSegMode { get; set; } = PageSegMode.Auto;
    }

    /// <summary>
    /// Outcome of a single recognition call.
    /// </summary>
    public class OCRResult
    {
        /// <summary>
        /// Recognized text (trimmed).
        /// </summary>
        public string Text { get; set; }

        /// <summary>
        /// Mean confidence scaled to 0-100.
        /// </summary>
        public float Confidence { get; set; }

        /// <summary>
        /// Elapsed time of the call in milliseconds.
        /// </summary>
        public long ElapsedMilliseconds { get; set; }

        /// <summary>
        /// True when recognition completed without an error.
        /// </summary>
        public bool Success { get; set; }

        /// <summary>
        /// Error description when <see cref="Success"/> is false.
        /// </summary>
        public string Error { get; set; }
    }

    /// <summary>
    /// OCR engine wrapper. Instances are cached per distinct configuration and shared
    /// between callers; recognition calls on one instance are serialized because the
    /// underlying engine is used for one page at a time.
    /// </summary>
    public class OCREngine : IDisposable
    {
        // Guards the static instance cache.
        private static readonly object _lock = new object();
        private static readonly Dictionary<string, OCREngine> _instances = new Dictionary<string, OCREngine>();

        // Guards _engine: only one Page may be active on an engine at any time.
        private readonly object _processLock = new object();
        private readonly OCRConfig _config;
        private TesseractEngine _engine;
        private bool _disposed = false;

        /// <summary>
        /// Default instance (English, default configuration).
        /// </summary>
        public static OCREngine Instance
        {
            get { return GetInstance("eng"); }
        }

        /// <summary>
        /// Returns the cached engine for <paramref name="language"/> with otherwise
        /// default settings, creating it on first use.
        /// </summary>
        /// <param name="language">Language code, e.g. "eng".</param>
        /// <returns>The shared engine instance.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="language"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The engine could not be initialized.</exception>
        public static OCREngine GetInstance(string language)
        {
            if (language == null)
            {
                throw new ArgumentNullException(nameof(language));
            }

            // Delegating keeps a single key scheme, so GetInstance("eng") and
            // GetInstance(new OCRConfig()) share one engine instead of loading two.
            return GetInstance(new OCRConfig { Language = language });
        }

        /// <summary>
        /// Returns the cached engine for <paramref name="config"/>, creating it on first use.
        /// Configurations that differ in any setting get separate engines.
        /// </summary>
        /// <param name="config">Engine configuration.</param>
        /// <returns>The shared engine instance.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
        /// <exception cref="InvalidOperationException">The engine could not be initialized.</exception>
        public static OCREngine GetInstance(OCRConfig config)
        {
            if (config == null)
            {
                throw new ArgumentNullException(nameof(config));
            }

            string key = BuildCacheKey(config);
            lock (_lock)
            {
                OCREngine cached;
                if (!_instances.TryGetValue(key, out cached))
                {
                    cached = new OCREngine(config);
                    _instances[key] = cached;
                }

                return cached;
            }
        }

        /// <summary>
        /// Builds a cache key from every setting that influences engine behavior, so
        /// that two different configurations never share an engine.
        /// </summary>
        private static string BuildCacheKey(OCRConfig config)
        {
            // U+001F (unit separator) is very unlikely to occur inside any of the values.
            return string.Join(
                "\u001F",
                config.Language,
                config.EngineMode.ToString(),
                config.PageSegMode.ToString(),
                config.TessDataPath,
                config.Whitelist ?? string.Empty,
                config.EnablePreprocessing ? "1" : "0");
        }

        /// <summary>
        /// Private constructor; instances are obtained through <c>GetInstance</c>.
        /// </summary>
        private OCREngine(OCRConfig config)
        {
            // Snapshot the settings: the cache key was derived from them, so later
            // mutation of the caller's object must not change this engine's behavior.
            _config = new OCRConfig
            {
                Language = config.Language,
                EngineMode = config.EngineMode,
                Whitelist = config.Whitelist,
                EnablePreprocessing = config.EnablePreprocessing,
                TessDataPath = config.TessDataPath,
                PageSegMode = config.PageSegMode
            };
            InitializeEngine();
        }

        /// <summary>
        /// Creates and configures the Tesseract engine.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// Any failure during initialization (including a missing tessdata directory),
        /// with the original exception as <c>InnerException</c>.
        /// </exception>
        private void InitializeEngine()
        {
            try
            {
                // Fail early with a clear message when the tessdata directory is missing.
                if (!Directory.Exists(_config.TessDataPath))
                {
                    throw new DirectoryNotFoundException($"Tessdata目录不存在:{_config.TessDataPath}");
                }

                _engine = new TesseractEngine(_config.TessDataPath, _config.Language, _config.EngineMode);
                _engine.DefaultPageSegMode = _config.PageSegMode;

                // Restrict the recognizable character set when a whitelist is given.
                if (!string.IsNullOrEmpty(_config.Whitelist))
                {
                    _engine.SetVariable("tessedit_char_whitelist", _config.Whitelist);
                }

                // Also set the page segmentation mode through the engine variable.
                _engine.SetVariable("tessedit_pageseg_mode", ((int)_config.PageSegMode).ToString());

                Console.WriteLine($"OCR引擎初始化成功 - 语言:{_config.Language}, 模式:{_config.EngineMode}");
            }
            catch (Exception ex)
            {
                // Do not leak a half-configured native engine.
                _engine?.Dispose();
                _engine = null;
                throw new InvalidOperationException($"OCR引擎初始化失败:{ex.Message}", ex);
            }
        }

        /// <summary>
        /// Recognizes the text in an image file.
        /// </summary>
        /// <param name="imagePath">Path of the image file.</param>
        /// <returns>
        /// The recognition result; failures (bad path, load or recognition errors) are
        /// reported through <see cref="OCRResult.Success"/> and <see cref="OCRResult.Error"/>
        /// rather than thrown.
        /// </returns>
        public OCRResult RecognizeText(string imagePath)
        {
            if (string.IsNullOrEmpty(imagePath))
            {
                return new OCRResult { Success = false, Error = "图片路径不能为空" };
            }

            if (!File.Exists(imagePath))
            {
                return new OCRResult { Success = false, Error = $"文件不存在:{imagePath}" };
            }

            try
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                using (var pix = Pix.LoadFromFile(imagePath))
                {
                    return RecognizePix(pix, stopwatch);
                }
            }
            catch (Exception ex)
            {
                return new OCRResult { Success = false, Error = $"识别失败:{ex.Message}" };
            }
        }

        /// <summary>
        /// Recognizes the text in a <see cref="Bitmap"/>.
        /// </summary>
        /// <param name="bitmap">Source bitmap; it is not disposed by this method.</param>
        /// <returns>
        /// The recognition result; failures are reported through
        /// <see cref="OCRResult.Success"/> and <see cref="OCRResult.Error"/> rather than thrown.
        /// </returns>
        public OCRResult RecognizeFromBitmap(Bitmap bitmap)
        {
            if (bitmap == null)
            {
                return new OCRResult { Success = false, Error = "Bitmap不能为空" };
            }

            try
            {
                var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                using (var pix = BitmapToPix(bitmap))
                {
                    return RecognizePix(pix, stopwatch);
                }
            }
            catch (Exception ex)
            {
                return new OCRResult { Success = false, Error = $"识别失败:{ex.Message}" };
            }
        }

        /// <summary>
        /// Recognizes several image files one after another.
        /// </summary>
        /// <param name="imagePaths">Image paths; null is treated as an empty batch.</param>
        /// <returns>One result per input path, in input order.</returns>
        public List<OCRResult> RecognizeBatch(string[] imagePaths)
        {
            if (imagePaths == null)
            {
                return new List<OCRResult>();
            }

            var results = new List<OCRResult>(imagePaths.Length);
            foreach (var path in imagePaths)
            {
                results.Add(RecognizeText(path));
            }

            return results;
        }

        /// <summary>
        /// Shared recognition path: optional preprocessing, then OCR on the engine.
        /// The caller keeps ownership of <paramref name="source"/>.
        /// </summary>
        /// <param name="source">Image to recognize.</param>
        /// <param name="stopwatch">Running stopwatch; stopped once the text is extracted.</param>
        private OCRResult RecognizePix(Pix source, System.Diagnostics.Stopwatch stopwatch)
        {
            // PreprocessImage always hands back a new Pix that this method owns.
            Pix preprocessed = _config.EnablePreprocessing ? PreprocessImage(source) : null;
            try
            {
                // Instances are shared via GetInstance, so serialize access to the engine.
                lock (_processLock)
                {
                    if (_disposed)
                    {
                        throw new ObjectDisposedException(nameof(OCREngine));
                    }

                    using (var page = _engine.Process(preprocessed ?? source))
                    {
                        var text = page.GetText();
                        var confidence = page.GetMeanConfidence();
                        stopwatch.Stop();

                        return new OCRResult
                        {
                            Text = (text ?? string.Empty).Trim(),
                            Confidence = confidence * 100,
                            ElapsedMilliseconds = stopwatch.ElapsedMilliseconds,
                            Success = true
                        };
                    }
                }
            }
            finally
            {
                // The preprocessed copy was previously never released.
                preprocessed?.Dispose();
            }
        }

        /// <summary>
        /// Preprocesses an image: grayscale conversion followed by Otsu adaptive binarization.
        /// </summary>
        /// <param name="originalPix">Source image; not disposed by this method.</param>
        /// <returns>
        /// A new <see cref="Pix"/> owned by the caller: the binarized image, or a clone
        /// of the source when preprocessing fails.
        /// </returns>
        private Pix PreprocessImage(Pix originalPix)
        {
            try
            {
                using (var grayPix = originalPix.ConvertRGBToGray())
                using (var binaryPix = grayPix.BinarizeOtsuAdaptiveThreshold(200, 200, 10, 10, 0.1f))
                {
                    // Clone because the using blocks dispose the intermediates on exit.
                    return binaryPix.Clone();
                }
            }
            catch
            {
                // Deliberate best effort: if preprocessing fails, fall back to the
                // unprocessed image.
                return originalPix.Clone();
            }
        }

        /// <summary>
        /// Converts a <see cref="Bitmap"/> to a <see cref="Pix"/> by round-tripping
        /// through a temporary PNG file.
        /// </summary>
        /// <param name="bitmap">Source bitmap; not disposed by this method.</param>
        /// <returns>A new <see cref="Pix"/> owned by the caller.</returns>
        private Pix BitmapToPix(Bitmap bitmap)
        {
            Bitmap converted = null;

            // Path.GetTempFileName() creates a zero-byte file; appending ".png" to its
            // name left that file behind on every call. Use a fresh unique name instead.
            string tempFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".png");
            try
            {
                Bitmap toSave = bitmap;

                // Normalize anything that is not 24bpp RGB / 32bpp ARGB to 24bpp RGB.
                if (bitmap.PixelFormat != PixelFormat.Format24bppRgb &&
                    bitmap.PixelFormat != PixelFormat.Format32bppArgb)
                {
                    converted = new Bitmap(bitmap.Width, bitmap.Height, PixelFormat.Format24bppRgb);
                    using (var g = Graphics.FromImage(converted))
                    {
                        // Explicit size: DrawImage(image, x, y) draws at the image's
                        // physical size, which rescales when the DPI values differ.
                        g.DrawImage(bitmap, 0, 0, bitmap.Width, bitmap.Height);
                    }

                    toSave = converted;
                }

                // Fully qualified in case the Tesseract namespace also declares an
                // ImageFormat type (NOTE(review): confirm against the referenced version).
                toSave.Save(tempFile, System.Drawing.Imaging.ImageFormat.Png);
                return Pix.LoadFromFile(tempFile);
            }
            finally
            {
                converted?.Dispose();

                // Best-effort cleanup; a failed delete must not mask the real outcome.
                try
                {
                    File.Delete(tempFile);
                }
                catch (IOException)
                {
                }
                catch (UnauthorizedAccessException)
                {
                }
            }
        }

        /// <summary>
        /// Releases the underlying engine and removes this instance from the cache so
        /// that later <c>GetInstance</c> calls create a fresh engine. Safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            lock (_lock)
            {
                var staleKeys = _instances
                    .Where(pair => ReferenceEquals(pair.Value, this))
                    .Select(pair => pair.Key)
                    .ToList();
                foreach (var key in staleKeys)
                {
                    _instances.Remove(key);
                }
            }

            // Wait for any in-flight recognition before tearing the engine down.
            lock (_processLock)
            {
                if (_disposed)
                {
                    return;
                }

                _engine?.Dispose();
                _engine = null;
                _disposed = true;
            }
        }

        /// <summary>
        /// Disposes every cached instance and empties the cache.
        /// </summary>
        public static void Cleanup()
        {
            List<OCREngine> engines;
            lock (_lock)
            {
                // Snapshot first: Dispose() edits the cache, which must not happen
                // while the dictionary is being enumerated.
                engines = _instances.Values.Distinct().ToList();
                _instances.Clear();
            }

            foreach (var engine in engines)
            {
                engine.Dispose();
            }
        }
    }
}