| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395 |
- using System;
- using System.Collections.Generic;
- using System.Drawing;
- using System.Drawing.Imaging;
- using System.IO;
- using System.Linq;
- using Tesseract;
- namespace OCRTest
- {
/// <summary>
/// Settings used to create and run an OCR engine.
/// </summary>
public class OCRConfig
{
    /// <summary>
    /// Recognition language code(s), e.g. "eng", "chi_sim" or "chi_sim+eng".
    /// Defaults to "eng".
    /// </summary>
    public string Language { get; set; } = "eng";

    /// <summary>
    /// Tesseract engine mode. Defaults to <c>EngineMode.Default</c>.
    /// </summary>
    public EngineMode EngineMode { get; set; } = EngineMode.Default;

    /// <summary>
    /// Optional character whitelist; when set, recognition is restricted to
    /// these characters. Null (the default) means no restriction is configured.
    /// </summary>
    public string Whitelist { get; set; }

    /// <summary>
    /// Whether images are preprocessed before recognition. Defaults to true.
    /// </summary>
    public bool EnablePreprocessing { get; set; } = true;

    /// <summary>
    /// Path of the tessdata directory. Defaults to "./tessdata".
    /// </summary>
    public string TessDataPath { get; set; } = "./tessdata";

    /// <summary>
    /// Page segmentation mode (PSM, 0-13). Defaults to <c>PageSegMode.Auto</c>.
    /// </summary>
    public PageSegMode PageSegMode { get; set; } = PageSegMode.Auto;
}
/// <summary>
/// Outcome of a single OCR recognition call.
/// </summary>
public class OCRResult
{
    /// <summary>
    /// The recognized text.
    /// </summary>
    public string Text { get; set; }

    /// <summary>
    /// Recognition confidence on a 0-100 scale.
    /// </summary>
    public float Confidence { get; set; }

    /// <summary>
    /// Time spent on the recognition, in milliseconds.
    /// </summary>
    public long ElapsedMilliseconds { get; set; }

    /// <summary>
    /// True when recognition completed successfully.
    /// </summary>
    public bool Success { get; set; }

    /// <summary>
    /// Error description for a failed recognition.
    /// </summary>
    public string Error { get; set; }
}
/// <summary>
/// Cached OCR engine built on Tesseract. Instances are created through
/// <see cref="GetInstance(string)"/> / <see cref="GetInstance(OCRConfig)"/> and
/// shared between callers that ask for the same configuration. Instance lookup
/// is serialized by a static lock; recognition on one instance is serialized by
/// a per-instance lock.
/// </summary>
public class OCREngine : IDisposable
{
    // Guards the shared instance cache.
    private static readonly object _lock = new object();
    private static readonly Dictionary<string, OCREngine> _instances = new Dictionary<string, OCREngine>();

    // Serializes access to the native engine of this instance (recognition and
    // disposal), because the underlying engine handles one page at a time.
    private readonly object _sync = new object();
    private readonly OCRConfig _config;
    private TesseractEngine _engine;
    private volatile bool _disposed;

    /// <summary>
    /// Gets the default instance (English recognition).
    /// </summary>
    public static OCREngine Instance
    {
        get { return GetInstance("eng"); }
    }

    /// <summary>
    /// Gets the shared instance for a language, using default settings for
    /// everything else.
    /// </summary>
    /// <param name="language">Language code, e.g. "eng" or "chi_sim+eng".</param>
    /// <returns>The cached (or newly created) engine.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="language"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The engine could not be initialized.</exception>
    public static OCREngine GetInstance(string language)
    {
        if (language == null)
        {
            throw new ArgumentNullException(nameof(language));
        }

        // Route through the config overload so both overloads share one cache
        // entry for the same effective configuration.
        return GetInstance(new OCRConfig { Language = language });
    }

    /// <summary>
    /// Gets the shared instance for a custom configuration. Two configurations
    /// share an instance only when all of their settings are equal.
    /// </summary>
    /// <param name="config">Engine configuration.</param>
    /// <returns>The cached (or newly created) engine.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="config"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The engine could not be initialized.</exception>
    public static OCREngine GetInstance(OCRConfig config)
    {
        if (config == null)
        {
            throw new ArgumentNullException(nameof(config));
        }

        string key = BuildCacheKey(config);

        lock (_lock)
        {
            OCREngine cached;
            // An instance disposed by a caller is replaced instead of being
            // handed out again.
            if (_instances.TryGetValue(key, out cached) && !cached._disposed)
            {
                return cached;
            }

            var created = new OCREngine(config);
            _instances[key] = created;
            return created;
        }
    }

    /// <summary>
    /// Builds the cache key from every setting that influences how the engine
    /// is created or how it recognizes, so that differing configurations never
    /// share an engine.
    /// </summary>
    private static string BuildCacheKey(OCRConfig config)
    {
        // Free-form values go last and a control character is used as the
        // separator to keep distinct configurations from colliding.
        return string.Join("\u001F", new[]
        {
            config.EngineMode.ToString(),
            config.PageSegMode.ToString(),
            config.EnablePreprocessing.ToString(),
            config.Language,
            config.TessDataPath,
            config.Whitelist
        });
    }

    /// <summary>
    /// Private constructor; instances are obtained through GetInstance.
    /// </summary>
    private OCREngine(OCRConfig config)
    {
        _config = config;
        InitializeEngine();
    }

    /// <summary>
    /// Creates and configures the Tesseract engine from the stored configuration.
    /// </summary>
    /// <exception cref="InvalidOperationException">
    /// Any failure (including a missing tessdata directory) is wrapped in this
    /// exception, with the original exception as InnerException.
    /// </exception>
    private void InitializeEngine()
    {
        TesseractEngine engine = null;
        try
        {
            // Fail early with a clear message when the tessdata directory is missing.
            if (!Directory.Exists(_config.TessDataPath))
            {
                throw new DirectoryNotFoundException($"Tessdata目录不存在:{_config.TessDataPath}");
            }

            engine = new TesseractEngine(_config.TessDataPath, _config.Language, _config.EngineMode);
            engine.DefaultPageSegMode = _config.PageSegMode;

            // Restrict the recognizable characters when a whitelist is given.
            if (!string.IsNullOrEmpty(_config.Whitelist))
            {
                engine.SetVariable("tessedit_char_whitelist", _config.Whitelist);
            }

            engine.SetVariable(
                "tessedit_pageseg_mode",
                ((int)_config.PageSegMode).ToString(System.Globalization.CultureInfo.InvariantCulture));

            _engine = engine;
            Console.WriteLine($"OCR引擎初始化成功 - 语言:{_config.Language}, 模式:{_config.EngineMode}");
        }
        catch (Exception ex)
        {
            // Do not leak a half-configured native engine.
            engine?.Dispose();
            _engine = null;
            throw new InvalidOperationException($"OCR引擎初始化失败:{ex.Message}", ex);
        }
    }

    /// <summary>
    /// Recognizes the text in an image file.
    /// </summary>
    /// <param name="imagePath">Path of the image file.</param>
    /// <returns>
    /// The recognition result; failures are reported through
    /// <see cref="OCRResult.Success"/> and <see cref="OCRResult.Error"/>
    /// rather than thrown.
    /// </returns>
    public OCRResult RecognizeText(string imagePath)
    {
        if (string.IsNullOrEmpty(imagePath))
        {
            return new OCRResult { Success = false, Error = "图片路径不能为空" };
        }

        if (!File.Exists(imagePath))
        {
            return new OCRResult { Success = false, Error = $"文件不存在:{imagePath}" };
        }

        return RecognizeCore(() => Pix.LoadFromFile(imagePath));
    }

    /// <summary>
    /// Recognizes the text in a bitmap.
    /// </summary>
    /// <param name="bitmap">Bitmap to recognize; it is not disposed by this method.</param>
    /// <returns>
    /// The recognition result; failures are reported through
    /// <see cref="OCRResult.Success"/> and <see cref="OCRResult.Error"/>
    /// rather than thrown.
    /// </returns>
    public OCRResult RecognizeFromBitmap(Bitmap bitmap)
    {
        if (bitmap == null)
        {
            return new OCRResult { Success = false, Error = "Bitmap不能为空" };
        }

        return RecognizeCore(() => BitmapToPix(bitmap));
    }

    /// <summary>
    /// Recognizes a batch of image files, one after another.
    /// </summary>
    /// <param name="imagePaths">Image paths; null is treated as an empty batch.</param>
    /// <returns>One result per input path, in input order.</returns>
    public List<OCRResult> RecognizeBatch(string[] imagePaths)
    {
        if (imagePaths == null)
        {
            return new List<OCRResult>();
        }

        var results = new List<OCRResult>(imagePaths.Length);
        foreach (var path in imagePaths)
        {
            results.Add(RecognizeText(path));
        }
        return results;
    }

    /// <summary>
    /// Shared recognition pipeline: load the image, optionally preprocess it,
    /// run the engine and time the whole operation (loading included).
    /// </summary>
    /// <param name="loadPix">Produces the source image; the returned Pix is disposed here.</param>
    private OCRResult RecognizeCore(Func<Pix> loadPix)
    {
        try
        {
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();
            using (var sourcePix = loadPix())
            {
                Pix processedPix = _config.EnablePreprocessing ? PreprocessImage(sourcePix) : sourcePix;
                try
                {
                    lock (_sync)
                    {
                        if (_disposed)
                        {
                            return new OCRResult { Success = false, Error = "OCR引擎已释放" };
                        }

                        using (var page = _engine.Process(processedPix))
                        {
                            var text = page.GetText();
                            var confidence = page.GetMeanConfidence();
                            stopwatch.Stop();
                            return new OCRResult
                            {
                                Text = text.Trim(),
                                // Scaled to the 0-100 range exposed by OCRResult.
                                Confidence = confidence * 100,
                                ElapsedMilliseconds = stopwatch.ElapsedMilliseconds,
                                Success = true
                            };
                        }
                    }
                }
                finally
                {
                    // The preprocessed image is a separate Pix owned by this
                    // method; the source is released by the enclosing using.
                    if (!ReferenceEquals(processedPix, sourcePix))
                    {
                        processedPix.Dispose();
                    }
                }
            }
        }
        catch (Exception ex)
        {
            return new OCRResult
            {
                Success = false,
                Error = $"识别失败:{ex.Message}"
            };
        }
    }

    /// <summary>
    /// Preprocesses an image: grayscale conversion followed by adaptive Otsu
    /// binarization. Best effort: when preprocessing fails, a clone of the
    /// original image is returned instead.
    /// </summary>
    /// <param name="originalPix">Source image; not disposed by this method.</param>
    /// <returns>A new Pix that the caller must dispose.</returns>
    private Pix PreprocessImage(Pix originalPix)
    {
        try
        {
            using (var grayPix = originalPix.ConvertRGBToGray())
            using (var binaryPix = grayPix.BinarizeOtsuAdaptiveThreshold(200, 200, 10, 10, 0.1f))
            {
                // Clone so the result outlives the using block.
                return binaryPix.Clone();
            }
        }
        catch (Exception)
        {
            // Deliberate fallback: recognize the unprocessed image.
            return originalPix.Clone();
        }
    }

    /// <summary>
    /// Converts a Bitmap to a Pix by round-tripping through a temporary PNG file.
    /// </summary>
    /// <param name="bitmap">Source bitmap; not disposed by this method.</param>
    /// <returns>A new Pix that the caller must dispose.</returns>
    private Pix BitmapToPix(Bitmap bitmap)
    {
        // Only 24-bit RGB and 32-bit ARGB bitmaps are saved as-is; anything
        // else is first redrawn into a 24-bit copy.
        bool needsConversion =
            bitmap.PixelFormat != PixelFormat.Format24bppRgb &&
            bitmap.PixelFormat != PixelFormat.Format32bppArgb;

        Bitmap working = needsConversion ? ConvertTo24bppRgb(bitmap) : bitmap;

        // A unique name in the temp directory. Path.GetTempFileName() is not
        // used because it creates an extra empty file that would be left behind.
        string tempFile = Path.Combine(Path.GetTempPath(), Guid.NewGuid().ToString("N") + ".png");
        try
        {
            working.Save(tempFile, ImageFormat.Png);
            return Pix.LoadFromFile(tempFile);
        }
        finally
        {
            if (needsConversion)
            {
                working.Dispose();
            }
            TryDeleteFile(tempFile);
        }
    }

    /// <summary>
    /// Draws the bitmap into a new 24-bit RGB bitmap of the same pixel size.
    /// </summary>
    private static Bitmap ConvertTo24bppRgb(Bitmap source)
    {
        var converted = new Bitmap(source.Width, source.Height, PixelFormat.Format24bppRgb);
        try
        {
            using (var g = Graphics.FromImage(converted))
            {
                // Explicit destination size: the (x, y) overload draws at the
                // image's physical size and would rescale non-default-DPI images.
                g.DrawImage(source, new Rectangle(0, 0, source.Width, source.Height));
            }
            return converted;
        }
        catch
        {
            converted.Dispose();
            throw;
        }
    }

    /// <summary>
    /// Deletes a temporary file, ignoring I/O and permission failures so that
    /// cleanup never masks the outcome of the conversion itself.
    /// </summary>
    private static void TryDeleteFile(string path)
    {
        try
        {
            File.Delete(path);
        }
        catch (IOException)
        {
            // Best-effort cleanup of a temp file.
        }
        catch (UnauthorizedAccessException)
        {
            // Best-effort cleanup of a temp file.
        }
    }

    /// <summary>
    /// Releases the native engine. Safe to call more than once. A disposed
    /// instance is replaced the next time its configuration is requested
    /// through GetInstance.
    /// </summary>
    public void Dispose()
    {
        // Same lock as recognition, so the engine is never disposed mid-call.
        lock (_sync)
        {
            if (_disposed)
            {
                return;
            }

            _disposed = true;
            _engine?.Dispose();
        }
    }

    /// <summary>
    /// Disposes every cached instance and empties the cache.
    /// </summary>
    public static void Cleanup()
    {
        lock (_lock)
        {
            try
            {
                foreach (var instance in _instances.Values)
                {
                    instance.Dispose();
                }
            }
            finally
            {
                _instances.Clear();
            }
        }
    }
}
- }
|