using OpenCvSharp;
using PaddleOCRSharp;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Tesseract;

namespace OCRTest
{
    /// <summary>
    /// Interactive console harness: shows a menu and runs one of the OCR / training-data
    /// demos below. All input locations are hard-coded developer paths (see the constants).
    /// </summary>
    internal class Program
    {
        // Sample image used by the OCREngine demo and by the raw Tesseract demo.
        private const string SampleImagePath = "D:/work/WindowsFormsTest/TestImage/wechat_2025-10-16_143143_308.png";

        // Sample image used by the PaddleOCR demo.
        private const string PaddleSampleImagePath = "D:/work/WindowsFormsTest/TestImage/Date1_20260411170215701.jpg";

        // Root of the training project; the sub-folders below are derived from it so the
        // location only has to be changed in one place.
        private const string TrainingProjectFolder = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject";
        private const string RawImagesFolder = TrainingProjectFolder + @"\raw_images";
        private const string ProcessedImagesFolder = TrainingProjectFolder + @"\processed_images";
        private const string LabelsFolder = TrainingProjectFolder + @"\labels";

        /// <summary>
        /// Prints the menu, reads one line from standard input and dispatches to the chosen demo.
        /// Anything other than "1".."5" (including end-of-input) runs the OCR engine test.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            Console.WriteLine("请选择测试模式:");
            Console.WriteLine("1. 测试OCR识别引擎");
            Console.WriteLine("2. 创建训练项目示例");
            Console.WriteLine("3. 预处理训练图片");
            Console.WriteLine("4. 验证训练数据集");
            Console.WriteLine("5. 分析字符频率");
            Console.Write("\n请输入选项(1-5):");

            // ReadLine returns null at end of input; trimming lets " 1" or "1 " select option 1
            // instead of falling through to the default branch.
            string choice = Console.ReadLine()?.Trim();

            switch (choice)
            {
                case "1":
                    TestOCREngine();
                    break;
                case "2":
                    CreateTrainingProjectExample();
                    break;
                case "3":
                    PreprocessTrainingImagesExample();
                    break;
                case "4":
                    ValidateDatasetExample();
                    break;
                case "5":
                    AnalyzeCharactersExample();
                    break;
                default:
                    Console.WriteLine("无效选项,默认执行OCR识别测试");
                    TestOCREngine();
                    break;
            }
        }

        /// <summary>
        /// Runs the OCREngine wrapper against the sample image in five ways (default instance,
        /// explicit language, custom configuration, batch, Bitmap input) and prints each result
        /// together with its elapsed time.
        /// </summary>
        private static void TestOCREngine()
        {
            Console.WriteLine("=== OCR 识别测试 ===\n");

            // Test image path (change to a real path on your machine).
            string imagePath = SampleImagePath;
            if (!File.Exists(imagePath))
            {
                Console.WriteLine($"错误:找不到图片文件 {imagePath}");
                return;
            }

            try
            {
                // Mode 1: shared engine instance (original author's note: recommended, best performance).
                Console.WriteLine("【方式1】使用单例引擎识别...");
                var ocrEngine = OCREngine.Instance;
                ReportTimedRecognition(() => ocrEngine.RecognizeText(imagePath));

                // Mode 2: explicit language selection (Simplified Chinese + English).
                Console.WriteLine("【方式2】中英文混合识别...");
                var ocrEngineChi = OCREngine.GetInstance("chi_sim+eng");
                ReportTimedRecognition(() => ocrEngineChi.RecognizeText(imagePath));

                // Mode 3: custom configuration.
                Console.WriteLine("【方式3】使用自定义配置识别...");
                var config = new OCRConfig
                {
                    Language = "eng",
                    EngineMode = EngineMode.LstmOnly, // LSTM only (original author's note: faster)
                    Whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-",
                    EnablePreprocessing = true
                };
                var ocrEngineCustom = OCREngine.GetInstance(config);
                ReportTimedRecognition(() => ocrEngineCustom.RecognizeText(imagePath));

                // Mode 4: batch recognition; more image paths can be added to the array.
                Console.WriteLine("【方式4】批量识别测试...");
                string[] imagePaths = { imagePath };
                Stopwatch batchTimer = Stopwatch.StartNew();
                var results = ocrEngine.RecognizeBatch(imagePaths);
                batchTimer.Stop();
                for (int i = 0; i < results.Count; i++)
                {
                    Console.WriteLine($"图片{i + 1}:{results[i]}");
                }
                Console.WriteLine($"批量识别总耗时:{batchTimer.ElapsedMilliseconds} ms\n");

                // Mode 5: recognition from an in-memory Bitmap.
                Console.WriteLine("【方式5】从Bitmap识别...");
                using (var bitmap = new Bitmap(imagePath))
                {
                    ReportTimedRecognition(() => ocrEngine.RecognizeFromBitmap(bitmap));
                }

                Console.WriteLine("测试完成!按任意键退出...");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"发生错误:{ex.Message}");
                Console.WriteLine($"堆栈跟踪:{ex.StackTrace}");
                Console.WriteLine("按任意键退出...");
            }

            // Pause on both paths: previously a failure returned immediately, so a console
            // window opened just for this run closed before the error could be read.
            Console.ReadKey();
        }

        /// <summary>
        /// Runs <paramref name="recognize"/> once, measuring only that call, then prints the
        /// recognised text followed by the elapsed milliseconds.
        /// </summary>
        /// <param name="recognize">Callback that performs one recognition and returns its text.</param>
        private static void ReportTimedRecognition(Func<string> recognize)
        {
            Stopwatch timer = Stopwatch.StartNew();
            string text = recognize();
            timer.Stop();

            Console.WriteLine($"识别结果:{text}");
            Console.WriteLine($"耗时:{timer.ElapsedMilliseconds} ms\n");
        }

        /// <summary>
        /// Prints the "press any key to continue" prompt and blocks until a key is pressed.
        /// </summary>
        private static void WaitForKey()
        {
            Console.WriteLine("\n按任意键继续...");
            Console.ReadKey();
        }

        /// <summary>
        /// Creates the sample training project under <see cref="TrainingProjectFolder"/> via
        /// OCRTrainingHelper.CreateSampleProject and prints the follow-up steps.
        /// </summary>
        private static void CreateTrainingProjectExample()
        {
            Console.WriteLine("\n=== 创建训练项目示例 ===\n");

            string projectFolder = TrainingProjectFolder;

            try
            {
                OCRTrainingHelper.CreateSampleProject(projectFolder);

                Console.WriteLine("\n项目创建成功!");
                Console.WriteLine($"项目路径:{projectFolder}");
                Console.WriteLine("\n下一步:");
                Console.WriteLine("1. 将训练图片放入 raw_images 文件夹");
                Console.WriteLine("2. 按照 README.md 的说明进行训练");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"创建失败:{ex.Message}");
            }

            WaitForKey();
        }

        /// <summary>
        /// Passes the raw-image folder and the processed-image folder to
        /// OCRTrainingHelper.PreprocessTrainingImages. Returns early (without pausing)
        /// when the input folder does not exist.
        /// </summary>
        private static void PreprocessTrainingImagesExample()
        {
            Console.WriteLine("\n=== 预处理训练图片 ===\n");

            string inputFolder = RawImagesFolder;
            string outputFolder = ProcessedImagesFolder;

            if (!Directory.Exists(inputFolder))
            {
                Console.WriteLine($"输入文件夹不存在:{inputFolder}");
                Console.WriteLine("请先创建训练项目并放入原始图片");
                return;
            }

            try
            {
                OCRTrainingHelper.PreprocessTrainingImages(
                    inputFolder,
                    outputFolder,
                    targetWidth: 800,
                    targetHeight: 0 // original author's note: height is computed automatically — confirm in OCRTrainingHelper
                );
            }
            catch (Exception ex)
            {
                Console.WriteLine($"处理失败:{ex.Message}");
            }

            WaitForKey();
        }

        /// <summary>
        /// Runs OCRTrainingHelper.ValidateDataset on the processed-image folder and reports
        /// whether the returned collection of missing files is empty.
        /// </summary>
        private static void ValidateDatasetExample()
        {
            Console.WriteLine("\n=== 验证训练数据集 ===\n");

            string datasetFolder = ProcessedImagesFolder;

            if (!Directory.Exists(datasetFolder))
            {
                Console.WriteLine($"数据集文件夹不存在:{datasetFolder}");
                return;
            }

            try
            {
                var missingFiles = OCRTrainingHelper.ValidateDataset(datasetFolder);

                if (missingFiles.Count == 0)
                {
                    Console.WriteLine("\n✓ 数据集完整,可以开始训练");
                }
                else
                {
                    Console.WriteLine($"\n✗ 发现 {missingFiles.Count} 个缺少标注文件的图片");
                    Console.WriteLine("请为这些图片创建对应的 .txt 标注文件");
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine($"验证失败:{ex.Message}");
            }

            WaitForKey();
        }

        /// <summary>
        /// Runs OCRTrainingHelper.AnalyzeCharacterFrequency on the labels folder and prints
        /// two hint lines afterwards.
        /// </summary>
        private static void AnalyzeCharactersExample()
        {
            Console.WriteLine("\n=== 分析字符频率 ===\n");

            string labelFolder = LabelsFolder;

            if (!Directory.Exists(labelFolder))
            {
                Console.WriteLine($"标注文件夹不存在:{labelFolder}");
                return;
            }

            try
            {
                // The returned frequency data is not used here; presumably the helper prints
                // its own report — verify against OCRTrainingHelper.
                OCRTrainingHelper.AnalyzeCharacterFrequency(labelFolder);

                Console.WriteLine("\n提示:根据字符频率可以优化白名单配置");
                Console.WriteLine("高频字符应该优先保证识别准确率");
            }
            catch (Exception ex)
            {
                Console.WriteLine($"分析失败:{ex.Message}");
            }

            WaitForKey();
        }

        /// <summary>
        /// PaddleOCR timing demo: runs one untimed detection on the sample image, then times
        /// a second load + detection on a thread-pool task and prints every text block found.
        /// Blocks until a key is pressed and the background detection has finished.
        /// </summary>
        public static void OCRTest1()
        {
            if (!File.Exists(PaddleSampleImagePath))
            {
                Console.WriteLine($"错误:找不到图片文件 {PaddleSampleImagePath}");
                return;
            }

            // A null config is passed through unchanged; presumably the library then falls
            // back to its bundled default models — confirm in the PaddleOCRSharp docs.
            OCRModelConfig config = null;

            using (PaddleOCREngine engine = new PaddleOCREngine(config, ""))
            {
                // Untimed first pass; its result is discarded (presumably a warm-up so that
                // one-off initialisation does not skew the measurement below).
                using (Mat warmupImage = Cv2.ImRead(PaddleSampleImagePath))
                {
                    engine.DetectText(warmupImage.ToBytes());
                }

                // Started before Task.Run, so scheduling and image loading are included in the timing.
                Stopwatch sw = Stopwatch.StartNew();

                Task detection = Task.Run(() =>
                {
                    try
                    {
                        using (Mat image = Cv2.ImRead(PaddleSampleImagePath))
                        {
                            OCRResult ocrResult = engine.DetectText(image.ToBytes());
                            foreach (var item in ocrResult.TextBlocks)
                            {
                                Console.WriteLine("图像1结果:{0}", item.Text);
                            }
                        }

                        sw.Stop();
                        Console.WriteLine("耗时:" + sw.ElapsedMilliseconds);
                    }
                    catch (Exception ex)
                    {
                        // Report here: an exception escaping a fire-and-forget task would
                        // otherwise go unobserved and the demo would fail silently.
                        Console.WriteLine($"发生错误:{ex.Message}");
                    }
                });

                Console.ReadKey();

                // The engine must outlive the background detection; wait before the using
                // block disposes it. The task body catches its own exceptions, so Wait()
                // does not throw.
                detection.Wait();
            }
        }

        /// <summary>
        /// Raw Tesseract demo: recognises the sample image with the "eng" language data from
        /// ./tessdata, restricted to a character whitelist, prints the text and returns it.
        /// </summary>
        /// <returns>The text reported by Tesseract for the sample image.</returns>
        public static string OCRTest2()
        {
            using (var engine = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
            {
                // Only allow English letters, digits and common symbols.
                engine.SetVariable(
                    "tessedit_char_whitelist",
                    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-@#$%");

                using (var img = Pix.LoadFromFile(SampleImagePath))
                using (var page = engine.Process(img))
                {
                    var text = page.GetText();
                    Console.WriteLine(text);
                    return text;
                }
            }
        }
    }
}