| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359 |
- using OpenCvSharp;
- using PaddleOCRSharp;
- using System;
- using System.Collections.Generic;
- using System.Diagnostics;
- using System.Drawing;
- using System.IO;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- using Tesseract;
- namespace OCRTest
- {
- internal class Program
- {
/// <summary>
/// Console entry point: prints the test-mode menu, reads one line of input and runs the
/// matching demo routine. Any input other than "1".."5" falls back to the OCR recognition test.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Menu text, one entry per console line.
    string[] menuLines =
    {
        "请选择测试模式:",
        "1. 测试OCR识别引擎",
        "2. 创建训练项目示例",
        "3. 预处理训练图片",
        "4. 验证训练数据集",
        "5. 分析字符频率"
    };
    foreach (string menuLine in menuLines)
    {
        Console.WriteLine(menuLine);
    }
    Console.Write("\n请输入选项(1-5):");

    string selection = Console.ReadLine();

    if (selection == "1")
    {
        TestOCREngine();
    }
    else if (selection == "2")
    {
        CreateTrainingProjectExample();
    }
    else if (selection == "3")
    {
        PreprocessTrainingImagesExample();
    }
    else if (selection == "4")
    {
        ValidateDatasetExample();
    }
    else if (selection == "5")
    {
        AnalyzeCharactersExample();
    }
    else
    {
        // Unrecognized (or missing) input: report it and run the default test.
        Console.WriteLine("无效选项,默认执行OCR识别测试");
        TestOCREngine();
    }
}
/// <summary>
/// Exercises the OCR engine wrapper on one hard-coded sample image in five ways:
/// the shared instance, a language-specific instance, a custom-configured instance,
/// batch recognition, and recognition from an in-memory <see cref="Bitmap"/>.
/// Each result and its elapsed time are written to the console.
/// </summary>
/// <remarks>
/// Any exception is caught and reported (message plus stack trace). The method always waits
/// for a key press before returning once the image file has been found, so an error report
/// stays visible instead of disappearing with the console window.
/// </remarks>
static void TestOCREngine()
{
    Console.WriteLine("=== OCR 识别测试 ===\n");

    // Sample image path (change to a real path on the local machine).
    string imagePath = "D:/work/WindowsFormsTest/TestImage/wechat_2025-10-16_143143_308.png";
    if (!File.Exists(imagePath))
    {
        Console.WriteLine($"错误:找不到图片文件 {imagePath}");
        return;
    }

    try
    {
        // Mode 1: the shared engine instance exposed by OCREngine.Instance.
        Console.WriteLine("【方式1】使用单例引擎识别...");
        var ocrEngine = OCREngine.Instance;
        ReportTimedRecognition(() => ocrEngine.RecognizeText(imagePath));

        // Mode 2: an instance requested for a specific language set (Chinese + English).
        Console.WriteLine("【方式2】中英文混合识别...");
        var ocrEngineChi = OCREngine.GetInstance("chi_sim+eng");
        ReportTimedRecognition(() => ocrEngineChi.RecognizeText(imagePath));

        // Mode 3: an instance built from a custom configuration.
        Console.WriteLine("【方式3】使用自定义配置识别...");
        var config = new OCRConfig
        {
            Language = "eng",
            EngineMode = EngineMode.LstmOnly, // LSTM engine only (chosen by the original author for speed)
            Whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-",
            EnablePreprocessing = true
        };
        var ocrEngineCustom = OCREngine.GetInstance(config);
        ReportTimedRecognition(() => ocrEngineCustom.RecognizeText(imagePath));

        // Mode 4: batch recognition; more image paths can be added to the array.
        Console.WriteLine("【方式4】批量识别测试...");
        string[] imagePaths = new string[] { imagePath };

        Stopwatch batchWatch = Stopwatch.StartNew();
        var results = ocrEngine.RecognizeBatch(imagePaths);
        batchWatch.Stop();

        for (int i = 0; i < results.Count; i++)
        {
            Console.WriteLine($"图片{i + 1}:{results[i]}");
        }
        Console.WriteLine($"批量识别总耗时:{batchWatch.ElapsedMilliseconds} ms\n");

        // Mode 5: recognition from a Bitmap that is loaded (and disposed) here.
        Console.WriteLine("【方式5】从Bitmap识别...");
        using (var bitmap = new Bitmap(imagePath))
        {
            ReportTimedRecognition(() => ocrEngine.RecognizeFromBitmap(bitmap));
        }

        Console.WriteLine("测试完成!按任意键退出...");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"发生错误:{ex.Message}");
        Console.WriteLine($"堆栈跟踪:{ex.StackTrace}");
        Console.WriteLine("按任意键退出...");
    }

    // Pause on both the success and the failure path so the output can be read.
    Console.ReadKey();
}

/// <summary>
/// Runs one recognition call, measures only that call, and prints the recognized text
/// followed by the elapsed milliseconds.
/// </summary>
/// <param name="recognize">Delegate performing the recognition and returning its text.</param>
private static void ReportTimedRecognition(Func<string> recognize)
{
    Stopwatch watch = Stopwatch.StartNew();
    string text = recognize();
    watch.Stop();

    Console.WriteLine($"识别结果:{text}");
    Console.WriteLine($"耗时:{watch.ElapsedMilliseconds} ms\n");
}
/// <summary>
/// Demo: asks <c>OCRTrainingHelper.CreateSampleProject</c> to create a sample training project
/// in a hard-coded folder, prints follow-up instructions on success or the error message on
/// failure, then waits for a key press.
/// </summary>
static void CreateTrainingProjectExample()
{
    Console.WriteLine("\n=== 创建训练项目示例 ===\n");

    string targetDirectory = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject";

    try
    {
        OCRTrainingHelper.CreateSampleProject(targetDirectory);

        // Success report followed by the next steps, one console line per entry.
        string[] followUpLines =
        {
            "\n项目创建成功!",
            "项目路径:" + targetDirectory,
            "\n下一步:",
            "1. 将训练图片放入 raw_images 文件夹",
            "2. 按照 README.md 的说明进行训练"
        };
        foreach (string followUpLine in followUpLines)
        {
            Console.WriteLine(followUpLine);
        }
    }
    catch (Exception failure)
    {
        Console.WriteLine("创建失败:" + failure.Message);
    }

    Console.WriteLine("\n按任意键继续...");
    Console.ReadKey();
}
/// <summary>
/// Demo: runs <c>OCRTrainingHelper.PreprocessTrainingImages</c> from the project's
/// <c>raw_images</c> folder into <c>processed_images</c>, requesting a target width of 800
/// and a target height of 0. Returns immediately (without pausing) when the input folder
/// does not exist; otherwise waits for a key press at the end.
/// </summary>
static void PreprocessTrainingImagesExample()
{
    Console.WriteLine("\n=== 预处理训练图片 ===\n");

    string projectRoot = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject";
    string rawDirectory = projectRoot + @"\raw_images";
    string processedDirectory = projectRoot + @"\processed_images";

    bool hasInput = Directory.Exists(rawDirectory);
    if (hasInput)
    {
        try
        {
            // A height of 0 asks the helper to derive the height itself (per the original note).
            OCRTrainingHelper.PreprocessTrainingImages(rawDirectory, processedDirectory, targetWidth: 800, targetHeight: 0);
        }
        catch (Exception failure)
        {
            Console.WriteLine("处理失败:" + failure.Message);
        }

        Console.WriteLine("\n按任意键继续...");
        Console.ReadKey();
    }
    else
    {
        Console.WriteLine("输入文件夹不存在:" + rawDirectory);
        Console.WriteLine("请先创建训练项目并放入原始图片");
    }
}
/// <summary>
/// Demo: calls <c>OCRTrainingHelper.ValidateDataset</c> on the hard-coded
/// <c>processed_images</c> folder and reports whether the returned collection of missing
/// entries is empty. Returns immediately (without pausing) when the folder does not exist;
/// otherwise waits for a key press at the end.
/// </summary>
static void ValidateDatasetExample()
{
    Console.WriteLine("\n=== 验证训练数据集 ===\n");

    string datasetDirectory = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject\processed_images";

    bool datasetPresent = Directory.Exists(datasetDirectory);
    if (datasetPresent)
    {
        try
        {
            var unlabeled = OCRTrainingHelper.ValidateDataset(datasetDirectory);

            if (unlabeled.Count != 0)
            {
                // Some images have no matching annotation file.
                Console.WriteLine($"\n✗ 发现 {unlabeled.Count} 个缺少标注文件的图片");
                Console.WriteLine("请为这些图片创建对应的 .txt 标注文件");
            }
            else
            {
                Console.WriteLine("\n✓ 数据集完整,可以开始训练");
            }
        }
        catch (Exception failure)
        {
            Console.WriteLine("验证失败:" + failure.Message);
        }

        Console.WriteLine("\n按任意键继续...");
        Console.ReadKey();
    }
    else
    {
        Console.WriteLine("数据集文件夹不存在:" + datasetDirectory);
    }
}
/// <summary>
/// Demo: calls <c>OCRTrainingHelper.AnalyzeCharacterFrequency</c> on the hard-coded
/// <c>labels</c> folder and then prints two hint lines. The value returned by the helper is
/// not used here. Returns immediately (without pausing) when the folder does not exist;
/// otherwise waits for a key press at the end.
/// </summary>
static void AnalyzeCharactersExample()
{
    Console.WriteLine("\n=== 分析字符频率 ===\n");

    string labelDirectory = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject\labels";

    bool labelsPresent = Directory.Exists(labelDirectory);
    if (labelsPresent)
    {
        try
        {
            // Only the call itself matters; its return value is discarded.
            OCRTrainingHelper.AnalyzeCharacterFrequency(labelDirectory);

            string[] hintLines =
            {
                "\n提示:根据字符频率可以优化白名单配置",
                "高频字符应该优先保证识别准确率"
            };
            foreach (string hintLine in hintLines)
            {
                Console.WriteLine(hintLine);
            }
        }
        catch (Exception failure)
        {
            Console.WriteLine("分析失败:" + failure.Message);
        }

        Console.WriteLine("\n按任意键继续...");
        Console.ReadKey();
    }
    else
    {
        Console.WriteLine("标注文件夹不存在:" + labelDirectory);
    }
}
/// <summary>
/// PaddleOCR demo: runs text detection on one hard-coded sample image, first on the calling
/// thread and then once more on a background task. The background run prints every detected
/// text block and the time elapsed since the task was started. The method waits for a key
/// press, then for the background task to finish, before releasing the engine.
/// </summary>
/// <remarks>
/// The engine and every loaded <see cref="Mat"/> are disposed. A failure inside the
/// background task is reported on the console instead of being silently dropped.
/// </remarks>
public static void OCRTest1()
{
    const string imagePath = "D:/work/WindowsFormsTest/TestImage/Date1_20260411170215701.jpg";

    // A null config is passed through unchanged, exactly as the original code did.
    OCRModelConfig config = null;

    using (PaddleOCREngine engine = new PaddleOCREngine(config, ""))
    {
        // First detection on the calling thread; its result is not used.
        // NOTE(review): presumably a warm-up so the timed run excludes first-call cost — confirm.
        using (Mat firstImage = Cv2.ImRead(imagePath))
        {
            // ImRead does not throw for a missing/undecodable file; it returns an empty Mat.
            if (firstImage.Empty())
            {
                Console.WriteLine($"错误:无法读取图片文件 {imagePath}");
                return;
            }
            engine.DetectText(firstImage.ToBytes());
        }

        Stopwatch sw = Stopwatch.StartNew();
        Task worker = Task.Run(() =>
        {
            using (Mat image = Cv2.ImRead(imagePath))
            {
                OCRResult ocrResult = engine.DetectText(image.ToBytes());
                foreach (var item in ocrResult.TextBlocks)
                {
                    Console.WriteLine("图像1结果:{0}", item.Text);
                }
            }
            sw.Stop();
            Console.WriteLine("耗时:" + sw.ElapsedMilliseconds);
        });

        Console.ReadKey();

        // The engine must outlive the task that uses it, so wait before disposing it.
        try
        {
            worker.Wait();
        }
        catch (AggregateException ex)
        {
            Console.WriteLine($"发生错误:{ex.GetBaseException().Message}");
        }
    }
}
/// <summary>
/// Tesseract demo: recognizes the text of one hard-coded sample image with the English
/// model loaded from <c>./tessdata</c>, restricted to a character whitelist, writes the text
/// to the console and returns it.
/// </summary>
/// <returns>The text reported by Tesseract for the processed page.</returns>
public static string OCRTest2()
{
    // Only English letters, digits and a few common symbols may be recognized.
    const string allowedCharacters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-@#$%";
    const string samplePath = "D:/work/WindowsFormsTest/TestImage/wechat_2025-10-16_143143_308.png";

    using (var tesseract = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        tesseract.SetVariable("tessedit_char_whitelist", allowedCharacters);

        using (var picture = Pix.LoadFromFile(samplePath))
        using (var processedPage = tesseract.Process(picture))
        {
            string recognized = processedPage.GetText();
            Console.WriteLine(recognized);
            return recognized;
        }
    }
}
- }
- }
|