using OpenCvSharp;
using PaddleOCRSharp;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Tesseract;
namespace OCRTest
{
internal class Program
{
/// <summary>
/// Program entry point: prints the test-mode menu, reads one line from standard input and
/// runs the matching routine. Any unrecognized input falls back to the OCR recognition test.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    Console.WriteLine("请选择测试模式:");
    Console.WriteLine("1. 测试OCR识别引擎");
    Console.WriteLine("2. 创建训练项目示例");
    Console.WriteLine("3. 预处理训练图片");
    Console.WriteLine("4. 验证训练数据集");
    Console.WriteLine("5. 分析字符频率");
    Console.Write("\n请输入选项(1-5):");

    // ReadLine returns null at end of input, hence the null-conditional call.
    // Trimming lets input such as "1 " or " 2" select the intended option instead of
    // being rejected as invalid.
    string choice = Console.ReadLine()?.Trim();
    switch (choice)
    {
        case "1":
            TestOCREngine();
            break;
        case "2":
            CreateTrainingProjectExample();
            break;
        case "3":
            PreprocessTrainingImagesExample();
            break;
        case "4":
            ValidateDatasetExample();
            break;
        case "5":
            AnalyzeCharactersExample();
            break;
        default:
            // Covers null (end of input), empty and unknown choices.
            Console.WriteLine("无效选项,默认执行OCR识别测试");
            TestOCREngine();
            break;
    }
}
/// <summary>
/// Exercises the OCR engine wrapper against one hard-coded test image in five ways:
/// the default singleton, a language-specific instance, a custom-configured instance,
/// batch recognition, and recognition from an in-memory <c>Bitmap</c>.
/// Each run prints its result and elapsed time to the console.
/// </summary>
/// <remarks>
/// Returns immediately (without pausing) when the image file does not exist. Otherwise the
/// method waits for a key press before returning, both after a successful run and after a
/// failure, so that the error output stays readable.
/// </remarks>
static void TestOCREngine()
{
    Console.WriteLine("=== OCR 识别测试 ===\n");

    // Test image path (change this to a real path on your machine).
    string imagePath = "D:/work/WindowsFormsTest/TestImage/wechat_2025-10-16_143143_308.png";
    if (!File.Exists(imagePath))
    {
        Console.WriteLine($"错误:找不到图片文件 {imagePath}");
        return;
    }

    try
    {
        Stopwatch sw = new Stopwatch();

        // Mode 1: the singleton engine (described by the original author as the
        // recommended, best-performing option).
        Console.WriteLine("【方式1】使用单例引擎识别...");
        var ocrEngine = OCREngine.Instance;
        ReportTimedRecognition(sw, () => ocrEngine.RecognizeText(imagePath));

        // Mode 2: explicit language selection (Simplified Chinese + English).
        // The engine is obtained before timing starts, so only recognition is measured.
        Console.WriteLine("【方式2】中英文混合识别...");
        var ocrEngineChi = OCREngine.GetInstance("chi_sim+eng");
        ReportTimedRecognition(sw, () => ocrEngineChi.RecognizeText(imagePath));

        // Mode 3: custom configuration.
        Console.WriteLine("【方式3】使用自定义配置识别...");
        var config = new OCRConfig
        {
            Language = "eng",
            EngineMode = EngineMode.LstmOnly, // LSTM only; faster according to the original author
            Whitelist = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-",
            EnablePreprocessing = true
        };
        var ocrEngineCustom = OCREngine.GetInstance(config);
        ReportTimedRecognition(sw, () => ocrEngineCustom.RecognizeText(imagePath));

        // Mode 4: batch recognition.
        Console.WriteLine("【方式4】批量识别测试...");
        string[] imagePaths = new string[] { imagePath }; // more image paths can be added here
        sw.Restart();
        var results = ocrEngine.RecognizeBatch(imagePaths);
        sw.Stop();
        for (int i = 0; i < results.Count; i++)
        {
            Console.WriteLine($"图片{i + 1}:{results[i]}");
        }
        Console.WriteLine($"批量识别总耗时:{sw.ElapsedMilliseconds} ms\n");

        // Mode 5: recognition from a Bitmap. Loading the bitmap is not part of the timing.
        Console.WriteLine("【方式5】从Bitmap识别...");
        using (var bitmap = new Bitmap(imagePath))
        {
            ReportTimedRecognition(sw, () => ocrEngine.RecognizeFromBitmap(bitmap));
        }

        Console.WriteLine("测试完成!按任意键退出...");
    }
    catch (Exception ex)
    {
        Console.WriteLine($"发生错误:{ex.Message}");
        Console.WriteLine($"堆栈跟踪:{ex.StackTrace}");
        // Prompt before the pause below; without it the error disappears as soon as the
        // console window closes. Same prompt the sibling example methods use.
        Console.WriteLine("\n按任意键继续...");
    }

    // Pause on both the success and the failure path.
    Console.ReadKey();
}

/// <summary>
/// Runs one recognition call, measures only that call, and prints the recognized text
/// followed by the elapsed milliseconds.
/// </summary>
/// <param name="sw">Stopwatch to reuse; it is restarted before the call and stopped after it.</param>
/// <param name="recognize">Delegate that performs the recognition and returns the text.</param>
static void ReportTimedRecognition(Stopwatch sw, Func<string> recognize)
{
    sw.Restart();
    string text = recognize();
    sw.Stop();
    Console.WriteLine($"识别结果:{text}");
    Console.WriteLine($"耗时:{sw.ElapsedMilliseconds} ms\n");
}
/// <summary>
/// Example: creates a sample training project in a hard-coded folder through
/// <c>OCRTrainingHelper.CreateSampleProject</c> and prints the follow-up steps.
/// A failure is reported by message only; in both cases the method then waits for a key press.
/// </summary>
static void CreateTrainingProjectExample()
{
    const string sampleProjectDir = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject";

    Console.WriteLine("\n=== 创建训练项目示例 ===\n");

    try
    {
        OCRTrainingHelper.CreateSampleProject(sampleProjectDir);

        // Success report followed by the next steps for the user.
        string[] successLines =
        {
            "\n项目创建成功!",
            $"项目路径:{sampleProjectDir}",
            "\n下一步:",
            "1. 将训练图片放入 raw_images 文件夹",
            "2. 按照 README.md 的说明进行训练"
        };
        foreach (string line in successLines)
        {
            Console.WriteLine(line);
        }
    }
    catch (Exception failure)
    {
        Console.WriteLine($"创建失败:{failure.Message}");
    }

    Console.WriteLine("\n按任意键继续...");
    Console.ReadKey();
}
/// <summary>
/// Example: preprocesses the raw training images of the sample project by calling
/// <c>OCRTrainingHelper.PreprocessTrainingImages</c> with hard-coded input and output folders.
/// Returns early (without pausing) when the input folder does not exist; otherwise waits for
/// a key press after the helper finished or failed.
/// </summary>
static void PreprocessTrainingImagesExample()
{
    const string rawImagesDir = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject\raw_images";
    const string processedImagesDir = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject\processed_images";
    const int width = 800;
    const int height = 0; // per the original author: the height is calculated automatically

    Console.WriteLine("\n=== 预处理训练图片 ===\n");

    bool hasInput = Directory.Exists(rawImagesDir);
    if (!hasInput)
    {
        Console.WriteLine($"输入文件夹不存在:{rawImagesDir}");
        Console.WriteLine("请先创建训练项目并放入原始图片");
        return;
    }

    try
    {
        OCRTrainingHelper.PreprocessTrainingImages(
            rawImagesDir,
            processedImagesDir,
            targetWidth: width,
            targetHeight: height);
    }
    catch (Exception failure)
    {
        Console.WriteLine($"处理失败:{failure.Message}");
    }

    Console.WriteLine("\n按任意键继续...");
    Console.ReadKey();
}
/// <summary>
/// Example: validates the processed-images folder of the sample project with
/// <c>OCRTrainingHelper.ValidateDataset</c> and reports whether any entries were returned.
/// Returns early (without pausing) when the folder does not exist; otherwise waits for a
/// key press after the check finished or failed.
/// </summary>
static void ValidateDatasetExample()
{
    const string datasetDir = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject\processed_images";

    Console.WriteLine("\n=== 验证训练数据集 ===\n");

    if (!Directory.Exists(datasetDir))
    {
        Console.WriteLine($"数据集文件夹不存在:{datasetDir}");
        return;
    }

    try
    {
        // Judging by the messages below, each returned entry is an image that lacks a
        // label file; the helper's exact contract is not visible from this file.
        var unlabeled = OCRTrainingHelper.ValidateDataset(datasetDir);
        if (unlabeled.Count != 0)
        {
            Console.WriteLine($"\n✗ 发现 {unlabeled.Count} 个缺少标注文件的图片");
            Console.WriteLine("请为这些图片创建对应的 .txt 标注文件");
        }
        else
        {
            Console.WriteLine("\n✓ 数据集完整,可以开始训练");
        }
    }
    catch (Exception failure)
    {
        Console.WriteLine($"验证失败:{failure.Message}");
    }

    Console.WriteLine("\n按任意键继续...");
    Console.ReadKey();
}
/// <summary>
/// Example: runs <c>OCRTrainingHelper.AnalyzeCharacterFrequency</c> over the labels folder of
/// the sample project and then prints two hints about using the frequencies.
/// Returns early (without pausing) when the folder does not exist; otherwise waits for a
/// key press after the analysis finished or failed.
/// </summary>
static void AnalyzeCharactersExample()
{
    const string labelsDir = @"D:\work\WindowsFormsTest\OCRTest\TrainingProject\labels";

    Console.WriteLine("\n=== 分析字符频率 ===\n");

    if (!Directory.Exists(labelsDir))
    {
        Console.WriteLine($"标注文件夹不存在:{labelsDir}");
        return;
    }

    try
    {
        // The returned frequency data is not used here.
        // NOTE(review): presumably the helper prints its own report - confirm.
        OCRTrainingHelper.AnalyzeCharacterFrequency(labelsDir);

        Console.WriteLine("\n提示:根据字符频率可以优化白名单配置");
        Console.WriteLine("高频字符应该优先保证识别准确率");
    }
    catch (Exception failure)
    {
        Console.WriteLine($"分析失败:{failure.Message}");
    }

    Console.WriteLine("\n按任意键继续...");
    Console.ReadKey();
}
/// <summary>
/// Runs PaddleOCR text detection on one hard-coded test image: first once on the calling
/// thread, then once more on a thread-pool task that prints every detected text block and
/// the elapsed milliseconds.
/// </summary>
/// <remarks>
/// The method blocks until a key is pressed and the background run has finished. The engine
/// and the loaded images are disposed before it returns. An exception in the background run
/// is printed to the console instead of being lost.
/// </remarks>
public static void OCRTest1()
{
    const string imagePath = "D:/work/WindowsFormsTest/TestImage/Date1_20260411170215701.jpg";

    OCRModelConfig config = null;
    using (PaddleOCREngine engine = new PaddleOCREngine(config, ""))
    {
        // The result of this first detection is discarded.
        // NOTE(review): presumably a warm-up so the timed run below excludes one-time
        // engine initialization - confirm.
        using (Mat warmupImage = Cv2.ImRead(imagePath))
        {
            engine.DetectText(warmupImage.ToBytes());
        }

        // Started before the task is queued, so scheduling and image loading are measured too.
        Stopwatch sw = Stopwatch.StartNew();
        Task worker = Task.Run(() =>
        {
            try
            {
                using (Mat image = Cv2.ImRead(imagePath))
                {
                    OCRResult ocrResult = engine.DetectText(image.ToBytes());
                    foreach (var item in ocrResult.TextBlocks)
                    {
                        Console.WriteLine("图像1结果:{0}", item.Text);
                    }
                    sw.Stop();
                    Console.WriteLine("耗时:" + sw.ElapsedMilliseconds);
                }
            }
            catch (Exception ex)
            {
                // Without this the failure would stay unobserved on the thread pool.
                Console.WriteLine($"发生错误:{ex.Message}");
            }
        });

        Console.ReadKey();

        // The engine must not be disposed while the background run still uses it.
        worker.Wait();
    }
}
/// <summary>
/// Recognizes the text of one hard-coded test image with a Tesseract engine created from
/// <c>./tessdata</c> for the <c>eng</c> language, prints the text and returns it.
/// </summary>
/// <returns>The text reported by Tesseract for the processed page.</returns>
public static string OCRTest2()
{
    using (var tesseract = new TesseractEngine(@"./tessdata", "eng", EngineMode.Default))
    {
        // Restrict recognition to English letters, digits and common symbols.
        tesseract.SetVariable("tessedit_char_whitelist", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789 .,!?-@#$%");

        using (var picture = Pix.LoadFromFile("D:/work/WindowsFormsTest/TestImage/wechat_2025-10-16_143143_308.png"))
        using (var page = tesseract.Process(picture))
        {
            string recognized = page.GetText();
            Console.WriteLine(recognized);
            return recognized;
        }
    }
}
}
}