Improved postprocessing analysis
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
// See https://aka.ms/new-console-template for more information
|
||||
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
using ImageMagick;
|
||||
using Ocr.Tesseract;
|
||||
@@ -10,22 +11,25 @@ using Ocr.Tesseract.Screenshots.Threshold;
|
||||
using Process.Abstract.Configuration;
|
||||
using Process.Interface;
|
||||
|
||||
var wordRegex = new Regex(
|
||||
@"[\w'\-]{2,}",
|
||||
RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
|
||||
);
|
||||
var tesseractConfig = new TesseractScreenshotConfiguration
|
||||
{
|
||||
DataPath = "tessdata",
|
||||
Languages = new[] { "eng", "deu" }
|
||||
};
|
||||
|
||||
Console.WriteLine("Hello, World!");
|
||||
var jsonOptions = new JsonSerializerOptions() { WriteIndented = true };
|
||||
|
||||
var processor = MakeProcessor();
|
||||
processor.Process(new[] { new MagickImage(args.Single()) });
|
||||
|
||||
return;
|
||||
|
||||
IProcessorChain<MagickImage, ScanResult> MakeProcessor()
|
||||
{
|
||||
var wordRegex = new Regex(
|
||||
@"[\w'\-]{2,}",
|
||||
RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
|
||||
);
|
||||
|
||||
var tesseractConfig = new TesseractScreenshotConfiguration
|
||||
{
|
||||
DataPath = "tessdata",
|
||||
Languages = new[] { "eng", "deu" }
|
||||
};
|
||||
|
||||
var preprocessing = new ProcessorChainConfiguration<MagickImage, MagickImage>()
|
||||
.Use(new CloneImageProcessor())
|
||||
.Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
|
||||
@@ -36,11 +40,15 @@ IProcessorChain<MagickImage, ScanResult> MakeProcessor()
|
||||
.Complete(new NegateCloneProcessor());
|
||||
|
||||
var postprocessing = new ProcessorChainConfiguration<ScanResult, ScanResult>()
|
||||
.Use(new ProcessingEvent<ScanResult>((_, data) => WriteToFile(data, "source")))
|
||||
.Use(new ConfidenceFilter(50))
|
||||
// todo insert processing events and write to json files
|
||||
.Use(new ProcessingEvent<ScanResult>((_, data) => WriteToFile(data, "confidence")))
|
||||
.Use(new ToLowerProcessor())
|
||||
.Use(new ProcessingEvent<ScanResult>((_, data) => WriteToFile(data, "normalize")))
|
||||
.Use(new DuplicateFilter())
|
||||
.Complete(new RegexFilter(wordRegex));
|
||||
.Use(new ProcessingEvent<ScanResult>((_, data) => WriteToFile(data, "duplicates")))
|
||||
.Use(new RegexFilter(wordRegex))
|
||||
.Complete(new ProcessingEvent<ScanResult>((_, data) => WriteToFile(data, "regex")));
|
||||
|
||||
var scan = new TesseractProcessor(tesseractConfig);
|
||||
|
||||
@@ -49,3 +57,25 @@ IProcessorChain<MagickImage, ScanResult> MakeProcessor()
|
||||
.Use(scan)
|
||||
.Complete(postprocessing);
|
||||
}
|
||||
|
||||
// Dumps one snapshot of the scan results into two JSON files in the current
// working directory:
//   "<name>.detailed.json" - one WordInfo (text + confidence) per result
//   "<name>.json"          - only the text of each result
// Both files are created or overwritten (FileMode.Create) and serialized with
// the jsonOptions instance declared in the top-level statements.
void WriteToFile(ICollection<ScanResult> data, string name)
{
    // Detailed dump: every result projected through WordInfo.Create.
    var detailedPath = $"{name}.detailed.json";
    using var detailedStream = File.Open(detailedPath, FileMode.Create);
    var detailedEntries =
        from entry in data
        select WordInfo.Create(entry);
    JsonSerializer.Serialize(detailedStream, detailedEntries, jsonOptions);

    // Plain dump: only the recognized text of every result.
    var plainPath = $"{name}.json";
    using var plainStream = File.Open(plainPath, FileMode.Create);
    var plainEntries =
        from entry in data
        select entry.Word.Text;
    JsonSerializer.Serialize(plainStream, plainEntries, jsonOptions);
}
|
||||
|
||||
/// <summary>
/// Flat, serializable view of a single <c>ScanResult</c>: the word's text
/// together with its confidence value.
/// </summary>
internal struct WordInfo
{
    /// <summary>Text taken from <c>ScanResult.Word.Text</c>.</summary>
    public string Text { get; set; }

    /// <summary>Confidence taken from <c>ScanResult.Word.Confidence</c>.</summary>
    public double Confidence { get; set; }

    /// <summary>
    /// Builds a <see cref="WordInfo"/> by copying the text and confidence of
    /// the word held by <paramref name="result"/>.
    /// </summary>
    /// <param name="result">Scan result whose word is copied.</param>
    /// <returns>A new <see cref="WordInfo"/> carrying the copied values.</returns>
    public static WordInfo Create(ScanResult result)
    {
        WordInfo info = default;
        info.Text = result.Word.Text;
        info.Confidence = result.Word.Confidence;
        return info;
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"profiles": {
|
||||
"Refresh thesis results": {
|
||||
"commandName": "Project",
|
||||
"commandLineArgs": "source.png",
|
||||
"workingDirectory": "C:\\Users\\Simon\\Documents\\Userdata\\FH\\SEM5\\BA\\bsc\\include\\postprocessing"
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user