|
|
|
@@ -20,7 +20,7 @@ internal class EvaluationProcessor
|
|
|
|
|
/// <see cref="Regex"/> expression for extracting whole words from scan results
|
|
|
|
|
/// </summary>
|
|
|
|
|
private static readonly Regex wordRegex = new(
    // A word is a run of at least two characters drawn from: word characters,
    // apostrophes, hyphens, and the German umlauts / sharp s listed explicitly.
    // Only this one pattern is passed: Regex has no constructor taking two
    // pattern strings, so a superseded pattern must not be left alongside it.
    // NOTE(review): in .NET, \w is Unicode-aware unless RegexOptions.ECMAScript
    // is set, so it should already match the umlauts and sharp s; the explicit
    // characters are kept unchanged to preserve the stated intent.
    @"[\w'\-äöüÄÖÜß]{2,}",
    RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
);
|
|
|
|
|
|
|
|
|
@@ -42,6 +42,24 @@ internal class EvaluationProcessor
|
|
|
|
|
.Use(new DuplicateFilter())
|
|
|
|
|
.Complete(new RegexFilter(wordRegex));
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Assembles the chain that takes a <see cref="MagickImage"/> to a <see cref="ScanResult"/>.
/// </summary>
/// <remarks>
/// An image-to-image preprocessing chain is built first and completed with
/// <see cref="OnPreprocessed"/>. That chain is then registered ahead of
/// <c>tesseractProcessor</c>, and the whole configuration is completed with
/// <c>postProcessor</c>.
/// </remarks>
/// <returns>The completed processor chain.</returns>
private IProcessorChain<MagickImage, ScanResult> MakeProcessor()
{
    // Image-to-image stages, registered in the order listed here.
    var imageStages = new ProcessorChainConfiguration<MagickImage, MagickImage>()
        .Use(new CloneImageProcessor())
        .Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
        .Use(new NormalizeProcessor())
        .Use(_thresholdProcessor)
        .Use(new AddBorderProcessor(10))
        .Use(new BinarizeProcessor())
        .Use(new NegateCloneProcessor());

    // OnPreprocessed closes the preprocessing chain.
    var imagePipeline = imageStages.Complete(OnPreprocessed);

    // Preprocessing first, then the Tesseract stage.
    var scanStages = new ProcessorChainConfiguration<MagickImage, ScanResult>()
        .Use(imagePipeline)
        .Use(tesseractProcessor);

    return scanStages.Complete(postProcessor);
}
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Shared <see cref="TesseractProcessor"/> instance, constructed once from
/// <c>tesseractConfig</c> and registered as a stage in <see cref="MakeProcessor"/>.
/// </summary>
/// <remarks>
/// NOTE(review): if <c>tesseractConfig</c> is itself a static field with an
/// initializer, it must be declared textually before this field, because static
/// field initializers run in declaration order. Confirm against the full file.
/// </remarks>
private static readonly TesseractProcessor tesseractProcessor = new(tesseractConfig);
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Image-to-image stage that <see cref="MakeProcessor"/> registers in the
/// preprocessing chain, after <c>NormalizeProcessor</c> and before
/// <c>AddBorderProcessor</c>.
/// </summary>
/// <remarks>
/// Not initialized at its declaration; presumably assigned in a constructor
/// outside this view (TODO confirm). Going by the type name, it wraps the
/// threshold step with timing, which should be verified against
/// <c>StopwatchProcessor</c>.
/// </remarks>
private readonly StopwatchProcessor<MagickImage, MagickImage> _thresholdProcessor;
|
|
|
|
@@ -75,24 +93,6 @@ internal class EvaluationProcessor
|
|
|
|
|
await JsonSerializer.SerializeAsync(file, result);
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
/// <summary>
/// Assembles the chain that takes a <see cref="MagickImage"/> to a <see cref="ScanResult"/>.
/// </summary>
/// <remarks>
/// An image-to-image preprocessing chain is built first and completed with
/// <see cref="OnPreprocessed"/>. That chain is then registered ahead of
/// <c>tesseractProcessor</c>, and the whole configuration is completed with
/// <c>postProcessor</c>.
/// </remarks>
/// <returns>The completed processor chain.</returns>
private IProcessorChain<MagickImage, ScanResult> MakeProcessor()
{
    // Image-to-image stages, registered in the order listed here.
    var imageStages = new ProcessorChainConfiguration<MagickImage, MagickImage>()
        .Use(new CloneImageProcessor())
        .Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
        .Use(new NormalizeProcessor())
        .Use(_thresholdProcessor)
        .Use(new AddBorderProcessor(10))
        .Use(new BinarizeProcessor())
        .Use(new NegateCloneProcessor());

    // OnPreprocessed closes the preprocessing chain.
    var imagePipeline = imageStages.Complete(OnPreprocessed);

    // Preprocessing first, then the Tesseract stage.
    var scanStages = new ProcessorChainConfiguration<MagickImage, ScanResult>()
        .Use(imagePipeline)
        .Use(tesseractProcessor);

    return scanStages.Complete(postProcessor);
}
|
|
|
|
|
|
|
|
|
|
private IEnumerable<MagickImage> OnPreprocessed(IEnumerable<MagickImage> images)
|
|
|
|
|
{
|
|
|
|
|
var tImages = images.ToArray();
|
|
|
|
|