Added split processors

This commit is contained in:
Simon Gruber
2023-11-14 14:51:16 +01:00
parent db6e403c01
commit fd26cf45e0
12 changed files with 207 additions and 71 deletions
+30 -7
View File
@@ -5,6 +5,7 @@ using Ocr.Tesseract.Configuration;
using Ocr.Tesseract.Models;
using Ocr.Tesseract.Screenshots;
using Ocr.Tesseract.Screenshots.Configuration;
using Ocr.Tesseract.Screenshots.Threshold;
using Process.Abstract.Configuration;
using Process.Interface;
using System.Text.RegularExpressions;
@@ -69,15 +70,37 @@ namespace Common
// Builds the MagickImage -> ScanResult pipeline out of three parts: an image
// preprocessing chain, the Tesseract scan step, and a result postprocessing chain.
// NOTE(review): the statements from the first `return new ProcessorChainConfiguration`
// down to the first `.Use(new DuplicateFilter())` look like the pre-change side of this
// diff rendered without +/- markers (they reference ScreenshotProcessor, whose file is
// removed further down in this commit) — confirm before treating this span as compilable.
private IProcessor<MagickImage, ScanResult> MakeProcessor()
{
return new ProcessorChainConfiguration<MagickImage, ScanResult>()
.Use(new ScreenshotProcessor(ImageProcessorConfiguration)) // Preprocess input data
.Use(new ProcessingEvent<MagickImage>(OnProcessing)) // Scan
.Use(new TesseractProcessor(TesseractConfiguration)) // Scan
.Use(new ProcessingEvent<ScanResult>(OnProcessed)) // Scan
.Use(new ConfidenceFilter(50)) // Process output data
.Use(new DuplicateFilter())
// Active threshold step: adaptive thresholding sized from the image processor configuration.
var threshold =
new ThresholdAdaptiveProcessor(ImageProcessorConfiguration.ThresholdWidth,
ImageProcessorConfiguration.ThresholdHeight);
// Alternative threshold processors, currently disabled:
// var threshold = new AutoThresholdProcessor(AutoThresholdMethod.Kapur);
// var threshold = new AutoThresholdProcessor(AutoThresholdMethod.OTSU);
// var threshold = new AutoThresholdProcessor(AutoThresholdMethod.Triangle);
// var threshold = new ThresholdProcessor(60);
// Image-side chain: clone, resize, normalize, threshold, add a border of 10, binarize.
// It completes with NegateCloneProcessor, which emits each image plus a negated clone.
var preprocessing = new ProcessorChainConfiguration<MagickImage, MagickImage>()
.Use(new CloneImageProcessor())
.Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
.Use(new NormalizeProcessor())
.Use(threshold)
.Use(new AddBorderProcessor(10))
.Use(new BinarizeProcessor())
.Complete(new NegateCloneProcessor());
// Result-side chain applied to the scan output; completes with a RegexFilter built from wordRegex.
var postprocessing = new ProcessorChainConfiguration<ScanResult, ScanResult>()
.Use(new ConfidenceFilter(50))
.Use(new ToLowerProcessor())
.Use(new DuplicateFilter())
.Complete(new RegexFilter(wordRegex));
var scan = new TesseractProcessor(TesseractConfiguration);
// Final chain: preprocessing -> OnProcessing hook -> scan -> OnProcessed hook -> postprocessing.
return new ProcessorChainConfiguration<MagickImage, ScanResult>()
.Use(preprocessing)
.Use(new ProcessingEvent<MagickImage>(OnProcessing))
.Use(scan)
.Use(new ProcessingEvent<ScanResult>(OnProcessed))
.Complete(postprocessing);
}
protected virtual void OnProcessing(IProcessor sender, ICollection<MagickImage> inputs)
@@ -0,0 +1,19 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Pipeline step that puts a white border of a fixed thickness around each image.
/// </summary>
public class AddBorderProcessor : SingleImageProcessor
{
    private readonly int _thickness;

    /// <summary>Creates the border step.</summary>
    /// <param name="thickness">
    /// Border thickness passed to <c>AddBorder</c>; a value of zero or below turns the step into a pass-through.
    /// </param>
    public AddBorderProcessor(int thickness) => _thickness = thickness;

    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        // Nothing to add for a non-positive thickness: hand the image back untouched.
        if (_thickness <= 0)
        {
            return image;
        }

        return image.AddBorder(_thickness, MagickColors.White);
    }
}
@@ -0,0 +1,10 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Pipeline step that runs the <c>ToBinary</c> extension on each image.
/// </summary>
public class BinarizeProcessor : SingleImageProcessor
{
    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        var binary = image.ToBinary();
        return binary;
    }
}
@@ -0,0 +1,10 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Pipeline step that replaces each incoming image with the result of <c>CloneImage</c>.
/// </summary>
public class CloneImageProcessor : SingleImageProcessor
{
    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        var copy = image.CloneImage();
        return copy;
    }
}
@@ -0,0 +1,19 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
using Process.Abstract;
using System.Collections.Generic;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Pipeline step that doubles the stream: every input image is emitted as-is,
/// immediately followed by a clone of it with <c>NegateColors</c> applied.
/// </summary>
public class NegateCloneProcessor : Processor<MagickImage, MagickImage>
{
    /// <inheritdoc />
    public override IEnumerable<MagickImage> Process(IEnumerable<MagickImage> inputs)
    {
        using var cursor = inputs.GetEnumerator();

        while (cursor.MoveNext())
        {
            var original = cursor.Current;

            // The untouched image goes out first ...
            yield return original;

            // ... and the negated clone is only built once the consumer asks for the next item.
            var negative = original.CloneImage().NegateColors();
            yield return negative;
        }
    }
}
@@ -0,0 +1,13 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Pipeline step that applies, in this order, <c>NormalizeImage</c>,
/// <c>RemoveAlpha</c> (with white) and <c>ToGrayscale</c> to each image.
/// </summary>
public class NormalizeProcessor : SingleImageProcessor
{
    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        var normalized = image.NormalizeImage();
        var withoutAlpha = normalized.RemoveAlpha(MagickColors.White);
        return withoutAlpha.ToGrayscale();
    }
}
@@ -0,0 +1,28 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Pipeline step that resizes each image by a fixed factor and then resamples it
/// to a fixed density in pixels per inch.
/// </summary>
public class ResizeProcessor : SingleImageProcessor
{
    // Factor handed to ResizeImage.
    private const float ScaleFactor = 2f;

    // Density handed to Resample, in DensityUnit.PixelsPerInch.
    private const int TargetDensity = 300;

    private readonly FilterType _filterType;
    private readonly PixelInterpolateMethod _interpolateMethod;

    /// <summary>Creates the resize step.</summary>
    /// <param name="filterType">Filter forwarded to <c>ResizeImage</c>.</param>
    /// <param name="interpolateMethod">Interpolation method forwarded to <c>ResizeImage</c>.</param>
    public ResizeProcessor(FilterType filterType, PixelInterpolateMethod interpolateMethod)
    {
        (_filterType, _interpolateMethod) = (filterType, interpolateMethod);
    }

    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        var enlarged = image.ResizeImage(ScaleFactor, _filterType, _interpolateMethod);
        return enlarged.Resample(TargetDensity, DensityUnit.PixelsPerInch);
    }
}
@@ -1,64 +0,0 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
using Ocr.Tesseract.Screenshots.Configuration;
using System.Collections.Generic;
using System.Linq;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Image processor that derives several variants of a screenshot, driven by the flags and
/// values of a <see cref="ScreenshotProcessorConfiguration"/>.
/// </summary>
public class ScreenshotProcessor : ImageProcessor
{
/// <summary>Creates the processor with the given configuration, which is passed to the base class.</summary>
/// <param name="configuration">Settings read through <c>Configuration</c> while processing.</param>
public ScreenshotProcessor(ScreenshotProcessorConfiguration configuration)
: base(configuration)
{
}
/// <summary>
/// Produces the image variants for a single input image. Implemented as an iterator,
/// so the steps below run as the returned sequence is enumerated.
/// </summary>
/// <param name="image">Source image; all work starts from <c>image.CloneImage()</c>.</param>
/// <returns>
/// When resizing is enabled, a clone of the resized image; then always the fully processed
/// image followed by a clone of it with <c>NegateColors</c> applied.
/// </returns>
public override IEnumerable<MagickImage> Process(MagickImage image)
{
// Work on a clone rather than on the caller's image.
var tImage = image.CloneImage();
if (Configuration.EnableResizing)
{
// Resize by factor 2 and resample to 300 pixels per inch.
tImage = tImage
.ResizeImage(
2f,
FilterType.Lanczos2Sharp,
PixelInterpolateMethod.Mesh
)
.Resample(300, DensityUnit.PixelsPerInch);
// Emit the resized-only variant as its own clone.
// NOTE(review): presumably cloned so the later steps cannot alter it; whether the
// extension methods mutate in place is not visible here — confirm.
yield return tImage.CloneImage();
}
if (Configuration.EnableThresholding)
{
// Normalize, remove alpha against white, convert to grayscale, adaptive threshold, binarize.
tImage = tImage
.NormalizeImage()
.RemoveAlpha(MagickColors.White)
.ToGrayscale()
.ThresholdAdaptive(Configuration.ThresholdWidth, Configuration.ThresholdHeight)
.ToBinary();
}
if (Configuration.Border > 0)
{
// A white border is only added for a positive configured border value.
tImage = tImage.AddBorder(Configuration.Border, MagickColors.White);
}
// Always emit the processed image and a negated clone of it.
yield return tImage;
yield return tImage.CloneImage().NegateColors();
}
#region Overrides of Processor<MagickImage,IMagickImageValueProcessorSettings>
/// <summary>
/// Applies the single-image overload to every input and flattens the results into one sequence.
/// </summary>
/// <param name="inputs">Images to process.</param>
/// <returns>The concatenated variants of all inputs, in input order.</returns>
public override IEnumerable<MagickImage> Process(IEnumerable<MagickImage> inputs)
{
return inputs.SelectMany(Process);
}
#endregion
}
@@ -0,0 +1,17 @@
using ImageMagick;
using Process.Abstract;
using System.Collections.Generic;
using System.Linq;
namespace Ocr.Tesseract.Screenshots;
/// <summary>
/// Base class for pipeline steps that map exactly one input image to one output image.
/// Derived classes implement the single-image overload; the sequence overload applies it
/// to every element.
/// </summary>
public abstract class SingleImageProcessor : Processor<MagickImage, MagickImage>
{
    /// <inheritdoc />
    public override IEnumerable<MagickImage> Process(IEnumerable<MagickImage> inputs)
    {
        // Select keeps the mapping deferred: images are processed as the result is enumerated.
        return inputs.Select(image => Process(image));
    }

    /// <summary>Transforms a single image.</summary>
    /// <param name="image">The image to transform.</param>
    /// <returns>The image to pass on to the next step.</returns>
    protected abstract MagickImage Process(MagickImage image);
}
@@ -0,0 +1,20 @@
using ImageMagick;
namespace Ocr.Tesseract.Screenshots.Threshold;
/// <summary>
/// Pipeline step that calls <c>AutoThreshold</c> on each image with a fixed method
/// and passes the same image instance on.
/// </summary>
public class AutoThresholdProcessor : SingleImageProcessor
{
    private readonly AutoThresholdMethod _method;

    /// <summary>Creates the step.</summary>
    /// <param name="method">Method forwarded to <c>AutoThreshold</c>.</param>
    public AutoThresholdProcessor(AutoThresholdMethod method) => _method = method;

    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        var target = image;

        // AutoThreshold is invoked for its effect on the image; the same instance is returned.
        target.AutoThreshold(_method);

        return target;
    }
}
@@ -0,0 +1,20 @@
using ImageMagick;
using Ocr.Tesseract.Extensions;
namespace Ocr.Tesseract.Screenshots.Threshold;
/// <summary>
/// Pipeline step that applies the <c>ThresholdAdaptive</c> extension to each image
/// with a fixed width and height.
/// </summary>
public class ThresholdAdaptiveProcessor : SingleImageProcessor
{
    private readonly int _width;
    private readonly int _height;

    /// <summary>Creates the step.</summary>
    /// <param name="width">Width forwarded to <c>ThresholdAdaptive</c>.</param>
    /// <param name="height">Height forwarded to <c>ThresholdAdaptive</c>.</param>
    public ThresholdAdaptiveProcessor(int width, int height)
        => (_width, _height) = (width, height);

    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        var thresholded = image.ThresholdAdaptive(_width, _height);
        return thresholded;
    }
}
@@ -0,0 +1,21 @@
using ImageMagick;
namespace Ocr.Tesseract.Screenshots.Threshold;
/// <summary>
/// Pipeline step that calls <c>Threshold</c> on each image with a fixed percentage
/// and passes the same image instance on.
/// </summary>
public class ThresholdProcessor : SingleImageProcessor
{
    private readonly Percentage _percentage;

    /// <summary>Creates the step.</summary>
    /// <param name="percentage">Value wrapped in a <see cref="Percentage"/> and forwarded to <c>Threshold</c>.</param>
    public ThresholdProcessor(int percentage) => _percentage = new Percentage(percentage);

    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        var target = image;

        // Threshold is invoked for its effect on the image; the same instance is returned.
        target.Threshold(_percentage);

        return target;
    }
}