From fd26cf45e0f74baf330c277fa74897321abad3d9 Mon Sep 17 00:00:00 2001
From: Simon Gruber
Date: Tue, 14 Nov 2023 14:51:16 +0100
Subject: [PATCH] Added split processors

---
Review notes on this copy of the patch (text in this position is ignored by git am):
 * The copy was recovered from text whose line breaks were collapsed and whose
   angle-bracketed content (generic type arguments, XML documentation tags, the
   author address) was stripped. Context lines and removed lines are reproduced
   as recovered and may not match the pre-image byte for byte.
 * Generic arguments are restored only where the code itself fixes them
   (IEnumerable<MagickImage>). The arguments of the Processor base class and of
   the types in ScreenshotScanner.cs could not be determined - TODO confirm
   against Process.Abstract.
 * Added lines were documented and lightly cleaned up after recovery, so the
   blob ids on the "index" lines no longer describe the post-image.
 * MakeProcessor no longer consults EnableResizing / EnableThresholding, which
   the removed ScreenshotProcessor honoured.

 Examples/Common/ScreenshotScanner.cs          | 42 ++++++++++--
 .../AddBorderProcessor.cs                     | 30 +++++++++
 .../BinarizeProcessor.cs                      | 15 +++++
 .../CloneImageProcessor.cs                    | 16 +++++
 .../NegateCloneProcessor.cs                   | 27 ++++++++
 .../NormalizeProcessor.cs                     | 19 ++++++
 .../ResizeProcessor.cs                        | 38 +++++++++++
 .../ScreenshotProcessor.cs                    | 64 -------------------
 .../SingleImageProcessor.cs                   | 28 ++++++++
 .../Threshold/AutoThresholdProcessor.cs       | 29 +++++++++
 .../Threshold/ThresholdAdaptiveProcessor.cs   | 30 +++++++++
 .../Threshold/ThresholdProcessor.cs           | 29 +++++++++
 12 files changed, 296 insertions(+), 71 deletions(-)
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/AddBorderProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/BinarizeProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/CloneImageProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/NegateCloneProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/NormalizeProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/ResizeProcessor.cs
 delete mode 100644 Ocr/Ocr.Tesseract.Screenshots/ScreenshotProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/SingleImageProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/Threshold/AutoThresholdProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdAdaptiveProcessor.cs
 create mode 100644 Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdProcessor.cs

diff --git a/Examples/Common/ScreenshotScanner.cs b/Examples/Common/ScreenshotScanner.cs
index cc61e7e..9a1e4a8 100644
--- a/Examples/Common/ScreenshotScanner.cs
+++ b/Examples/Common/ScreenshotScanner.cs
@@ -5,6 +5,7 @@ using Ocr.Tesseract.Configuration;
 using Ocr.Tesseract.Models;
 using Ocr.Tesseract.Screenshots;
 using Ocr.Tesseract.Screenshots.Configuration;
+using Ocr.Tesseract.Screenshots.Threshold;
 using Process.Abstract.Configuration;
 using Process.Interface;
 using System.Text.RegularExpressions;
@@ -69,15 +70,42 @@ namespace Common
 
         private IProcessor MakeProcessor()
         {
-            return new ProcessorChainConfiguration()
-                .Use(new ScreenshotProcessor(ImageProcessorConfiguration)) // Preprocess input data
-                .Use(new ProcessingEvent(OnProcessing)) // Scan
-                .Use(new TesseractProcessor(TesseractConfiguration)) // Scan
-                .Use(new ProcessingEvent(OnProcessed)) // Scan
-                .Use(new ConfidenceFilter(50)) // Process output data
-                .Use(new DuplicateFilter())
+            // Thresholding step of the preprocessing chain. The adaptive variant takes the
+            // configured ThresholdWidth/ThresholdHeight; AutoThresholdProcessor (Kapur, OTSU or
+            // Triangle) and ThresholdProcessor (fixed percentage, e.g. 60) are drop-in alternatives.
+            var threshold = new ThresholdAdaptiveProcessor(
+                ImageProcessorConfiguration.ThresholdWidth,
+                ImageProcessorConfiguration.ThresholdHeight);
+
+            // Image clean-up ahead of OCR. The closing NegateCloneProcessor emits every image
+            // twice: unchanged and as a clone passed through NegateColors.
+            var preprocessing = new ProcessorChainConfiguration()
+                .Use(new CloneImageProcessor())
+                .Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
+                .Use(new NormalizeProcessor())
+                .Use(threshold)
+                // The border comes from configuration, as it did in the removed ScreenshotProcessor.
+                .Use(new AddBorderProcessor(ImageProcessorConfiguration.Border))
+                .Use(new BinarizeProcessor())
+                .Complete(new NegateCloneProcessor());
+
+            // Filtering of the OCR results. ToLowerProcessor sits ahead of DuplicateFilter,
+            // presumably so that case variants of one word count as duplicates.
+            var postprocessing = new ProcessorChainConfiguration()
+                .Use(new ConfidenceFilter(50))
                 .Use(new ToLowerProcessor())
+                .Use(new DuplicateFilter())
                 .Complete(new RegexFilter(wordRegex));
+
+            var scan = new TesseractProcessor(TesseractConfiguration);
+
+            // The two events bracket the scan step.
+            return new ProcessorChainConfiguration()
+                .Use(preprocessing)
+                .Use(new ProcessingEvent(OnProcessing))
+                .Use(scan)
+                .Use(new ProcessingEvent(OnProcessed))
+                .Complete(postprocessing);
         }
 
         protected virtual void OnProcessing(IProcessor sender, ICollection inputs)
diff --git a/Ocr/Ocr.Tesseract.Screenshots/AddBorderProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/AddBorderProcessor.cs
new file mode 100644
index 0000000..7e81594
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/AddBorderProcessor.cs
@@ -0,0 +1,30 @@
+using ImageMagick;
+using Ocr.Tesseract.Extensions;
+
+namespace Ocr.Tesseract.Screenshots;
+
+/// <summary>
+/// Adds a white border of a fixed thickness to every image passing through.
+/// </summary>
+public class AddBorderProcessor : SingleImageProcessor
+{
+    private readonly int _thickness;
+
+    /// <summary>
+    /// Creates the processor.
+    /// </summary>
+    /// <param name="thickness">
+    /// Border thickness handed to <c>AddBorder</c>. Zero or a negative value turns the step into a no-op.
+    /// </param>
+    public AddBorderProcessor(int thickness)
+    {
+        _thickness = thickness;
+    }
+
+    /// <summary>
+    /// Returns the bordered image, or <paramref name="image"/> itself when the thickness is not positive.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image) => _thickness > 0
+        ? image.AddBorder(_thickness, MagickColors.White)
+        : image;
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/BinarizeProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/BinarizeProcessor.cs
new file mode 100644
index 0000000..88b231e
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/BinarizeProcessor.cs
@@ -0,0 +1,15 @@
+using ImageMagick;
+using Ocr.Tesseract.Extensions;
+
+namespace Ocr.Tesseract.Screenshots;
+
+/// <summary>
+/// Runs every image through the <c>ToBinary</c> extension.
+/// </summary>
+public class BinarizeProcessor : SingleImageProcessor
+{
+    /// <summary>
+    /// Returns the result of <c>image.ToBinary()</c>.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image) => image.ToBinary();
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/CloneImageProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/CloneImageProcessor.cs
new file mode 100644
index 0000000..4d418e4
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/CloneImageProcessor.cs
@@ -0,0 +1,16 @@
+using ImageMagick;
+using Ocr.Tesseract.Extensions;
+
+namespace Ocr.Tesseract.Screenshots;
+
+/// <summary>
+/// Replaces every image with the result of the <c>CloneImage</c> extension. When placed first in a
+/// chain, later steps operate on that result instead of the instance supplied by the caller.
+/// </summary>
+public class CloneImageProcessor : SingleImageProcessor
+{
+    /// <summary>
+    /// Returns the result of <c>image.CloneImage()</c>.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image) => image.CloneImage();
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/NegateCloneProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/NegateCloneProcessor.cs
new file mode 100644
index 0000000..4367c5f
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/NegateCloneProcessor.cs
@@ -0,0 +1,27 @@
+using ImageMagick;
+using Ocr.Tesseract.Extensions;
+using Process.Abstract;
+using System.Collections.Generic;
+
+namespace Ocr.Tesseract.Screenshots;
+
+/// <summary>
+/// Emits two images per input: the input itself, followed by a clone that has been passed
+/// through <c>NegateColors</c>.
+/// </summary>
+public class NegateCloneProcessor : Processor
+{
+    /// <summary>
+    /// Lazily yields each input and then its negated clone, preserving input order.
+    /// </summary>
+    /// <param name="inputs">Images to pass through.</param>
+    /// <returns>For every input, the input followed by <c>input.CloneImage().NegateColors()</c>.</returns>
+    public override IEnumerable<MagickImage> Process(IEnumerable<MagickImage> inputs)
+    {
+        foreach (var input in inputs)
+        {
+            yield return input;
+            yield return input.CloneImage().NegateColors();
+        }
+    }
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/NormalizeProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/NormalizeProcessor.cs
new file mode 100644
index 0000000..e95e9bf
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/NormalizeProcessor.cs
@@ -0,0 +1,19 @@
+using ImageMagick;
+using Ocr.Tesseract.Extensions;
+
+namespace Ocr.Tesseract.Screenshots;
+
+/// <summary>
+/// Chains the <c>NormalizeImage</c>, <c>RemoveAlpha</c> (with <c>MagickColors.White</c>) and
+/// <c>ToGrayscale</c> extensions over every image, in that order.
+/// </summary>
+public class NormalizeProcessor : SingleImageProcessor
+{
+    /// <summary>
+    /// Returns the result of the three chained extension calls on <paramref name="image"/>.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image) => image
+        .NormalizeImage()
+        .RemoveAlpha(MagickColors.White)
+        .ToGrayscale();
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/ResizeProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/ResizeProcessor.cs
new file mode 100644
index 0000000..37f2b03
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/ResizeProcessor.cs
@@ -0,0 +1,38 @@
+using ImageMagick;
+using Ocr.Tesseract.Extensions;
+
+namespace Ocr.Tesseract.Screenshots;
+
+/// <summary>
+/// Resizes every image by a fixed factor and resamples it to a fixed density.
+/// </summary>
+public class ResizeProcessor : SingleImageProcessor
+{
+    /// <summary>Factor handed to <c>ResizeImage</c>.</summary>
+    private const float ScaleFactor = 2f;
+
+    /// <summary>Density handed to <c>Resample</c>, in pixels per inch.</summary>
+    private const int TargetDensity = 300;
+
+    private readonly FilterType _filterType;
+    private readonly PixelInterpolateMethod _interpolateMethod;
+
+    /// <summary>
+    /// Creates the processor.
+    /// </summary>
+    /// <param name="filterType">Filter forwarded to <c>ResizeImage</c>.</param>
+    /// <param name="interpolateMethod">Interpolation method forwarded to <c>ResizeImage</c>.</param>
+    public ResizeProcessor(FilterType filterType, PixelInterpolateMethod interpolateMethod)
+    {
+        _filterType = filterType;
+        _interpolateMethod = interpolateMethod;
+    }
+
+    /// <summary>
+    /// Returns <paramref name="image"/> resized by <see cref="ScaleFactor"/> and resampled to
+    /// <see cref="TargetDensity"/> pixels per inch.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image) => image
+        .ResizeImage(ScaleFactor, _filterType, _interpolateMethod)
+        .Resample(TargetDensity, DensityUnit.PixelsPerInch);
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/ScreenshotProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/ScreenshotProcessor.cs
deleted file mode 100644
index 9667e1c..0000000
--- a/Ocr/Ocr.Tesseract.Screenshots/ScreenshotProcessor.cs
+++ /dev/null
@@ -1,64 +0,0 @@
-using ImageMagick;
-using Ocr.Tesseract.Extensions;
-using Ocr.Tesseract.Screenshots.Configuration;
-using System.Collections.Generic;
-using System.Linq;
-
-namespace Ocr.Tesseract.Screenshots;
-
-///
-public class ScreenshotProcessor : ImageProcessor
-{
-    ///
-    public ScreenshotProcessor(ScreenshotProcessorConfiguration configuration)
-        : base(configuration)
-    {
-    }
-
-    ///
-    public override IEnumerable Process(MagickImage image)
-    {
-        var tImage = image.CloneImage();
-
-        if (Configuration.EnableResizing)
-        {
-            tImage = tImage
-                .ResizeImage(
-                    2f,
-                    FilterType.Lanczos2Sharp,
-                    PixelInterpolateMethod.Mesh
-                )
-                .Resample(300, DensityUnit.PixelsPerInch);
-
-            yield return tImage.CloneImage();
-        }
-
-        if (Configuration.EnableThresholding)
-        {
-            tImage = tImage
-                .NormalizeImage()
-                .RemoveAlpha(MagickColors.White)
-                .ToGrayscale()
-                .ThresholdAdaptive(Configuration.ThresholdWidth, Configuration.ThresholdHeight)
-                .ToBinary();
-        }
-
-        if (Configuration.Border > 0)
-        {
-            tImage = tImage.AddBorder(Configuration.Border, MagickColors.White);
-        }
-
-        yield return tImage;
-        yield return tImage.CloneImage().NegateColors();
-    }
-
-    #region Overrides of Processor
-
-    ///
-    public override IEnumerable Process(IEnumerable inputs)
-    {
-        return inputs.SelectMany(Process);
-    }
-
-    #endregion
-}
diff --git a/Ocr/Ocr.Tesseract.Screenshots/SingleImageProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/SingleImageProcessor.cs
new file mode 100644
index 0000000..0022b10
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/SingleImageProcessor.cs
@@ -0,0 +1,28 @@
+using ImageMagick;
+using Process.Abstract;
+using System.Collections.Generic;
+using System.Linq;
+
+namespace Ocr.Tesseract.Screenshots;
+
+/// <summary>
+/// Base class for processors that turn each input image into exactly one output image.
+/// </summary>
+public abstract class SingleImageProcessor : Processor
+{
+    /// <summary>
+    /// Maps every input through <see cref="Process(MagickImage)"/>. The mapping is a LINQ
+    /// <c>Select</c>, so an image is only processed when the result is enumerated.
+    /// </summary>
+    /// <param name="inputs">Images to transform.</param>
+    /// <returns>One transformed image per input, in input order.</returns>
+    public override IEnumerable<MagickImage> Process(IEnumerable<MagickImage> inputs) =>
+        inputs.Select(Process);
+
+    /// <summary>
+    /// Transforms a single image.
+    /// </summary>
+    /// <param name="image">Image to transform.</param>
+    /// <returns>The transformed image; implementations may return <paramref name="image"/> itself.</returns>
+    protected abstract MagickImage Process(MagickImage image);
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/Threshold/AutoThresholdProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/Threshold/AutoThresholdProcessor.cs
new file mode 100644
index 0000000..3a987d0
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/Threshold/AutoThresholdProcessor.cs
@@ -0,0 +1,29 @@
+using ImageMagick;
+
+namespace Ocr.Tesseract.Screenshots.Threshold;
+
+/// <summary>
+/// Thresholds every image with <c>AutoThreshold</c> using a fixed method.
+/// </summary>
+public class AutoThresholdProcessor : SingleImageProcessor
+{
+    private readonly AutoThresholdMethod _method;
+
+    /// <summary>
+    /// Creates the processor.
+    /// </summary>
+    /// <param name="method">Method forwarded to <c>AutoThreshold</c>.</param>
+    public AutoThresholdProcessor(AutoThresholdMethod method)
+    {
+        _method = method;
+    }
+
+    /// <summary>
+    /// Calls <c>AutoThreshold</c> on <paramref name="image"/> and returns that same instance.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image)
+    {
+        image.AutoThreshold(_method);
+        return image;
+    }
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdAdaptiveProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdAdaptiveProcessor.cs
new file mode 100644
index 0000000..309a397
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdAdaptiveProcessor.cs
@@ -0,0 +1,30 @@
+using ImageMagick;
+using Ocr.Tesseract.Extensions;
+
+namespace Ocr.Tesseract.Screenshots.Threshold;
+
+/// <summary>
+/// Thresholds every image with the <c>ThresholdAdaptive</c> extension using a fixed width and height.
+/// </summary>
+public class ThresholdAdaptiveProcessor : SingleImageProcessor
+{
+    private readonly int _width;
+    private readonly int _height;
+
+    /// <summary>
+    /// Creates the processor.
+    /// </summary>
+    /// <param name="width">Width forwarded to <c>ThresholdAdaptive</c>.</param>
+    /// <param name="height">Height forwarded to <c>ThresholdAdaptive</c>.</param>
+    public ThresholdAdaptiveProcessor(int width, int height)
+    {
+        _width = width;
+        _height = height;
+    }
+
+    /// <summary>
+    /// Returns the result of <c>ThresholdAdaptive</c> applied to <paramref name="image"/>.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image) => image
+        .ThresholdAdaptive(_width, _height);
+}
\ No newline at end of file
diff --git a/Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdProcessor.cs b/Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdProcessor.cs
new file mode 100644
index 0000000..ccd0e28
--- /dev/null
+++ b/Ocr/Ocr.Tesseract.Screenshots/Threshold/ThresholdProcessor.cs
@@ -0,0 +1,29 @@
+using ImageMagick;
+
+namespace Ocr.Tesseract.Screenshots.Threshold;
+
+/// <summary>
+/// Thresholds every image with <c>Threshold</c> at a fixed percentage.
+/// </summary>
+public class ThresholdProcessor : SingleImageProcessor
+{
+    private readonly Percentage _percentage;
+
+    /// <summary>
+    /// Creates the processor.
+    /// </summary>
+    /// <param name="percentage">Threshold value, wrapped in a <see cref="Percentage"/>.</param>
+    public ThresholdProcessor(int percentage)
+    {
+        _percentage = new Percentage(percentage);
+    }
+
+    /// <summary>
+    /// Calls <c>Threshold</c> on <paramref name="image"/> and returns that same instance.
+    /// </summary>
+    protected override MagickImage Process(MagickImage image)
+    {
+        image.Threshold(_percentage);
+        return image;
+    }
+}
\ No newline at end of file