using ImageMagick;
using Lookup.Memory;
using Ocr.Tesseract;
using Ocr.Tesseract.Configuration;
using Ocr.Tesseract.Models;
using Ocr.Tesseract.Screenshots;
using Ocr.Tesseract.Screenshots.Configuration;
using Ocr.Tesseract.Screenshots.Threshold;
using Process.Abstract.Configuration;
using Process.Interface;
using System.Text.RegularExpressions;

namespace Common
{
    /// <summary>
    /// Scans images for words via optical character recognition (OCR)
    /// and stores every recognised word together with the image it came from.
    /// Optimized for digital screenshots.
    /// </summary>
    public class ScreenshotScanner
    {
        /// <summary>
        /// Expression for extracting whole words from scan results: runs of at
        /// least two word characters, apostrophes or hyphens.
        /// </summary>
        private static readonly Regex s_wordRegex = new(
            @"[\w'\-]{2,}",
            RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
        );

        /// <summary>
        /// The complete processing chain, built once in the constructor by
        /// <see cref="MakeProcessor"/>.
        /// </summary>
        private readonly IProcessor _processor;

        /// <summary>
        /// Data storage that receives one entry (word, image) per result
        /// produced by <see cref="Process"/>.
        /// </summary>
        public Lookup.Interface.ILookup Lookup { get; } = new MemoryLookup();

        /// <summary>
        /// Configuration of the image processing stage; supplies the width and
        /// height used for adaptive thresholding.
        /// </summary>
        public ScreenshotProcessorConfiguration ImageProcessorConfiguration { get; }

        /// <summary>
        /// Configuration handed to the <see cref="TesseractProcessor"/> that
        /// performs the scan.
        /// </summary>
        public ITesseractConfiguration TesseractConfiguration { get; }

        /// <summary>
        /// Creates a scanner and builds its processing chain from the given
        /// configurations.
        /// </summary>
        /// <param name="imageProcessorConfig">Configuration of the image processing stage.</param>
        /// <param name="tesseractConfig">Configuration for the Tesseract scan.</param>
        /// <exception cref="System.ArgumentNullException">
        /// <paramref name="imageProcessorConfig"/> is <c>null</c>.
        /// </exception>
        public ScreenshotScanner(
            ScreenshotProcessorConfiguration imageProcessorConfig,
            ITesseractConfiguration tesseractConfig
        )
        {
            // Fail fast with a descriptive exception: MakeProcessor() dereferences this
            // configuration and would otherwise surface a NullReferenceException.
            ImageProcessorConfiguration = imageProcessorConfig
                ?? throw new System.ArgumentNullException(nameof(imageProcessorConfig));
            TesseractConfiguration = tesseractConfig;

            // Must run after both configuration properties are assigned,
            // because the chain is built from them.
            _processor = MakeProcessor();
        }

        /// <summary>
        /// Processes the provided images and adds every result to
        /// <see cref="Lookup"/>, keyed by the recognised word.
        /// </summary>
        /// <param name="images">The images to process.</param>
        public void Process(IEnumerable images)
        {
            foreach (var result in _processor.Process(images))
            {
                Lookup.Add(result.Word, result.Image);
            }
        }

        /// <summary>
        /// Builds the processing chain in this order: image preprocessing, the
        /// <see cref="OnProcessing"/> hook, the Tesseract scan, the
        /// <see cref="OnProcessed"/> hook, and finally result postprocessing.
        /// </summary>
        /// <returns>The assembled processor chain.</returns>
        private IProcessor MakeProcessor()
        {
            var threshold = new ThresholdAdaptiveProcessor(
                ImageProcessorConfiguration.ThresholdWidth,
                ImageProcessorConfiguration.ThresholdHeight);

            // Alternative threshold strategies, kept for reference:
            // var threshold = new AutoThresholdProcessor(AutoThresholdMethod.Kapur);
            // var threshold = new AutoThresholdProcessor(AutoThresholdMethod.OTSU);
            // var threshold = new AutoThresholdProcessor(AutoThresholdMethod.Triangle);
            // var threshold = new ThresholdProcessor(60);

            // Image preparation ahead of the scan. The chain starts with
            // CloneImageProcessor, presumably so that the caller's images are
            // left untouched - TODO confirm.
            var preprocessing = new ProcessorChainConfiguration()
                .Use(new CloneImageProcessor())
                .Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
                .Use(new NormalizeProcessor())
                .Use(threshold)
                .Use(new AddBorderProcessor(10))
                .Use(new BinarizeProcessor())
                .Complete(new NegateCloneProcessor());

            // Result clean-up after the scan. The argument 50 is presumably the
            // minimum accepted recognition confidence - TODO confirm its scale.
            var postprocessing = new ProcessorChainConfiguration()
                .Use(new ConfidenceFilter(50))
                .Use(new ToLowerProcessor())
                .Use(new DuplicateFilter())
                .Complete(new RegexFilter(s_wordRegex));

            var scan = new TesseractProcessor(TesseractConfiguration);

            return new ProcessorChainConfiguration()
                .Use(preprocessing)
                .Use(new ProcessingEvent(OnProcessing))
                .Use(scan)
                .Use(new ProcessingEvent(OnProcessed))
                .Complete(postprocessing);
        }

        /// <summary>
        /// Extension hook registered in the chain between preprocessing and the
        /// scan. The default implementation does nothing.
        /// </summary>
        /// <param name="sender">The processor associated with the event.</param>
        /// <param name="inputs">
        /// The items handed to the hook; presumably the preprocessed images
        /// about to be scanned - TODO confirm.
        /// </param>
        protected virtual void OnProcessing(IProcessor sender, ICollection inputs)
        {
        }

        /// <summary>
        /// Extension hook registered in the chain between the scan and
        /// postprocessing. The default implementation does nothing.
        /// </summary>
        /// <param name="sender">The processor associated with the event.</param>
        /// <param name="inputs">
        /// The items handed to the hook; presumably the raw scan results before
        /// filtering - TODO confirm.
        /// </param>
        protected virtual void OnProcessed(IProcessor sender, ICollection inputs)
        {
        }
    }
}