using ImageMagick;
using Lookup.Memory;
using Ocr.Tesseract;
using Ocr.Tesseract.Configuration;
using Ocr.Tesseract.Models;
using Ocr.Tesseract.Screenshots;
using Ocr.Tesseract.Screenshots.Configuration;
using Ocr.Tesseract.Screenshots.Threshold;
using Process.Abstract.Configuration;
using Process.Interface;
using System.Text.RegularExpressions;
namespace Common
{
    /// <summary>
    /// Scans images for words via optical character recognition and stores
    /// every hit in <c>Lookup</c>. The image preprocessing chain is tuned
    /// for digital screenshots.
    /// </summary>
    public class ScreenshotScanner
    {
        /// <summary>
        /// Expression for extracting whole words from scan results: runs of
        /// two or more word characters, apostrophes or hyphens.
        /// </summary>
        private static readonly Regex wordRegex = new(
            @"[\w'\-]{2,}",
            RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
        );

        // Complete processing chain (preprocessing, scan, postprocessing);
        // assembled exactly once by the constructor via MakeProcessor().
        private readonly IProcessor _processor;

        /// <summary>
        /// Data storage. <c>Process</c> adds each result's word together
        /// with the image it was found in.
        /// </summary>
        public Lookup.Interface.ILookup Lookup { get; } =
            new MemoryLookup();

        /// <summary>
        /// Configuration of the image preprocessing; supplies the width and
        /// height handed to the adaptive threshold step.
        /// </summary>
        public ScreenshotProcessorConfiguration ImageProcessorConfiguration { get; }

        /// <summary>
        /// Configuration passed to the Tesseract scan step.
        /// </summary>
        public ITesseractConfiguration TesseractConfiguration { get; }

        /// <summary>
        /// Stores both configurations and builds the processing chain.
        /// </summary>
        /// <param name="imageProcessorConfig">Image preprocessing configuration.</param>
        /// <param name="tesseractConfig">Tesseract configuration.</param>
        public ScreenshotScanner(
            ScreenshotProcessorConfiguration imageProcessorConfig,
            ITesseractConfiguration tesseractConfig
        )
        {
            (ImageProcessorConfiguration, TesseractConfiguration) =
                (imageProcessorConfig, tesseractConfig);

            // Has to come last: MakeProcessor() reads both properties set above.
            _processor = MakeProcessor();
        }

        /// <summary>
        /// Runs the provided images through the processing chain and adds
        /// every result to <c>Lookup</c>.
        /// </summary>
        /// <param name="images">The images to process.</param>
        public void Process(IEnumerable images)
        {
            var results = _processor.Process(images);

            foreach (var result in results)
            {
                Lookup.Add(result.Word, result.Image);
            }
        }

        /// <summary>
        /// Assembles the full chain: image preprocessing, the Tesseract scan
        /// (wrapped by the <c>OnProcessing</c> / <c>OnProcessed</c> hooks),
        /// and result postprocessing.
        /// </summary>
        /// <returns>The composed processor.</returns>
        private IProcessor MakeProcessor()
        {
            // Adaptive threshold sized from the image configuration.
            // Alternatives previously tried here: AutoThresholdProcessor with
            // AutoThresholdMethod.Kapur, .OTSU or .Triangle, and a fixed
            // ThresholdProcessor(60).
            var adaptiveThreshold = new ThresholdAdaptiveProcessor(
                ImageProcessorConfiguration.ThresholdWidth,
                ImageProcessorConfiguration.ThresholdHeight);

            // Image-side steps, applied in the order listed.
            var imageSteps = new ProcessorChainConfiguration()
                .Use(new CloneImageProcessor())
                .Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
                .Use(new NormalizeProcessor())
                .Use(adaptiveThreshold)
                .Use(new AddBorderProcessor(10))
                .Use(new BinarizeProcessor())
                .Complete(new NegateCloneProcessor());

            // Result-side steps: confidence filter (50), lower-casing,
            // duplicate removal, then the whole-word expression.
            var resultSteps = new ProcessorChainConfiguration()
                .Use(new ConfidenceFilter(50))
                .Use(new ToLowerProcessor())
                .Use(new DuplicateFilter())
                .Complete(new RegexFilter(wordRegex));

            var ocr = new TesseractProcessor(TesseractConfiguration);

            return new ProcessorChainConfiguration()
                .Use(imageSteps)
                .Use(new ProcessingEvent(OnProcessing))
                .Use(ocr)
                .Use(new ProcessingEvent(OnProcessed))
                .Complete(resultSteps);
        }

        /// <summary>
        /// Hook registered in the chain between image preprocessing and the
        /// Tesseract scan. Does nothing by default; override to react.
        /// </summary>
        /// <param name="sender">Presumably the processor raising the event (confirm against <c>ProcessingEvent</c>).</param>
        /// <param name="inputs">Presumably the items about to be scanned (confirm against <c>ProcessingEvent</c>).</param>
        protected virtual void OnProcessing(IProcessor sender, ICollection inputs)
        {
            // Intentionally empty: extension point for subclasses.
        }

        /// <summary>
        /// Hook registered in the chain between the Tesseract scan and the
        /// result postprocessing. Does nothing by default; override to react.
        /// </summary>
        /// <param name="sender">Presumably the processor raising the event (confirm against <c>ProcessingEvent</c>).</param>
        /// <param name="inputs">Presumably the items produced by the scan (confirm against <c>ProcessingEvent</c>).</param>
        protected virtual void OnProcessed(IProcessor sender, ICollection inputs)
        {
            // Intentionally empty: extension point for subclasses.
        }
    }
}