87 lines
2.7 KiB
C#
87 lines
2.7 KiB
C#
using ImageMagick;
|
|
using Lookup.Memory;
|
|
using Ocr.Tesseract;
|
|
using Ocr.Tesseract.Configuration;
|
|
using Ocr.Tesseract.Models;
|
|
using Ocr.Tesseract.Screenshots;
|
|
using Ocr.Tesseract.Screenshots.Configuration;
|
|
using Process.Abstract.Configuration;
|
|
using Process.Interface;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace Common
{
    /// <summary>
    /// Scanner class, scanning <see cref="MagickImage"/>s for <see cref="Word"/>s
    /// via optical character recognition. Optimized for digital screenshots.
    /// </summary>
    /// <remarks>
    /// The processing chain is built exactly once, inside the constructor, from the
    /// objects that <see cref="ImageProcessorConfiguration"/> and
    /// <see cref="TesseractConfiguration"/> hold at that moment. Assigning a new
    /// object to either property afterwards (including through an object
    /// initializer, which runs after the constructor) is not picked up by the
    /// existing chain.
    /// </remarks>
    public class ScreenshotScanner
    {
        /// <summary>
        /// Value handed to the <see cref="ConfidenceFilter"/> stage of the chain.
        /// </summary>
        // NOTE(review): presumably the minimum confidence a scan result needs in
        // order to be kept; unit and direction are defined by ConfidenceFilter - confirm.
        private const int ConfidenceThreshold = 50;

        /// <summary>
        /// Processing chain that turns input images into scan results.
        /// Created once by <see cref="MakeProcessor"/> and reused for every
        /// <see cref="Process"/> call.
        /// </summary>
        private readonly IProcessor<MagickImage, ScanResult> _processor;

        /// <summary>
        /// <see cref="Regex"/> expression for extracting whole words from scan results:
        /// runs of at least two word characters, apostrophes or hyphens.
        /// </summary>
        private static readonly Regex wordRegex = new(
            @"[\w'\-]{2,}",
            RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
        );

        /// <summary>
        /// Data storage. <see cref="Process"/> adds the <see cref="Word"/> and image
        /// of every scan result that leaves the processing chain.
        /// </summary>
        public Lookup.Interface.ILookup<Word, MagickImage> Lookup { get; } =
            new MemoryLookup<Word, MagickImage>();

        /// <summary>
        /// Configuration passed to the <see cref="ScreenshotProcessor"/> stage
        /// when the processing chain is built.
        /// </summary>
        public ScreenshotProcessorConfiguration ImageProcessorConfiguration { get; set; } = new();

        /// <summary>
        /// Configuration passed to the <see cref="TesseractProcessor"/> stage
        /// when the processing chain is built.
        /// </summary>
        public ITesseractConfiguration TesseractConfiguration { get; set; } =
            new TesseractScreenshotConfiguration();

        /// <summary>
        /// Creates a scanner and builds its processing chain from the default
        /// configuration objects.
        /// </summary>
        public ScreenshotScanner()
        {
            // Property initializers have already run here, so both configuration
            // properties are populated before the chain reads them.
            _processor = MakeProcessor();
        }

        /// <summary>
        /// Process the provided <paramref name="images"/> and add the results to
        /// the <see cref="Lookup"/>
        /// </summary>
        /// <param name="images">The <see cref="MagickImage"/>s to process</param>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="images"/> is <c>null</c>.
        /// </exception>
        public void Process(IEnumerable<MagickImage> images)
        {
            // Fail fast with a descriptive exception instead of letting a null
            // sequence surface somewhere inside the processing chain.
            ArgumentNullException.ThrowIfNull(images);

            foreach (var result in _processor.Process(images))
            {
                Lookup.Add(result.Word, result.Image);
            }
        }

        /// <summary>
        /// Builds the processing chain: image preprocessing, OCR scan, and
        /// filtering / normalization of the scan results, with the
        /// <see cref="OnProcessing"/> and <see cref="OnProcessed"/> hooks
        /// registered directly before and after the scan stage.
        /// </summary>
        /// <returns>The completed processing chain.</returns>
        private IProcessor<MagickImage, ScanResult> MakeProcessor()
        {
            return new ProcessorChainConfiguration<MagickImage, ScanResult>()
                // Preprocess input data
                .Use(new ScreenshotProcessor(ImageProcessorConfiguration))
                // Hook placed between preprocessing and the scan
                .Use(new ProcessingEvent<MagickImage>(OnProcessing))
                // Scan
                .Use(new TesseractProcessor(TesseractConfiguration))
                // Hook placed directly after the scan, ahead of all output filters
                .Use(new ProcessingEvent<ScanResult>(OnProcessed))
                // Process output data
                .Use(new ConfidenceFilter(ConfidenceThreshold))
                .Use(new DuplicateFilter())
                .Use(new ToLowerProcessor())
                .Complete(new RegexFilter(wordRegex));
        }

        /// <summary>
        /// Hook registered in the chain between the preprocessing stage and the
        /// scan stage. Does nothing by default; override to observe the chain.
        /// </summary>
        /// <param name="sender">The processor reporting the event</param>
        /// <param name="inputs">
        /// The images at this point of the chain (presumably the preprocessed
        /// images about to be scanned - confirm against <c>ProcessingEvent</c>)
        /// </param>
        protected virtual void OnProcessing(IProcessor sender, ICollection<MagickImage> inputs)
        {
        }

        /// <summary>
        /// Hook registered in the chain directly after the scan stage, before any
        /// output filter. Does nothing by default; override to observe the chain.
        /// </summary>
        /// <param name="sender">The processor reporting the event</param>
        /// <param name="inputs">
        /// The scan results at this point of the chain (presumably the unfiltered
        /// output of the scan - confirm against <c>ProcessingEvent</c>)
        /// </param>
        protected virtual void OnProcessed(IProcessor sender, ICollection<ScanResult> inputs)
        {
        }
    }
}
|