This repository has been archived on 2024-06-04. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
thesis-src/Examples/Common/ScreenshotScanner.cs
2023-08-10 09:04:36 +02:00

87 lines
2.7 KiB
C#

using ImageMagick;
using Lookup.Memory;
using Ocr.Tesseract;
using Ocr.Tesseract.Configuration;
using Ocr.Tesseract.Models;
using Ocr.Tesseract.Screenshots;
using Ocr.Tesseract.Screenshots.Configuration;
using Process.Abstract.Configuration;
using Process.Interface;
using System.Text.RegularExpressions;
namespace Common
{
/// <summary>
/// Scanner class, scanning <see cref="MagickImage"/>s for <see cref="Word"/>s
/// via optical character recognition. Optimized for digital Screenshots.
/// </summary>
/// <remarks>
/// The processing chain is built on the first call to <see cref="Process"/> from the
/// current values of <see cref="ImageProcessorConfiguration"/> and
/// <see cref="TesseractConfiguration"/>, and is rebuilt after either property is
/// assigned. Configuration supplied through an object initializer or set later is
/// therefore honoured instead of being ignored.
/// </remarks>
public class ScreenshotScanner
{
    /// <summary>
    /// Threshold handed to the <see cref="ConfidenceFilter"/> step of the chain.
    /// NOTE(review): presumably the minimum OCR confidence a result needs in order
    /// to be kept - confirm against <see cref="ConfidenceFilter"/>.
    /// </summary>
    private const int ConfidenceThreshold = 50;

    /// <summary>
    /// <see cref="Regex"/> expression for extracting whole words from scan results:
    /// runs of at least two word characters, apostrophes or hyphens.
    /// </summary>
    private static readonly Regex wordRegex = new(
        @"[\w'\-]{2,}",
        RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
    );

    // Deferred processing chain; replaced whenever a configuration property is
    // assigned so the next Process call picks up the new configuration.
    private Lazy<IProcessor<MagickImage, ScanResult>> _processor;

    private ScreenshotProcessorConfiguration _imageProcessorConfiguration = new();

    private ITesseractConfiguration _tesseractConfiguration =
        new TesseractScreenshotConfiguration();

    /// <summary>
    /// Data storage
    /// </summary>
    public Lookup.Interface.ILookup<Word, MagickImage> Lookup { get; } =
        new MemoryLookup<Word, MagickImage>();

    /// <summary>
    /// Configuration of the <see cref="ScreenshotProcessor"/> used to preprocess
    /// the input images. Assigning a new value discards the current processing
    /// chain; it is rebuilt on the next call to <see cref="Process"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public ScreenshotProcessorConfiguration ImageProcessorConfiguration
    {
        get => _imageProcessorConfiguration;
        set
        {
            ArgumentNullException.ThrowIfNull(value);
            _imageProcessorConfiguration = value;
            _processor = CreateDeferredProcessor();
        }
    }

    /// <summary>
    /// Configuration of the <see cref="TesseractProcessor"/> used to scan the
    /// preprocessed images. Assigning a new value discards the current processing
    /// chain; it is rebuilt on the next call to <see cref="Process"/>.
    /// </summary>
    /// <exception cref="ArgumentNullException">The assigned value is <c>null</c>.</exception>
    public ITesseractConfiguration TesseractConfiguration
    {
        get => _tesseractConfiguration;
        set
        {
            ArgumentNullException.ThrowIfNull(value);
            _tesseractConfiguration = value;
            _processor = CreateDeferredProcessor();
        }
    }

    /// <summary>
    /// Constructor. The processing chain itself is not built here but on first use,
    /// so that configuration assigned after construction is taken into account.
    /// </summary>
    public ScreenshotScanner()
    {
        _processor = CreateDeferredProcessor();
    }

    /// <summary>
    /// Process the provided <paramref name="images"/> and add the results to
    /// the <see cref="Lookup"/>
    /// </summary>
    /// <param name="images">The <see cref="MagickImage"/>s to process</param>
    public void Process(IEnumerable<MagickImage> images)
    {
        foreach (var result in _processor.Value.Process(images))
        {
            Lookup.Add(result.Word, result.Image);
        }
    }

    /// <summary>
    /// Wraps <see cref="MakeProcessor"/> so the chain is only built when first needed.
    /// </summary>
    private Lazy<IProcessor<MagickImage, ScanResult>> CreateDeferredProcessor()
    {
        return new Lazy<IProcessor<MagickImage, ScanResult>>(MakeProcessor);
    }

    /// <summary>
    /// Builds the processing chain from the current configuration values.
    /// </summary>
    /// <returns>The completed processor chain.</returns>
    private IProcessor<MagickImage, ScanResult> MakeProcessor()
    {
        return new ProcessorChainConfiguration<MagickImage, ScanResult>()
            .Use(new ScreenshotProcessor(ImageProcessorConfiguration)) // Preprocess input data
            .Use(new ProcessingEvent<MagickImage>(OnProcessing))       // Hook placed before the scan
            .Use(new TesseractProcessor(TesseractConfiguration))       // Scan
            .Use(new ProcessingEvent<ScanResult>(OnProcessed))         // Hook placed after the scan
            .Use(new ConfidenceFilter(ConfidenceThreshold))            // Process output data
            .Use(new DuplicateFilter())
            .Use(new ToLowerProcessor())
            .Complete(new RegexFilter(wordRegex));
    }

    /// <summary>
    /// Extension point registered with the <see cref="ProcessingEvent{T}"/> step that sits
    /// between image preprocessing and the Tesseract scan. The default implementation
    /// does nothing.
    /// </summary>
    /// <param name="sender">The processor passed to the callback by the event step.</param>
    /// <param name="inputs">The images passed to the callback by the event step.</param>
    protected virtual void OnProcessing(IProcessor sender, ICollection<MagickImage> inputs)
    {
    }

    /// <summary>
    /// Extension point registered with the <see cref="ProcessingEvent{T}"/> step that sits
    /// directly after the Tesseract scan, before any result filtering. The default
    /// implementation does nothing.
    /// </summary>
    /// <param name="sender">The processor passed to the callback by the event step.</param>
    /// <param name="inputs">The scan results passed to the callback by the event step.</param>
    protected virtual void OnProcessed(IProcessor sender, ICollection<ScanResult> inputs)
    {
    }
}
}