a
This commit is contained in:
+24
-1
@@ -8,32 +8,55 @@
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\Ocr\Ocr.Processors\Ocr.Processors.csproj" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Update="img\command-processing_screentypes_controlgroup_005.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\command-processing_screentypes_controlgroup_005.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\editor_startpage_project-exist_001.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\editor_startpage_project-exist_001.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\editor_windows_position_006.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\editor_windows_position_006.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\historian_assistent_001.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\historian_assistent_001.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_MetadataEditor_variables_001.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_MetadataEditor_variables_001.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_REPORTS_EfficencyClass_009.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_REPORTS_EfficencyClass_009.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_ZAMS_3rd-connector_014.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_ZAMS_3rd-connector_014.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_ZAMS_filter-alarmgroup_001.json">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
<None Update="img\zrs_ZAMS_filter-alarmgroup_001.png">
|
||||
<CopyToOutputDirectory>Always</CopyToOutputDirectory>
|
||||
</None>
|
||||
|
||||
@@ -0,0 +1,101 @@
|
||||
using ImageMagick;
|
||||
using Ocr.Tesseract;
|
||||
using Ocr.Tesseract.Configuration;
|
||||
using Ocr.Tesseract.Extensions;
|
||||
using Ocr.Tesseract.Models;
|
||||
using Ocr.Tesseract.Screenshots;
|
||||
using Ocr.Tesseract.Screenshots.Configuration;
|
||||
using Process.Abstract.Configuration;
|
||||
using Process.Interface;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
/// <summary>
/// Runs a single OCR evaluation pass: preprocesses an image with a given threshold
/// processor, scans it with Tesseract, post-processes the scan results and writes the
/// recognized words as JSON into <see cref="OutputFolder"/>. The preprocessed images are
/// written to the same folder as a side effect.
/// </summary>
internal class EvaluationProcessor
{
    /// <summary>
    /// <see cref="Regex"/> expression for extracting whole words from scan results
    /// </summary>
    private static readonly Regex WordRegex = new(
        @"[\w'\-]{2,}",
        RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
    );

    /// <summary>
    /// Tesseract configuration shared by all instances.
    /// Must be declared before <see cref="tesseractProcessor"/> because static
    /// initializers run in textual order.
    /// </summary>
    private static readonly ITesseractConfiguration TesseractConfig = new TesseractScreenshotConfiguration()
    {
        DataPath = "tessdata",
        Languages = new[] { "eng", "deu" }
    };

    // NOTE(review): not referenced anywhere inside this class; kept as-is.
    private ScreenshotProcessorConfiguration ProcessorConfig = new ScreenshotProcessorConfiguration
    {
        Border = 0,
        EnableResizing = false,
        EnableThresholding = false,
        FilterConnectedComponents = false,
        ThresholdHeight = 0,
        ThresholdWidth = 0
    };

    /// <summary>
    /// Tesseract scan step shared by all instances.
    /// </summary>
    private static readonly TesseractProcessor tesseractProcessor = new(TesseractConfig);

    /// <summary>
    /// Post-processing chain applied to the scan results: confidence filter (constructed
    /// with 50), lower-casing, duplicate removal and finally the <see cref="WordRegex"/> filter.
    /// </summary>
    private static readonly IProcessorChain<ScanResult, ScanResult> postProcessor =
        new ProcessorChainConfiguration<ScanResult, ScanResult>()
            .Use(new ConfidenceFilter(50))
            .Use(new ToLowerProcessor())
            .Use(new DuplicateFilter())
            .Complete(new RegexFilter(WordRegex));

    /// <summary>
    /// The threshold step under evaluation. Its string representation is also used
    /// as part of every output file name.
    /// </summary>
    private readonly IProcessor<MagickImage, MagickImage> _thresholdProcessor;

    /// <summary>
    /// Folder receiving the JSON results and the preprocessed images. Defaults to "results".
    /// </summary>
    public string OutputFolder { get; init; } = "results";

    /// <summary>
    /// Creates an evaluation processor for the given threshold step.
    /// </summary>
    /// <param name="thresholdProcessor">Threshold processor inserted into the preprocessing chain</param>
    public EvaluationProcessor(IProcessor<MagickImage, MagickImage> thresholdProcessor)
    {
        _thresholdProcessor = thresholdProcessor;
    }

    /// <summary>
    /// Scans <paramref name="image"/> on a thread-pool thread and serializes the recognized
    /// words to "{image name}.{threshold processor}.json" inside <see cref="OutputFolder"/>.
    /// </summary>
    /// <param name="image">The image to scan</param>
    /// <returns>A task that completes once the result file has been written</returns>
    public Task Process(MagickImage image) => Task.Run(async () =>
    {
        Directory.CreateDirectory(OutputFolder);

        var processor = MakeProcessor();
        var results = processor.Process(new[] { image }).Select(r => r.Word);

        var name = Path.GetFileNameWithoutExtension(image.FileName);
        var target = Path.Combine(OutputFolder, $"{name}.{_thresholdProcessor}.json");

        // File.Create truncates an existing file. File.OpenWrite would keep the old content
        // and leave stale trailing bytes (invalid JSON) when the new result is shorter.
        await using var file = File.Create(target);
        await JsonSerializer.SerializeAsync(file, results);
    });

    /// <summary>
    /// Builds the full chain: preprocessing (including the configured threshold step),
    /// Tesseract scan and post-processing.
    /// </summary>
    private IProcessorChain<MagickImage, ScanResult> MakeProcessor()
    {
        var preprocessing = new ProcessorChainConfiguration<MagickImage, MagickImage>()
            .Use(new CloneImageProcessor())
            .Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
            .Use(new NormalizeProcessor())
            .Use(_thresholdProcessor)
            .Use(new AddBorderProcessor(10))
            .Use(new BinarizeProcessor())
            .Use(new NegateCloneProcessor())
            .Complete(OnPreprocessed);

        return new ProcessorChainConfiguration<MagickImage, ScanResult>()
            .Use(preprocessing)
            .Use(tesseractProcessor)
            .Complete(postProcessor);
    }

    /// <summary>
    /// Final preprocessing step: writes every preprocessed image to <see cref="OutputFolder"/>
    /// as "{threshold processor}.{index}.{file name}" and passes the images on unchanged.
    /// </summary>
    /// <param name="images">The preprocessed images</param>
    /// <returns>The same images, materialized as an array</returns>
    private IEnumerable<MagickImage> OnPreprocessed(IEnumerable<MagickImage> images)
    {
        var tImages = images.ToArray();

        for (var i = 0; i < tImages.Length; i++)
        {
            // The clone is only needed for writing; dispose it right away so its native
            // resources are not held until finalization.
            // NOTE(review): assumes CloneImage() returns an independent copy - confirm.
            using var image = tImages[i].CloneImage();
            var name = Path.GetFileName(image.FileName);
            image.Write(Path.Combine(OutputFolder, $"{_thresholdProcessor}.{i:D2}.{name}"));
        }

        return tImages;
    }
}
|
||||
+93
-11
@@ -1,16 +1,98 @@
|
||||
using Common;
|
||||
using Common.Extensions;
|
||||
using Common.Extensions;
|
||||
using ImageMagick;
|
||||
using Ocr.Tesseract.Screenshots.Threshold;
|
||||
using System.Diagnostics.CodeAnalysis;
|
||||
using System.Text;
|
||||
|
||||
var scanner = new ScreenshotScanner();
|
||||
namespace CLI;
|
||||
|
||||
Console.WriteLine($"# Scanning: {string.Join(',', args)}...");
|
||||
scanner.Process(GetImages(args));
|
||||
public class Program
|
||||
{
|
||||
private readonly IDictionary<TaskStatus, string> _statusMap = new Dictionary<TaskStatus, string>()
|
||||
{
|
||||
{ TaskStatus.RanToCompletion, "DONE" },
|
||||
{ TaskStatus.Faulted, "FAULT" },
|
||||
{ TaskStatus.Canceled, "CANCL" },
|
||||
{ TaskStatus.Created, "WAIT" },
|
||||
{ TaskStatus.WaitingToRun, "WAIT" },
|
||||
{ TaskStatus.WaitingForActivation, "WAIT" },
|
||||
{ TaskStatus.Running, "RUN" },
|
||||
{ TaskStatus.WaitingForChildrenToComplete, "RUN" },
|
||||
};
|
||||
|
||||
Console.WriteLine($"# Results ({scanner.Lookup.Keys.Count}):");
|
||||
Console.WriteLine(string.Join(' ', scanner.Lookup.Keys));
|
||||
public Task Run(string[] args)
|
||||
{
|
||||
var scans = (
|
||||
from processor in MakeThresholdVariations()
|
||||
from path in ExpandPaths(args)
|
||||
select (Key: path, Task: processor.Process(new MagickImage(path)))
|
||||
).ToArray();
|
||||
|
||||
static IEnumerable<MagickImage> GetImages(IEnumerable<string> paths) => paths
|
||||
.SelectMany(p => p.ExpandPath())
|
||||
.Select(p => new MagickImage(p))
|
||||
.ToArray();
|
||||
var waitTask = Task.WhenAll(scans.Select(i => i.Task));
|
||||
|
||||
while (!waitTask.Wait(TimeSpan.FromSeconds(1)))
|
||||
{
|
||||
var sb = new StringBuilder(scans.Length * 30);
|
||||
|
||||
foreach (var info in scans)
|
||||
{
|
||||
sb.AppendLine($"{_statusMap[info.Task.Status],-5}: {info.Key}");
|
||||
|
||||
if (info.Task.Exception is not null)
|
||||
{
|
||||
sb.AppendLine($"> EX: {info.Task.Exception?.Message}");
|
||||
}
|
||||
}
|
||||
|
||||
sb.AppendLine();
|
||||
|
||||
Console.Clear();
|
||||
Console.Write(sb.ToString());
|
||||
}
|
||||
|
||||
return waitTask;
|
||||
}
|
||||
|
||||
/// <summary>
/// Lazily yields one <see cref="EvaluationProcessor"/> per threshold variant to evaluate:
/// adaptive thresholds constructed with 17, 5, 10 and 15, fixed thresholds constructed
/// with 20, 40, 60 and 80, and the Kapur, OTSU and Triangle auto-threshold methods.
/// </summary>
/// <returns>The evaluation processors, in the order listed above</returns>
[SuppressMessage("ReSharper", "ArrangeObjectCreationWhenTypeNotEvident")]
private static IEnumerable<EvaluationProcessor> MakeThresholdVariations()
{
    // Adaptive thresholds
    yield return new EvaluationProcessor(new ThresholdAdaptiveProcessor(17));
    yield return new EvaluationProcessor(new ThresholdAdaptiveProcessor(5));
    yield return new EvaluationProcessor(new ThresholdAdaptiveProcessor(10));
    yield return new EvaluationProcessor(new ThresholdAdaptiveProcessor(15));

    // Fixed thresholds
    yield return new EvaluationProcessor(new ThresholdProcessor(20));
    yield return new EvaluationProcessor(new ThresholdProcessor(40));
    yield return new EvaluationProcessor(new ThresholdProcessor(60));
    yield return new EvaluationProcessor(new ThresholdProcessor(80));

    // Automatic threshold methods
    yield return new EvaluationProcessor(new AutoThresholdProcessor(AutoThresholdMethod.Kapur));
    yield return new EvaluationProcessor(new AutoThresholdProcessor(AutoThresholdMethod.OTSU));
    yield return new EvaluationProcessor(new AutoThresholdProcessor(AutoThresholdMethod.Triangle));
}
|
||||
|
||||
/// <summary>
/// Expands every given path via the <c>ExpandPath</c> extension and flattens the
/// results into a single, lazily evaluated sequence.
/// </summary>
/// <param name="paths">The paths to expand</param>
/// <returns>All expanded paths, in input order</returns>
private static IEnumerable<string> ExpandPaths(params string[] paths)
{
    return
        from path in paths
        from expanded in path.ExpandPath()
        select expanded;
}
|
||||
|
||||
#region Main
|
||||
|
||||
/// <summary>
/// Application entry point: runs the evaluation for the given arguments and blocks
/// until it has finished.
/// </summary>
/// <param name="args">Command line arguments, forwarded to <c>Run</c></param>
/// <returns>0 on success, 1 if any exception was thrown</returns>
public static int Main(string[] args)
{
    Console.WriteLine("Starting up");

    int exitCode;

    try
    {
        var program = new Program();
        var pending = program.Run(args);

        // Blocks until all scans are done; failures surface here as an exception.
        pending.Wait();

        Console.WriteLine("Completed");
        exitCode = 0;
    }
    catch (Exception failure)
    {
        Console.WriteLine(failure.Message);
        exitCode = 1;
    }

    return exitCode;
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
|
||||
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"words": [
|
||||
"interlockings",
|
||||
"active",
|
||||
"interlocking",
|
||||
"text",
|
||||
"typ",
|
||||
"static",
|
||||
"id",
|
||||
"10034",
|
||||
"no"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,80 @@
|
||||
{
|
||||
"words": [
|
||||
"connection",
|
||||
"create",
|
||||
"driver",
|
||||
"variable",
|
||||
"visualization",
|
||||
"frame",
|
||||
"screen",
|
||||
"execution",
|
||||
"function",
|
||||
"first",
|
||||
"steps",
|
||||
"welcome",
|
||||
"to",
|
||||
"the",
|
||||
"engineering",
|
||||
"studio",
|
||||
"we",
|
||||
"want",
|
||||
"make",
|
||||
"your",
|
||||
"in",
|
||||
"easy",
|
||||
"this",
|
||||
"end",
|
||||
"are",
|
||||
"introducing",
|
||||
"you",
|
||||
"basic",
|
||||
"for",
|
||||
"configuring",
|
||||
"a",
|
||||
"project",
|
||||
"following",
|
||||
"chapters",
|
||||
"how",
|
||||
"work",
|
||||
"and",
|
||||
"what",
|
||||
"need",
|
||||
"projects",
|
||||
"can",
|
||||
"incorporate",
|
||||
"data",
|
||||
"points",
|
||||
"into",
|
||||
"is",
|
||||
"tool",
|
||||
"control",
|
||||
"of",
|
||||
"processes",
|
||||
"generally",
|
||||
"do",
|
||||
"not",
|
||||
"anything",
|
||||
"because",
|
||||
"properties",
|
||||
"dialogs",
|
||||
"setting",
|
||||
"parameters",
|
||||
"them",
|
||||
"if",
|
||||
"rproject",
|
||||
"has",
|
||||
"been",
|
||||
"configured",
|
||||
"compile",
|
||||
"it",
|
||||
"start",
|
||||
"service",
|
||||
"engine",
|
||||
"then",
|
||||
"see",
|
||||
"front",
|
||||
"application",
|
||||
"back",
|
||||
"next"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
{
|
||||
"words": [
|
||||
"zenon",
|
||||
"energy",
|
||||
"edition",
|
||||
"file",
|
||||
"edit",
|
||||
"screens",
|
||||
"options",
|
||||
"window",
|
||||
"help",
|
||||
"100",
|
||||
"activatestartupscreen",
|
||||
"properties",
|
||||
"cross",
|
||||
"reference",
|
||||
"list",
|
||||
"report",
|
||||
"start",
|
||||
"variables",
|
||||
"functions",
|
||||
"language",
|
||||
"historian",
|
||||
"recipes",
|
||||
"time",
|
||||
"control",
|
||||
"windows",
|
||||
"CE",
|
||||
"filter",
|
||||
"text",
|
||||
"total",
|
||||
"filtered",
|
||||
"select",
|
||||
"project",
|
||||
"tree",
|
||||
"network",
|
||||
"topology",
|
||||
"welcome",
|
||||
"to",
|
||||
"the",
|
||||
"property",
|
||||
"shows",
|
||||
"you",
|
||||
"brief",
|
||||
"information",
|
||||
"each",
|
||||
"propery",
|
||||
"what",
|
||||
"is",
|
||||
"needed",
|
||||
"for",
|
||||
"its",
|
||||
"standard",
|
||||
"value",
|
||||
"from",
|
||||
"other",
|
||||
"depended",
|
||||
"on",
|
||||
"etc",
|
||||
"a",
|
||||
"link",
|
||||
"online",
|
||||
"also",
|
||||
"provided",
|
||||
"and",
|
||||
"name",
|
||||
"VBA",
|
||||
"VSTA",
|
||||
"displayed",
|
||||
"output",
|
||||
"load",
|
||||
"DOKU",
|
||||
"ready",
|
||||
"elemente"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"words": [
|
||||
"start",
|
||||
"page",
|
||||
"create",
|
||||
"archive",
|
||||
"archives",
|
||||
"serve",
|
||||
"for",
|
||||
"the",
|
||||
"recording",
|
||||
"of",
|
||||
"process",
|
||||
"values",
|
||||
"over",
|
||||
"a",
|
||||
"longer",
|
||||
"period",
|
||||
"time",
|
||||
"recorded",
|
||||
"data",
|
||||
"can",
|
||||
"be",
|
||||
"summarized",
|
||||
"in",
|
||||
"aggregated",
|
||||
"sum",
|
||||
"average",
|
||||
"value",
|
||||
"minimum",
|
||||
"and",
|
||||
"maximum",
|
||||
"certain",
|
||||
"used",
|
||||
"extended",
|
||||
"trend",
|
||||
"this",
|
||||
"assistant",
|
||||
"should",
|
||||
"help",
|
||||
"you",
|
||||
"with",
|
||||
"basic",
|
||||
"engineering",
|
||||
"an",
|
||||
"if",
|
||||
"want",
|
||||
"to",
|
||||
"teditor",
|
||||
"without",
|
||||
"press",
|
||||
"cancel",
|
||||
"now",
|
||||
"back",
|
||||
"next"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,56 @@
|
||||
{
|
||||
"words": [
|
||||
"metadata",
|
||||
"editor",
|
||||
"file",
|
||||
"edit",
|
||||
"help",
|
||||
"equipment",
|
||||
"modeling",
|
||||
"event",
|
||||
"classes",
|
||||
"groups",
|
||||
"users",
|
||||
"project",
|
||||
"variables",
|
||||
"archives",
|
||||
"efficiency",
|
||||
"class",
|
||||
"models",
|
||||
"reference",
|
||||
"curves",
|
||||
"name",
|
||||
"visual",
|
||||
"description",
|
||||
"apply",
|
||||
"clear",
|
||||
"glass",
|
||||
"bottle",
|
||||
"line",
|
||||
"WS",
|
||||
"Cur",
|
||||
"State",
|
||||
"depalletizer",
|
||||
"unpacker",
|
||||
"washer",
|
||||
"filler",
|
||||
"pasteurizer",
|
||||
"labeler",
|
||||
"conveyor",
|
||||
"grate",
|
||||
"inspector",
|
||||
"operation",
|
||||
"packer",
|
||||
"connected",
|
||||
"to",
|
||||
"reporting",
|
||||
"on",
|
||||
"testenv",
|
||||
"local",
|
||||
"total",
|
||||
"filtered",
|
||||
"selected",
|
||||
"changed",
|
||||
"ready"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
{
|
||||
"words": [
|
||||
"new",
|
||||
"report",
|
||||
"theme",
|
||||
"efficiency",
|
||||
"class",
|
||||
"analysis",
|
||||
"this",
|
||||
"contains",
|
||||
"templates",
|
||||
"that",
|
||||
"allow",
|
||||
"based",
|
||||
"on",
|
||||
"models",
|
||||
"the",
|
||||
"data",
|
||||
"can",
|
||||
"be",
|
||||
"processed",
|
||||
"archives",
|
||||
"equipment",
|
||||
"groups",
|
||||
"or",
|
||||
"formulas",
|
||||
"template",
|
||||
"formula",
|
||||
"reports",
|
||||
"perform",
|
||||
"for",
|
||||
"period",
|
||||
"1",
|
||||
"historic",
|
||||
"is",
|
||||
"normalised",
|
||||
"to",
|
||||
"a",
|
||||
"defined",
|
||||
"in",
|
||||
"model",
|
||||
"cen",
|
||||
"shown",
|
||||
"an",
|
||||
"diagram",
|
||||
"and",
|
||||
"tables",
|
||||
"preview",
|
||||
"from",
|
||||
"costs",
|
||||
"hour",
|
||||
"name",
|
||||
"lower",
|
||||
"limit",
|
||||
"higher",
|
||||
"EUR",
|
||||
"value",
|
||||
"current",
|
||||
"zenon",
|
||||
"analyzer",
|
||||
"www",
|
||||
"copadata",
|
||||
"com",
|
||||
"ok",
|
||||
"cancel",
|
||||
"page"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
{
|
||||
"words": [
|
||||
"metadata",
|
||||
"datasource",
|
||||
"variables",
|
||||
"project",
|
||||
"glass",
|
||||
"bottle",
|
||||
"line",
|
||||
"identification",
|
||||
"visualname",
|
||||
"description",
|
||||
"cycle",
|
||||
"time",
|
||||
"dd",
|
||||
"hh",
|
||||
"mm",
|
||||
"ss",
|
||||
"assigned",
|
||||
"equipment",
|
||||
"groups",
|
||||
"plant",
|
||||
"media",
|
||||
"total",
|
||||
"previous",
|
||||
"next",
|
||||
"ok",
|
||||
"cancel",
|
||||
"new",
|
||||
"archive",
|
||||
"for",
|
||||
"the",
|
||||
"3rd",
|
||||
"party",
|
||||
"database",
|
||||
"connector"
|
||||
]
|
||||
}
|
||||
@@ -0,0 +1,22 @@
|
||||
{
|
||||
"words": [
|
||||
"alarm",
|
||||
"group",
|
||||
"prefilter",
|
||||
"static",
|
||||
"enable",
|
||||
"alarm",
|
||||
"group",
|
||||
"prefiltering",
|
||||
"machine",
|
||||
"alarm",
|
||||
"emergency",
|
||||
"stop",
|
||||
"external",
|
||||
"failure",
|
||||
"eqauipment",
|
||||
"select",
|
||||
"all",
|
||||
"deselect"
|
||||
]
|
||||
}
|
||||
@@ -1,14 +1,7 @@
|
||||
using ImageMagick;
|
||||
using Lookup.Memory;
|
||||
using Ocr.Tesseract;
|
||||
using Ocr.Tesseract.Configuration;
|
||||
using Ocr.Tesseract.Models;
|
||||
using Ocr.Tesseract.Screenshots;
|
||||
using Ocr.Tesseract.Screenshots.Configuration;
|
||||
using Ocr.Tesseract.Screenshots.Threshold;
|
||||
using Process.Abstract.Configuration;
|
||||
using Process.Interface;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Common
|
||||
{
|
||||
@@ -18,15 +11,10 @@ namespace Common
|
||||
/// </summary>
|
||||
public class ScreenshotScanner
|
||||
{
|
||||
private readonly IProcessor<MagickImage, ScanResult> _processor;
|
||||
|
||||
/// <summary>
|
||||
/// <see cref="Regex"/> expression for extracting whole words from scan results
|
||||
/// The screenshot processor
|
||||
/// </summary>
|
||||
private static readonly Regex wordRegex = new(
|
||||
@"[\w'\-]{2,}",
|
||||
RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
|
||||
);
|
||||
protected IProcessor<MagickImage, ScanResult> Processor { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Data storage
|
||||
@@ -34,25 +22,12 @@ namespace Common
|
||||
public Lookup.Interface.ILookup<Word, MagickImage> Lookup { get; } =
|
||||
new MemoryLookup<Word, MagickImage>();
|
||||
|
||||
/// <summary>
|
||||
/// Configuration of the <see cref="ImageProcessor"/>
|
||||
/// </summary>
|
||||
public ScreenshotProcessorConfiguration ImageProcessorConfiguration { get; }
|
||||
|
||||
public ITesseractConfiguration TesseractConfiguration { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Constructor
|
||||
/// </summary>
|
||||
public ScreenshotScanner(
|
||||
ScreenshotProcessorConfiguration imageProcessorConfig,
|
||||
ITesseractConfiguration tesseractConfig
|
||||
)
|
||||
public ScreenshotScanner(IProcessor<MagickImage, ScanResult> processor)
|
||||
{
|
||||
ImageProcessorConfiguration = imageProcessorConfig;
|
||||
TesseractConfiguration = tesseractConfig;
|
||||
|
||||
_processor = MakeProcessor();
|
||||
Processor = processor;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
@@ -62,53 +37,15 @@ namespace Common
|
||||
/// <param name="images">The <see cref="MagickImage"/>s to process</param>
|
||||
public void Process(IEnumerable<MagickImage> images)
|
||||
{
|
||||
foreach (var kv in _processor.Process(images))
|
||||
foreach (var kv in Processor.Process(images))
|
||||
{
|
||||
Lookup.Add(kv.Word, kv.Image);
|
||||
}
|
||||
}
|
||||
|
||||
private IProcessor<MagickImage, ScanResult> MakeProcessor()
|
||||
{
|
||||
var threshold =
|
||||
new ThresholdAdaptiveProcessor(ImageProcessorConfiguration.ThresholdWidth,
|
||||
ImageProcessorConfiguration.ThresholdHeight);
|
||||
// var threshold = new AutoThresholdProcessor(AutoThresholdMethod.Kapur);
|
||||
// var threshold = new AutoThresholdProcessor(AutoThresholdMethod.OTSU);
|
||||
// var threshold = new AutoThresholdProcessor(AutoThresholdMethod.Triangle);
|
||||
// var threshold = new ThresholdProcessor(60);
|
||||
|
||||
var preprocessing = new ProcessorChainConfiguration<MagickImage, MagickImage>()
|
||||
.Use(new CloneImageProcessor())
|
||||
.Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
|
||||
.Use(new NormalizeProcessor())
|
||||
.Use(threshold)
|
||||
.Use(new AddBorderProcessor(10))
|
||||
.Use(new BinarizeProcessor())
|
||||
.Complete(new NegateCloneProcessor());
|
||||
|
||||
var postprocessing = new ProcessorChainConfiguration<ScanResult, ScanResult>()
|
||||
.Use(new ConfidenceFilter(50))
|
||||
.Use(new ToLowerProcessor())
|
||||
.Use(new DuplicateFilter())
|
||||
.Complete(new RegexFilter(wordRegex));
|
||||
|
||||
var scan = new TesseractProcessor(TesseractConfiguration);
|
||||
|
||||
return new ProcessorChainConfiguration<MagickImage, ScanResult>()
|
||||
.Use(preprocessing)
|
||||
.Use(new ProcessingEvent<MagickImage>(OnProcessing))
|
||||
.Use(scan)
|
||||
.Use(new ProcessingEvent<ScanResult>(OnProcessed))
|
||||
.Complete(postprocessing);
|
||||
}
|
||||
|
||||
protected virtual void OnProcessing(IProcessor sender, ICollection<MagickImage> inputs)
|
||||
{
|
||||
}
|
||||
|
||||
protected virtual void OnProcessed(IProcessor sender, ICollection<ScanResult> inputs)
|
||||
public virtual void Clear()
|
||||
{
|
||||
Lookup.Clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -35,6 +35,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ocr.Tesseract", "..\Ocr\Ocr
|
||||
EndProject
|
||||
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "Ocr.Tesseract.Screenshots", "..\Ocr\Ocr.Tesseract.Screenshots\Ocr.Tesseract.Screenshots.csproj", "{251F9AC9-3765-498C-83FD-DB3539A19CB3}"
|
||||
EndProject
|
||||
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "ReportGenerator", "ReportGenerator\ReportGenerator.csproj", "{729CB7AA-AB0D-4C39-AA17-7435E61FA0A6}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Any CPU = Debug|Any CPU
|
||||
@@ -89,6 +91,10 @@ Global
|
||||
{251F9AC9-3765-498C-83FD-DB3539A19CB3}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{251F9AC9-3765-498C-83FD-DB3539A19CB3}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{251F9AC9-3765-498C-83FD-DB3539A19CB3}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
{729CB7AA-AB0D-4C39-AA17-7435E61FA0A6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||
{729CB7AA-AB0D-4C39-AA17-7435E61FA0A6}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||
{729CB7AA-AB0D-4C39-AA17-7435E61FA0A6}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||
{729CB7AA-AB0D-4C39-AA17-7435E61FA0A6}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
||||
@@ -2,9 +2,13 @@
|
||||
using GUI.Model;
|
||||
using ImageMagick;
|
||||
using Microsoft.Win32;
|
||||
using Ocr.Tesseract;
|
||||
using Ocr.Tesseract.Configuration;
|
||||
using Ocr.Tesseract.Models;
|
||||
using Ocr.Tesseract.Screenshots;
|
||||
using Ocr.Tesseract.Screenshots.Configuration;
|
||||
using Ocr.Tesseract.Screenshots.Threshold;
|
||||
using Process.Abstract.Configuration;
|
||||
using Process.Interface;
|
||||
using Serilog;
|
||||
using System;
|
||||
@@ -14,33 +18,91 @@ using System.ComponentModel;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text.RegularExpressions;
|
||||
using System.Threading.Tasks;
|
||||
using System.Windows;
|
||||
using System.Windows.Input;
|
||||
|
||||
namespace GUI.ViewModels;
|
||||
|
||||
internal class ImageViewModel : ScreenshotScanner, INotifyPropertyChanged
|
||||
internal class ImageViewModel : INotifyPropertyChanged
|
||||
{
|
||||
private static ITesseractConfiguration CreateTesseractConfiguration() =>
|
||||
new TesseractScreenshotConfiguration()
|
||||
/// <summary>
|
||||
/// The internally used <see cref="ScreenshotScanner"/>
|
||||
/// </summary>
|
||||
public ScreenshotScanner Scanner { get; private set; }
|
||||
|
||||
/// <summary>
|
||||
/// Tesseract engine configuration
|
||||
/// </summary>
|
||||
public static readonly ITesseractConfiguration TesseractConfig =
|
||||
new TesseractScreenshotConfiguration
|
||||
{
|
||||
DataPath = "tessdata",
|
||||
Languages = new[] { "eng", "deu" }
|
||||
};
|
||||
|
||||
public ImageViewModel() : base(new(), CreateTesseractConfiguration())
|
||||
public ScreenshotProcessorConfiguration ProcessorConfig { get; } = new();
|
||||
|
||||
/// <summary>
|
||||
/// <see cref="Regex"/> expression for extracting whole words from scan results
|
||||
/// </summary>
|
||||
public static readonly Regex WordRegex = new(
|
||||
@"[\w'\-]{2,}",
|
||||
RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
|
||||
);
|
||||
|
||||
public ImageViewModel()
|
||||
{
|
||||
ImageProcessorConfiguration.PropertyChanged += (sender, args) => Task.Run(UpdateImage);
|
||||
Scanner = new ScreenshotScanner(MakeProcessor());
|
||||
ProcessorConfig.PropertyChanged += (sender, args) => Task.Run(UpdateImage);
|
||||
|
||||
OpenFileCommand = new Command(OpenFile);
|
||||
SaveEditedImageCommand = new Command(SaveEditedImage);
|
||||
}
|
||||
|
||||
public ImageViewModel(MagickImage image) : this()
|
||||
{
|
||||
Image = image;
|
||||
}
|
||||
|
||||
private IProcessorChain<MagickImage, ScanResult> MakeProcessor()
|
||||
{
|
||||
var threshold =
|
||||
new ThresholdAdaptiveProcessor(
|
||||
ProcessorConfig.ThresholdWidth,
|
||||
ProcessorConfig.ThresholdHeight
|
||||
);
|
||||
|
||||
var preprocessing = new ProcessorChainConfiguration<MagickImage, MagickImage>()
|
||||
.Use(new CloneImageProcessor())
|
||||
.Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
|
||||
.Use(new NormalizeProcessor())
|
||||
.Use(threshold)
|
||||
.Use(new AddBorderProcessor(ProcessorConfig.Border))
|
||||
.Use(new BinarizeProcessor())
|
||||
.Complete(new NegateCloneProcessor());
|
||||
|
||||
var postprocessing = new ProcessorChainConfiguration<ScanResult, ScanResult>()
|
||||
.Use(new ConfidenceFilter(50))
|
||||
.Use(new ToLowerProcessor())
|
||||
.Use(new DuplicateFilter())
|
||||
.Complete(new RegexFilter(WordRegex));
|
||||
|
||||
var scan = new TesseractProcessor(TesseractConfig);
|
||||
|
||||
return new ProcessorChainConfiguration<MagickImage, ScanResult>()
|
||||
.Use(preprocessing)
|
||||
.Use(new ProcessingEvent<MagickImage>(OnProcessing))
|
||||
.Use(scan)
|
||||
.Use(new ProcessingEvent<ScanResult>(OnProcessed))
|
||||
.Complete(postprocessing);
|
||||
}
|
||||
|
||||
#region Overrides of Scanner
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override void OnProcessing(IProcessor sender, ICollection<MagickImage> inputs)
|
||||
protected void OnProcessing(IProcessor sender, ICollection<MagickImage> inputs)
|
||||
{
|
||||
Application.Current.Dispatcher.Invoke(() =>
|
||||
{
|
||||
@@ -52,30 +114,28 @@ internal class ImageViewModel : ScreenshotScanner, INotifyPropertyChanged
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override void OnProcessed(IProcessor sender, ICollection<ScanResult> inputs)
|
||||
protected void OnProcessed(IProcessor sender, ICollection<ScanResult> inputs)
|
||||
{
|
||||
ScannedText = $"[{inputs.Count} words] " + string.Join(' ', inputs);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
public ImageViewModel(MagickImage image) : this()
|
||||
{
|
||||
Image = image;
|
||||
}
|
||||
|
||||
private void Clear()
|
||||
/// <inheritdoc />
|
||||
public void Clear()
|
||||
{
|
||||
Scanner.Clear();
|
||||
Application.Current.Dispatcher.Invoke(() =>
|
||||
{
|
||||
ScannedText = string.Empty;
|
||||
Words.Clear();
|
||||
Lookup.Clear();
|
||||
Edited.Clear();
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region File Handling
|
||||
|
||||
private void OpenFile()
|
||||
{
|
||||
var dialog = new OpenFileDialog()
|
||||
@@ -105,10 +165,14 @@ internal class ImageViewModel : ScreenshotScanner, INotifyPropertyChanged
|
||||
);
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Updating data
|
||||
|
||||
private void UpdateConfidence()
|
||||
{
|
||||
Confidence = Lookup.Keys.Any()
|
||||
? Lookup.Keys.Sum(key => key.Confidence) / Lookup.Keys.Count
|
||||
Confidence = Scanner.Lookup.Keys.Any()
|
||||
? Scanner.Lookup.Keys.Sum(key => key.Confidence) / Scanner.Lookup.Keys.Count
|
||||
: 0;
|
||||
}
|
||||
|
||||
@@ -121,7 +185,7 @@ internal class ImageViewModel : ScreenshotScanner, INotifyPropertyChanged
|
||||
Clear();
|
||||
if (Image != null)
|
||||
{
|
||||
Process(new[] { Image });
|
||||
Scanner.Process(new[] { Image });
|
||||
}
|
||||
|
||||
UpdateWords();
|
||||
@@ -135,13 +199,15 @@ internal class ImageViewModel : ScreenshotScanner, INotifyPropertyChanged
|
||||
{
|
||||
Application.Current.Dispatcher.Invoke(() =>
|
||||
{
|
||||
foreach (var word in Lookup.Keys)
|
||||
foreach (var word in Scanner.Lookup.Keys)
|
||||
{
|
||||
Words.Add(word);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region Properties
|
||||
|
||||
private float _confidence;
|
||||
|
||||
@@ -160,7 +160,7 @@
|
||||
<CheckBox
|
||||
x:Name="EnableThreshold"
|
||||
Content="Apply Threshold"
|
||||
IsChecked="{Binding ImageProcessorConfiguration.EnableThresholding}" />
|
||||
IsChecked="{Binding ProcessorConfig.EnableThresholding}" />
|
||||
|
||||
<Grid
|
||||
Margin="4"
|
||||
@@ -209,7 +209,7 @@
|
||||
|
||||
<CheckBox
|
||||
Content="Resize"
|
||||
IsChecked="{Binding ImageProcessorConfiguration.EnableResizing}" />
|
||||
IsChecked="{Binding ProcessorConfig.EnableResizing}" />
|
||||
<Grid Margin="4">
|
||||
<Grid.ColumnDefinitions>
|
||||
<ColumnDefinition Width="Auto" />
|
||||
@@ -235,7 +235,7 @@
|
||||
|
||||
<CheckBox
|
||||
Content="Filter connected components"
|
||||
IsChecked="{Binding ImageProcessorConfiguration.FilterConnectedComponents}" />
|
||||
IsChecked="{Binding ProcessorConfig.FilterConnectedComponents}" />
|
||||
</UniformGrid>
|
||||
</Grid>
|
||||
</Window>
|
||||
@@ -29,20 +29,20 @@ namespace GUI.Views
|
||||
private void SldThreshold1_OnDragCompleted(object sender, DragCompletedEventArgs args)
|
||||
{
|
||||
var vm = ViewModel;
|
||||
vm.ImageProcessorConfiguration.ThresholdWidth = (int)Math.Round(((Slider)sender).Value);
|
||||
vm.ProcessorConfig.ThresholdWidth = (int)Math.Round(((Slider)sender).Value);
|
||||
}
|
||||
|
||||
private void SldThreshold2_OnDragCompleted(object sender, DragCompletedEventArgs args)
|
||||
{
|
||||
var vm = ViewModel;
|
||||
vm.ImageProcessorConfiguration.ThresholdHeight = (int)Math.Round(((Slider)sender).Value);
|
||||
vm.ProcessorConfig.ThresholdHeight = (int)Math.Round(((Slider)sender).Value);
|
||||
}
|
||||
|
||||
|
||||
private void SldBorder_OnDragCompleted(object sender, DragCompletedEventArgs e)
|
||||
{
|
||||
var vm = ViewModel;
|
||||
vm.ImageProcessorConfiguration.Border = (int)Math.Round(((Slider)sender).Value);
|
||||
vm.ProcessorConfig.Border = (int)Math.Round(((Slider)sender).Value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,185 @@
|
||||
namespace ReportGenerator;
|
||||
|
||||
/// <summary>
/// Character error of one tagged word together with the scanned word it was matched to.
/// </summary>
internal struct CharacterErrorInfo
{
    /// <summary>
    /// Creates an entry for <paramref name="taggedWord"/> with no scanned word assigned
    /// and an infinite character error.
    /// </summary>
    /// <param name="taggedWord">The tagged word</param>
    public CharacterErrorInfo(string taggedWord)
    {
        TaggedWord = taggedWord;
        ScannedWord = null;
        CharacterError = double.PositiveInfinity;
    }

    /// <summary>The tagged word this entry belongs to.</summary>
    public string TaggedWord { get; }

    /// <summary>The scanned word assigned to the tagged word, or null if none was assigned.</summary>
    public string? ScannedWord { get; set; }

    /// <summary>Character error for <see cref="ScannedWord"/>; starts at positive infinity.</summary>
    public double CharacterError { get; set; }
}
|
||||
|
||||
/// <summary>
/// Word error of one scan together with the per-word character errors.
/// </summary>
internal struct WordErrorInfo
{
    /// <summary>
    /// Creates an instance with an infinite word error and an empty word list.
    /// </summary>
    public WordErrorInfo()
    {
        WordError = double.PositiveInfinity;
        Words = new List<CharacterErrorInfo>();
    }

    /// <summary>Word error of the scan; starts at positive infinity.</summary>
    public double WordError { get; set; }

    /// <summary>
    /// Character error entries. The collection is a reference type, so copies of this
    /// struct share the same list instance.
    /// </summary>
    public ICollection<CharacterErrorInfo> Words { get; }

    /// <summary>
    /// Arithmetic mean of the character errors in <see cref="Words"/>.
    /// </summary>
    /// <exception cref="InvalidOperationException"><see cref="Words"/> is empty</exception>
    public double CharacterErrorAvg
    {
        get { return Words.Average(entry => entry.CharacterError); }
    }
}
|
||||
|
||||
/// <summary>
/// Table of scans belonging to one image.
/// NOTE(review): usage is not visible in this section - presumably report output; confirm.
/// </summary>
internal struct ScanTable
{
    /// <summary>Name of the image the rows belong to.</summary>
    public string ImageName { get; set; }

    /// <summary>The rows of the table.</summary>
    public ICollection<ScanTableRow> Scans { get; set; }
}
|
||||
|
||||
/// <summary>
/// One row of a scan table: a scanner name and the words it scanned.
/// </summary>
internal struct ScanTableRow
{
    /// <summary>Name of the scanner that produced this row.</summary>
    public string ScannerName { get; set; }

    /// <summary>The words scanned by <see cref="ScannerName"/>.</summary>
    public ICollection<string> ScannedWords { get; set; }
}
|
||||
|
||||
internal static class Program
|
||||
{
|
||||
/// <summary>
/// Entry point. Compares the tagged words of every image with each scan result of the same
/// image, collects word and character errors, and ranks the processors by their averages.
/// </summary>
/// <param name="args">
/// <c>args[0]</c>: directory containing the tagged <c>*.json</c> files;
/// <c>args[1]</c>: directory containing the scan result <c>*.json</c> files.
/// </param>
/// <exception cref="ArgumentException">Fewer than two arguments were supplied.</exception>
internal static void Main(string[] args)
{
    // Fail with a usable message instead of an IndexOutOfRangeException further down.
    if (args.Length < 2)
    {
        throw new ArgumentException(
            "Usage: ReportGenerator <tagged data directory> <scan results directory>",
            nameof(args));
    }

    var errorInfos = new List<(ScannedResultInfo scan, WordErrorInfo error)>();

    var tagFileInfos = GetTagFileInfos(args[0]);

    // Scan results are looked up by image name; an image without scans yields an empty group.
    var scanFileInfos = GetScanFileInfos(args[1]).ToLookup(i => i.ImageName);

    Directory.CreateDirectory("reports");

    foreach (var tagFileInfo in tagFileInfos)
    {
        var taggedWords = tagFileInfo.GetWords();

        foreach (var scanFileInfo in scanFileInfos[tagFileInfo.ImageName])
        {
            var scannedWords = scanFileInfo.GetWords();
            if (scannedWords.Count == 0)
            {
                // Scans without any words are left out of the statistics.
                continue;
            }

            // Calculate WER by comparing all tagged with all scanned words
            var wordErrorInfo = new WordErrorInfo
            {
                WordError = CalculateWer(taggedWords, scannedWords),
            };

            // Calculate CER for each tagged word
            foreach (var taggedWord in taggedWords)
            {
                var characterErrorInfo = new CharacterErrorInfo(taggedWord);

                foreach (var scannedWord in scannedWords)
                {
                    // Compare against every scanned word and keep the closest one.
                    var err = CalculateCer(taggedWord, scannedWord);
                    if (err >= characterErrorInfo.CharacterError)
                    {
                        continue;
                    }

                    characterErrorInfo.ScannedWord = scannedWord;
                    characterErrorInfo.CharacterError = err;

                    if (err == 0)
                    {
                        // Exact match, nothing can be better.
                        break;
                    }
                }

                wordErrorInfo.Words.Add(characterErrorInfo);
            }

            errorInfos.Add((scanFileInfo, wordErrorInfo));
        }
    }

    // NOTE(review): the two rankings below are computed but not written anywhere yet.

    // Somewhat off based on the amount of expected words
    // If a processor did scan nothing at all this value can be very low
    var bestCharErrorProcessor = errorInfos
        .GroupBy(e => e.scan.ProcessorName, e => e.error)
        .Select(g => (g.Key, g.Average(i => i.CharacterErrorAvg)))
        .OrderBy(g => g.Item2)
        .ToArray();

    // Same here but with less impact
    var bestWordErrorProcessor = errorInfos
        .GroupBy(e => e.scan.ProcessorName, e => e.error)
        .Select(g => (g.Key, g.Average(i => i.WordError)))
        .OrderBy(g => g.Item2)
        .ToArray();
}
|
||||
|
||||
/// <summary>
/// Computes the Levenshtein distance between two words: the number of single-character
/// insertions, deletions and substitutions needed to turn one into the other.
/// </summary>
/// <param name="s1">First word; <c>null</c> is treated like an empty string.</param>
/// <param name="s2">Second word; <c>null</c> is treated like an empty string.</param>
/// <returns>The edit distance; 0 only when both words are equal.</returns>
static double CalculateCer(string s1, string s2)
{
    // The distance to an empty word is the length of the other word. Returning 0 here would
    // make an empty scanned word look like a perfect match for every tagged word.
    if (string.IsNullOrEmpty(s1))
    {
        return s2?.Length ?? 0;
    }

    if (string.IsNullOrEmpty(s2))
    {
        return s1.Length;
    }

    // Only the previous and the current row of the distance matrix are needed.
    var previous = new int[s2.Length + 1];
    var current = new int[s2.Length + 1];

    for (var j = 0; j <= s2.Length; j++)
    {
        previous[j] = j;
    }

    for (var i = 1; i <= s1.Length; i++)
    {
        current[0] = i;

        for (var j = 1; j <= s2.Length; j++)
        {
            var cost = s1[i - 1] == s2[j - 1] ? 0 : 1;

            var deletion = previous[j] + 1;
            var insertion = current[j - 1] + 1;
            var substitution = previous[j - 1] + cost;

            current[j] = Math.Min(Math.Min(deletion, insertion), substitution);
        }

        (previous, current) = (current, previous);
    }

    // After the final swap the last computed row lives in 'previous'.
    return previous[s2.Length];
}
|
||||
|
||||
/// <summary>
/// Estimates the word error of a scan as
/// <c>(substitutions + deletions + insertions) / expected.Count</c>.
/// </summary>
/// <remarks>
/// Substitutions are counted position by position over the common length of both lists,
/// deletions and insertions as set differences of the distinct words. A single mismatched
/// word can therefore contribute to more than one of the three counts.
/// </remarks>
/// <param name="expected">The tagged (reference) words.</param>
/// <param name="actual">The scanned words.</param>
/// <returns>The summed error count divided by the number of expected words.</returns>
static double CalculateWer(ICollection<string> expected, ICollection<string> actual)
{
    // Distinct words of 'source' that do not occur in 'other'.
    static int CountMissing(IEnumerable<string> source, IEnumerable<string> other) =>
        source.Except(other).Count();

    // Walk both lists in lock-step and count the positions whose words differ.
    var substitutions = 0;
    using (var expectedWords = expected.GetEnumerator())
    using (var actualWords = actual.GetEnumerator())
    {
        while (expectedWords.MoveNext() && actualWords.MoveNext())
        {
            if (!string.Equals(expectedWords.Current, actualWords.Current))
            {
                substitutions++;
            }
        }
    }

    // Words dropped from the original
    var deletions = CountMissing(expected, actual);

    // Extra words added compared to the original
    var insertions = CountMissing(actual, expected);

    var errors = substitutions + deletions + insertions;
    return errors / (double)expected.Count;
}
|
||||
|
||||
/// <summary>
/// Lists the tag files (<c>*.json</c>) found directly in <paramref name="dir"/>.
/// </summary>
/// <param name="dir">Directory holding the tagged data.</param>
/// <returns>A lazily evaluated sequence with one <see cref="TagFileInfo"/> per file.</returns>
/// <exception cref="ArgumentException"><paramref name="dir"/> does not exist.</exception>
private static IEnumerable<TagFileInfo> GetTagFileInfos(string dir)
{
    // No 'yield' here on purpose: the directory check has to run at call time.
    if (Directory.Exists(dir))
    {
        return
            from path in Directory.EnumerateFiles(dir, "*.json")
            select TagFileInfo.FromPath(path);
    }

    throw new ArgumentException($"Invalid tagged data directory '{dir}'");
}
|
||||
|
||||
/// <summary>
/// Lists the scan result files (<c>*.json</c>) found directly in <paramref name="dir"/>.
/// </summary>
/// <param name="dir">Directory holding the scan results.</param>
/// <returns>A lazily evaluated sequence with one <see cref="ScannedResultInfo"/> per file.</returns>
/// <exception cref="ArgumentException"><paramref name="dir"/> does not exist.</exception>
private static IEnumerable<ScannedResultInfo> GetScanFileInfos(string dir)
{
    // No 'yield' here on purpose: the directory check has to run at call time.
    if (Directory.Exists(dir))
    {
        return
            from path in Directory.EnumerateFiles(dir, "*.json")
            select ScannedResultInfo.FromPath(path);
    }

    throw new ArgumentException($"Invalid scan results directory '{dir}'");
}
|
||||
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"profiles": {
|
||||
"ReportGenerator": {
|
||||
"commandName": "Project",
|
||||
"commandLineArgs": "\"img\" \"results\"",
|
||||
"workingDirectory": "D:\\git\\BA\\Examples\\CLI\\bin\\Debug\\net6.0"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,45 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace ReportGenerator;
|
||||
|
||||
/// <summary>
/// Describes one scan result file whose file name has the form
/// <c>{image}.{processor}.{extension}</c>.
/// </summary>
internal struct ScannedResultInfo
{
    // Splits a file name at its last two dots into the image and the processor part.
    private static readonly Regex parseRegex = new(
        @"(?'image'.+)\.(?'processor'.+)\..+",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase
    );

    /// <summary>Path of the scan result file.</summary>
    public string Path { get; private init; }

    /// <summary>Processor part of the file name; empty when the name did not match the pattern.</summary>
    public string ProcessorName { get; set; }

    /// <summary>Image part of the file name; empty when the name did not match the pattern.</summary>
    public string ImageName { get; set; }

    /// <summary>
    /// Reads the file at <see cref="Path"/>, expecting a JSON array of objects, and returns the
    /// <c>Text</c> property of every element.
    /// </summary>
    /// <returns>The scanned words in file order.</returns>
    /// <exception cref="InvalidDataException">An element has a <c>null</c> <c>Text</c> value.</exception>
    public ICollection<string> GetWords()
    {
        using var file = File.OpenRead(Path);

        // JsonDocument rents pooled buffers and must be disposed; the words are copied out
        // by ToArray() before that happens.
        using var document = JsonDocument.Parse(file);

        return document
            .RootElement
            .EnumerateArray()
            .Select(e =>
                e.GetProperty("Text").GetString() ?? throw new InvalidDataException("Cannot parse null words"))
            .ToArray();
    }

    /// <summary>
    /// Creates the info for <paramref name="path"/> by parsing its file name. The file itself
    /// is not opened.
    /// </summary>
    /// <param name="path">Path of a scan result file.</param>
    public static ScannedResultInfo FromPath(string path)
    {
        var match = parseRegex.Match(System.IO.Path.GetFileName(path));

        return new ScannedResultInfo
        {
            Path = path,
            ProcessorName = match.Groups["processor"].Value,
            ImageName = match.Groups["image"].Value
        };
    }

    /// <inheritdoc />
    public override string ToString() => ImageName;
}
|
||||
@@ -0,0 +1,31 @@
|
||||
using System.Text.Json;
|
||||
|
||||
namespace ReportGenerator;
|
||||
|
||||
/// <summary>
/// Describes one tag file: a JSON document whose <c>words</c> array lists the expected words
/// of an image.
/// </summary>
internal struct TagFileInfo
{
    /// <summary>Path of the tag file.</summary>
    public string Path { get; private init; }

    /// <summary>File name of <see cref="Path"/> without its extension.</summary>
    public string ImageName { get; set; }

    /// <summary>
    /// Reads the file at <see cref="Path"/> and returns the strings of its <c>words</c> array.
    /// </summary>
    /// <returns>The tagged words in file order.</returns>
    /// <exception cref="InvalidDataException">The array contains a <c>null</c> entry.</exception>
    public ICollection<string> GetWords()
    {
        using var file = File.OpenRead(Path);

        // JsonDocument rents pooled buffers and must be disposed; the words are copied out
        // by ToArray() before that happens.
        using var document = JsonDocument.Parse(file);

        return document
            .RootElement
            .GetProperty("words")
            .EnumerateArray()
            .Select(w => w.GetString() ?? throw new InvalidDataException("Cannot parse null words"))
            .ToArray();
    }

    /// <summary>
    /// Creates the info for <paramref name="path"/>. The file itself is not opened.
    /// </summary>
    /// <param name="path">Path of a tag file.</param>
    public static TagFileInfo FromPath(string path) => new()
    {
        Path = path,
        ImageName = System.IO.Path.GetFileNameWithoutExtension(path),
    };

    /// <inheritdoc />
    public override string ToString() => ImageName;
}
|
||||
@@ -17,4 +17,11 @@ public class AutoThresholdProcessor : SingleImageProcessor
|
||||
image.AutoThreshold(_method);
|
||||
return image;
|
||||
}
|
||||
|
||||
#region Overrides of Object
|
||||
|
||||
/// <summary>
/// Returns the processor's type name followed by the configured method in parentheses.
/// </summary>
public override string ToString()
{
    return $"{nameof(AutoThresholdProcessor)}({_method})";
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
@@ -5,16 +5,27 @@ namespace Ocr.Tesseract.Screenshots.Threshold;
|
||||
|
||||
/// <summary>
/// Image processor that forwards a fixed width and height to <c>ThresholdAdaptive</c>.
/// </summary>
public class ThresholdAdaptiveProcessor : SingleImageProcessor
{
    private readonly int _width;
    private readonly int _height;

    /// <summary>Creates a processor with separate width and height values.</summary>
    /// <param name="width">Value passed as the first argument of <c>ThresholdAdaptive</c>.</param>
    /// <param name="height">Value passed as the second argument of <c>ThresholdAdaptive</c>.</param>
    public ThresholdAdaptiveProcessor(int width, int height)
    {
        _width = width;
        _height = height;
    }

    /// <summary>Creates a processor that uses <paramref name="size"/> for both width and height.</summary>
    /// <param name="size">Value used for width and height alike.</param>
    public ThresholdAdaptiveProcessor(int size) : this(size, size)
    {
    }

    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image) => image
        .ThresholdAdaptive(_width, _height);

    #region Overrides of Object

    /// <summary>
    /// Returns the type name followed by width and height, each formatted with at least two
    /// digits and separated by an underscore.
    /// </summary>
    public override string ToString() => $"{nameof(ThresholdAdaptiveProcessor)}({_width:D2}_{_height:D2})";

    #endregion
}
|
||||
|
||||
@@ -4,18 +4,25 @@ namespace Ocr.Tesseract.Screenshots.Threshold;
|
||||
|
||||
/// <summary>
/// Image processor that applies <c>Threshold</c> with a fixed percentage.
/// </summary>
public class ThresholdProcessor : SingleImageProcessor
{
    private readonly Percentage _percentage;

    /// <summary>Creates a processor for the given threshold.</summary>
    /// <param name="percentage">Value wrapped into a <see cref="Percentage"/>.</param>
    public ThresholdProcessor(int percentage)
    {
        _percentage = new Percentage(percentage);
    }

    /// <inheritdoc />
    protected override MagickImage Process(MagickImage image)
    {
        // Threshold is called for its effect on the image; the same instance is handed back.
        image.Threshold(_percentage);
        return image;
    }

    #region Overrides of Object

    /// <summary>Returns the type name followed by the configured percentage in parentheses.</summary>
    public override string ToString() => $"{nameof(ThresholdProcessor)}({_percentage})";

    #endregion
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using ImageMagick;
|
||||
using System.Text.Json.Serialization;
|
||||
|
||||
namespace Ocr.Tesseract.Models;
|
||||
|
||||
@@ -26,6 +27,7 @@ public struct ScanResult
|
||||
/// <summary>
|
||||
/// Value referenced by <see cref="Word"/>
|
||||
/// </summary>
|
||||
[JsonIgnore]
|
||||
public MagickImage Image { get; set; }
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user