// See https://aka.ms/new-console-template for more information

using System.Text.Json;
using System.Text.RegularExpressions;
using ImageMagick;
using Ocr.Tesseract;
using Ocr.Tesseract.Models;
using Ocr.Tesseract.Screenshots;
using Ocr.Tesseract.Screenshots.Configuration;
using Ocr.Tesseract.Screenshots.Threshold;
using Process.Abstract.Configuration;
using Process.Interface;

// Entry point: runs a single image through a preprocessing -> OCR -> postprocessing
// chain and dumps the intermediate word lists as JSON files (see WriteToFile).

// Matches runs of two or more word characters, apostrophes, or hyphens.
var wordRegex = new Regex(
    @"[\w'\-]{2,}",
    RegexOptions.Compiled | RegexOptions.Multiline | RegexOptions.IgnoreCase
);

var tesseractConfig = new TesseractScreenshotConfiguration
{
    DataPath = "tessdata",
    Languages = new[] { "eng", "deu" }
};

// The relaxed encoder escapes fewer characters than the default one, which keeps
// non-ASCII letters and apostrophes readable in the generated files.
var jsonOptions = new JsonSerializerOptions()
{
    WriteIndented = true,
    Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping
};

// Exactly one argument (the image path) is expected. Report misuse explicitly
// instead of failing with an InvalidOperationException from Single().
if (args.Length != 1)
{
    Console.Error.WriteLine("Usage: <program> <image-path>");
    Environment.ExitCode = 1;
    return;
}

var processor = MakeProcessor();

// MagickImage is IDisposable; the using declaration releases it when the
// top-level statements finish.
using var image = new MagickImage(args[0]);
processor.Process(new[] { image });

return;

// Builds the complete chain: image preprocessing, the Tesseract scan, then
// filtering of the recognized words. A ProcessingEvent after each postprocessing
// step writes the current word list to disk under that step's name.
IProcessorChain MakeProcessor()
{
    var preprocessing = new ProcessorChainConfiguration()
        .Use(new CloneImageProcessor())
        .Use(new ResizeProcessor(FilterType.Lanczos2Sharp, PixelInterpolateMethod.Mesh))
        .Use(new NormalizeProcessor())
        .Use(new ThresholdAdaptiveProcessor(15, 15))
        .Use(new AddBorderProcessor(10))
        .Use(new BinarizeProcessor())
        .Complete(new NegateCloneProcessor());

    var postprocessing = new ProcessorChainConfiguration()
        .Use(new ProcessingEvent((_, data) => WriteToFile(data, "source")))
        // NOTE(review): 50 is presumably a minimum confidence threshold - confirm
        // against ConfidenceFilter.
        .Use(new ConfidenceFilter(50))
        .Use(new ProcessingEvent((_, data) => WriteToFile(data, "confidence")))
        .Use(new ToLowerProcessor())
        .Use(new ProcessingEvent((_, data) => WriteToFile(data, "normalize")))
        .Use(new DuplicateFilter())
        .Use(new ProcessingEvent((_, data) => WriteToFile(data, "duplicates")))
        .Use(new RegexFilter(wordRegex))
        .Complete(new ProcessingEvent((_, data) => WriteToFile(data, "regex")));

    var scan = new TesseractProcessor(tesseractConfig);

    return new ProcessorChainConfiguration()
        .Use(preprocessing)
        .Use(scan)
        .Complete(postprocessing);
}

// Serializes the given scan results to two JSON files, using paths relative to
// the current working directory:
//   "{name}.detailed.json" - text and confidence of every word (WordInfo)
//   "{name}.json"          - the word texts only
// FileMode.Create overwrites any existing file with the same name.
//
// data: scan results to dump; each element is converted via WordInfo.Create.
// name: file name prefix identifying the pipeline stage.
void WriteToFile(ICollection<ScanResult> data, string name)
{
    using var detailedFile = File.Open($"{name}.detailed.json", FileMode.Create);
    JsonSerializer.Serialize(detailedFile, data.Select(WordInfo.Create), jsonOptions);

    using var wordsFile = File.Open($"{name}.json", FileMode.Create);
    JsonSerializer.Serialize(wordsFile, data.Select(d => d.Word.Text), jsonOptions);
}

/// <summary>
/// Serialization shape for one recognized word in the "detailed" JSON output.
/// </summary>
struct WordInfo
{
    /// <summary>The recognized text, copied from <c>ScanResult.Word.Text</c>.</summary>
    public string Text { get; set; }

    /// <summary>The confidence value, copied from <c>ScanResult.Word.Confidence</c>.</summary>
    public double Confidence { get; set; }

    /// <summary>Creates a <see cref="WordInfo"/> from the word of a scan result.</summary>
    /// <param name="result">The scan result whose word is projected.</param>
    /// <returns>A new <see cref="WordInfo"/> holding the word's text and confidence.</returns>
    public static WordInfo Create(ScanResult result) => new()
    {
        Text = result.Word.Text,
        Confidence = result.Word.Confidence
    };
}