diff --git a/Examples/CLI/EvaluationProcessor.cs b/Examples/CLI/EvaluationProcessor.cs index c00c568..2284583 100644 --- a/Examples/CLI/EvaluationProcessor.cs +++ b/Examples/CLI/EvaluationProcessor.cs @@ -45,13 +45,13 @@ internal class EvaluationProcessor .Use(new DuplicateFilter()) .Complete(new RegexFilter(WordRegex)); - private readonly IProcessor _thresholdProcessor; + private readonly StopwatchProcessor _thresholdProcessor; public string OutputFolder { get; init; } = "results"; public EvaluationProcessor(IProcessor thresholdProcessor) { - _thresholdProcessor = thresholdProcessor; + _thresholdProcessor = new StopwatchProcessor(thresholdProcessor); } /// @@ -59,12 +59,21 @@ internal class EvaluationProcessor { Directory.CreateDirectory(OutputFolder); - var processor = MakeProcessor(); - var results = processor.Process(new[] { image }).Select(r => r.Word); + var words = MakeProcessor() + .Process(new[] { image }) + .Select(r => r.Word) + .ToArray(); + + var result = new + { + Words = words.ToArray(), + Elapsed = _thresholdProcessor.Elapsed?.TotalMilliseconds, + }; var name = Path.GetFileNameWithoutExtension(image.FileName); - await using var file = File.OpenWrite(Path.Combine(OutputFolder, $"{name}.{_thresholdProcessor}.json")); - await JsonSerializer.SerializeAsync(file, results); + var path = Path.Combine(OutputFolder, $"{name}.{_thresholdProcessor}.json"); + await using var file = File.OpenWrite(path); + await JsonSerializer.SerializeAsync(file, result); }); private IProcessorChain MakeProcessor() @@ -93,7 +102,8 @@ internal class EvaluationProcessor { var image = tImages[i].CloneImage(); var name = Path.GetFileName(image.FileName); - image.Write(Path.Combine(OutputFolder, $"{_thresholdProcessor}.{i:D2}.{name}")); + var path = Path.Combine(OutputFolder, $"{_thresholdProcessor}.{i:D2}.{name}"); + image.Write(path); } return tImages; diff --git a/Examples/CLI/Properties/launchSettings.json b/Examples/CLI/Properties/launchSettings.json index b77cd1f..c348a1d 
/// <summary>
/// Computes edit distances between two sequences of arbitrary elements.
/// </summary>
public static class Calculator
{
    /// <summary>
    /// Calculates the Levenshtein distance between <paramref name="reference"/>
    /// and <paramref name="hypothesis"/>, i.e. the minimum number of element
    /// insertions, deletions and substitutions needed to turn one into the other.
    /// Elements are compared with <see cref="object.Equals(object, object)"/>.
    /// </summary>
    /// <typeparam name="T">Any enumerable type, e.g. a string or a string array</typeparam>
    /// <param name="reference">The "known to be correct" sequence</param>
    /// <param name="hypothesis">
    /// The sequence to check; <c>null</c> is treated like an empty sequence
    /// </param>
    /// <returns>
    /// The edit distance; the length of <paramref name="reference"/> when the
    /// hypothesis is null or empty
    /// </returns>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reference"/> is <c>null</c>
    /// </exception>
    public static double GetDistance<T>(T reference, T? hypothesis)
        where T : IEnumerable
    {
        if (reference is null)
        {
            throw new ArgumentNullException(nameof(reference));
        }

        // Materialize both sides once so they can be indexed
        var refArr = reference.Cast<object>().ToArray();
        var hypArr = hypothesis?.Cast<object>().ToArray() ?? Array.Empty<object>();

        if (hypArr.Length == 0)
        {
            // Every reference element would have to be inserted
            return refArr.Length;
        }

        // Element-wise comparison; Equals() on the arrays themselves would only
        // compare references and could never succeed for two fresh arrays
        if (refArr.SequenceEqual(hypArr))
        {
            return 0;
        }

        var distance = new int[refArr.Length + 1, hypArr.Length + 1];

        // First column: transforming a reference prefix into an empty hypothesis
        for (var x = 0; x <= refArr.Length; x++)
        {
            distance[x, 0] = x;
        }

        // First row: transforming an empty reference into a hypothesis prefix
        for (var y = 0; y <= hypArr.Length; y++)
        {
            distance[0, y] = y;
        }

        for (var x = 0; x < refArr.Length; x++)
        {
            for (var y = 0; y < hypArr.Length; y++)
            {
                // Substitution is free when both elements are equal
                var cost = Equals(refArr[x], hypArr[y]) ? 0 : 1;

                var substitution = distance[x, y] + cost;
                var deletion = distance[x, y + 1] + 1;
                var insertion = distance[x + 1, y] + 1;

                distance[x + 1, y + 1] = Math.Min(substitution, Math.Min(deletion, insertion));
            }
        }

        return distance[refArr.Length, hypArr.Length];
    }
}
/// <summary>
/// Exposes the outcome of comparing a reference value against a hypothesis.
/// </summary>
public interface IDistanceComparer
{
    /// <summary>
    /// The calculated distance between the reference and the hypothesis.
    /// How the value is computed and scaled is up to the implementing type.
    /// </summary>
    public double Distance { get; }
}
/// <summary>
/// Markdown-oriented helpers for <see cref="StringBuilder"/>
/// </summary>
public static class StringBuilderExtensions
{
    /// <summary>
    /// Appends a Markdown heading: <paramref name="level"/> hash characters,
    /// a space, the text and a trailing blank line.
    /// </summary>
    /// <returns>The same builder, to allow chaining</returns>
    public static StringBuilder AppendHeading(this StringBuilder sb, int level, string text)
    {
        var marker = new string('#', level);

        sb.Append(marker);
        sb.Append(' ');

        return sb.AppendParagraph(text);
    }

    /// <summary>
    /// Appends the text on its own line, followed by an empty line.
    /// </summary>
    /// <returns>The same builder, to allow chaining</returns>
    public static StringBuilder AppendParagraph(this StringBuilder sb, string text)
    {
        sb.AppendLine(text);
        sb.AppendLine();

        return sb;
    }
}
/// <summary>
/// Expands a path whose file name part contains a wildcard (<c>*</c> or <c>?</c>)
/// into the matching files of its directory. A path without a wildcard is
/// returned unchanged as the only element.
/// </summary>
/// <param name="self">A file path, optionally ending in a file name pattern</param>
/// <returns>The matching file paths, or <paramref name="self"/> itself</returns>
public static IEnumerable<string> ExpandPath(this string self)
{
    // Path.GetFileName honors both directory separators and keeps the root
    // intact, unlike splitting on Path.DirectorySeparatorChar alone
    var fileName = Path.GetFileName(self);
    if (!fileName.Contains('*') && !fileName.Contains('?'))
    {
        // Path contains no pattern
        return new[] { self };
    }

    // Path contains file pattern
    var directory = Path.GetDirectoryName(self);
    if (string.IsNullOrEmpty(directory))
    {
        // A bare pattern such as "*.png" refers to the current directory;
        // Directory.EnumerateFiles rejects an empty path
        directory = ".";
    }

    return Directory.EnumerateFiles(directory, fileName);
}
/// <summary>
/// Evaluation results of all processors for a single image
/// </summary>
public readonly struct ImageStats
{
    /// <summary>
    /// Name of the evaluated image
    /// </summary>
    public string ImageName { get; }

    /// <summary>
    /// The tagged words the scan results are compared against
    /// </summary>
    public ICollection<string> Reference { get; }

    /// <summary>
    /// One entry per scan result that was passed to the constructor
    /// </summary>
    public ICollection<ProcessorStat> Processors { get; }

    public ImageStats(
        string imageName,
        ICollection<string> taggedWords,
        IEnumerable<ScanFileInfo> scanResult
    )
    {
        ImageName = imageName;
        Reference = taggedWords;
        Processors = scanResult.Select(Evaluate).ToArray();

        // Loads the scan file and compares its words to the tagged words
        ProcessorStat Evaluate(ScanFileInfo scan)
        {
            var data = scan.GetData();
            return new ProcessorStat(scan.ProcessorName, data.Elapsed, taggedWords, data.Words);
        }
    }
}
a/Examples/ReportGenerator/Models/ProcessorStat.cs +++ b/Examples/ReportGenerator/Models/ProcessorStat.cs @@ -1,90 +1,82 @@ -namespace ReportGenerator.Models; +using Common.Distance; -internal readonly struct ProcessorStat +namespace ReportGenerator.Models; + +public readonly struct ProcessorStat : IDistanceComparer> { - public string ProcessorName { get; } = string.Empty; - public ICollection CharacterStats { get; } = Array.Empty(); - public double WordError { get; } = double.PositiveInfinity; - - public ProcessorStat( - string processorName, - ICollection reference, - ICollection values - ) - { - ProcessorName = processorName; - - WordError = CalculateWer( - reference, - values - ); - - CharacterStats = GetCharacterStat( - reference, - values - ).ToArray(); - } - - public IEnumerable ToRow() => CharacterStats - .Select(s => s.ToString()) - .Append(WordError.ToString("F2")) - .Append(CharacterStats.Average(s => s.CharacterError).ToString("F2")) - .Append(CharacterStats.Sum(s => s.CharacterError).ToString("F2")) - .Prepend(ProcessorName); + /// + /// The name of the processor + /// + public string Name { get; } /// - /// Finds the smallest possible CER by calculating the levenshtein - /// distance to every word and returning the most similar combination + /// The total milliseconds it took the processor to process the data /// - /// - private static IEnumerable GetCharacterStat( - IEnumerable reference, - ICollection values + public double ProcessingTime { get; } + + /// + public IEnumerable Reference { get; } + + /// + public IEnumerable? 
/// <summary>
/// Evaluates the words produced by one processor against the reference words.
/// </summary>
/// <param name="name">The name of the processor</param>
/// <param name="processingTime">The processing time in milliseconds</param>
/// <param name="reference">The "known to be correct" words</param>
/// <param name="hypothesis">The words produced by the processor</param>
public ProcessorStat(
    string name,
    double processingTime,
    ICollection<string> reference,
    ICollection<string> hypothesis
)
{
    Name = name;
    ProcessingTime = processingTime;

    Reference = reference;
    Hypothesis = hypothesis;

    // Word level: edit distance between the sorted word lists,
    // divided by the number of reference words
    var sortedReference = reference.OrderBy(word => word).ToArray();
    var sortedHypothesis = hypothesis.OrderBy(word => word).ToArray();
    var wordDistance = Calculator.GetDistance(sortedReference, sortedHypothesis);
    Distance = wordDistance / reference.Count;

    // Character level: closest hypothesis for every reference word
    Words = (
        from word in reference
        select GetDistanceInfo(word, hypothesis)
    ).ToArray();
}
/// <summary>
/// Reads the scan result file at <see cref="Path"/> and extracts the
/// processing time and the recognized words.
/// </summary>
/// <returns>
/// The value of the <c>Elapsed</c> property (<see cref="double.NaN"/> when the
/// file stores <c>null</c>) and the <c>Text</c> of every entry in <c>Words</c>;
/// a <c>null</c> text is read as an empty string
/// </returns>
public (double Elapsed, ICollection<string> Words) GetData()
{
    using var file = File.OpenRead(Path);

    // JsonDocument rents pooled buffers and must be disposed. All values are
    // copied out below before the document goes out of scope.
    using var document = JsonDocument.Parse(file);
    var root = document.RootElement;

    var words = root
        .GetProperty("Words")
        .EnumerateArray()
        .Select(e => e.GetProperty("Text").GetString() ?? string.Empty)
        .ToArray();

    // The writer serializes a nullable value, so "Elapsed" may be JSON null;
    // GetDouble() would throw on that
    var elapsedElement = root.GetProperty("Elapsed");
    var elapsed = elapsedElement.ValueKind == JsonValueKind.Null
        ? double.NaN
        : elapsedElement.GetDouble();

    return (elapsed, words);
}
/// <summary>
/// Lazily builds one <see cref="ImageStats"/> per tag file, pairing it with
/// the scan files that share its image name.
/// </summary>
private static IEnumerable<ImageStats> Scan(
    IEnumerable<TagFileInfo> tagFileInfos,
    IEnumerable<ScanFileInfo> scanFileInfos
)
{
    var scansByImage = scanFileInfos.ToLookup(scan => scan.ImageName);

    foreach (var tagFile in tagFileInfos)
    {
        var imageName = tagFile.ImageName;

        // Unique tagged words in sorted order
        var taggedWords = tagFile
            .GetWords()
            .Distinct()
            .OrderBy(word => word)
            .ToArray();

        yield return new ImageStats(imageName, taggedWords, scansByImage[imageName]);
    }
}
/// <summary>
/// Fluent builder that renders evaluation results as a Markdown report.
/// Sections are appended in the order the <c>With…</c> methods are called;
/// <see cref="ToString"/> returns the accumulated document.
/// </summary>
public class ReportGenerator
{
    private ICollection<ImageStats> Images { get; }

    private readonly StringBuilder _sb = new();

    private ReportGenerator(IEnumerable<ImageStats> stats) => Images = stats.ToArray();

    /// <summary>
    /// Returns the Markdown generated so far
    /// </summary>
    public override string ToString() => _sb.ToString();

    #region Fluent definition

    /// <summary>
    /// Appends a level 1 heading
    /// </summary>
    public ReportGenerator WithTitle(string text)
    {
        _sb.AppendHeading(1, text);
        return this;
    }

    /// <summary>
    /// Appends one table per image, listing every processor with its elapsed
    /// time, word error, average character error and per-word results.
    /// Processors are ordered by word error, then by processing time.
    /// </summary>
    public ReportGenerator WithFullStatistic(string title)
    {
        _sb.AppendHeading(2, title);

        foreach (var stat in Images)
        {
            _sb.AppendHeading(3, stat.ImageName);
            _sb.AppendParagraph(HtmlImage(Path.Combine("img", stat.ImageName), 350, 350));

            AppendRow(stat
                .Reference
                .Prepend("Image")
                .Prepend("CER (avg)")
                .Prepend("WER")
                .Prepend("Elapsed")
                .Prepend("Processor")
            );

            // Five fixed columns plus one per reference word
            AppendRowSeparator(stat.Reference.Count + 5);

            var processors = stat.Processors
                .OrderBy(s => s.Distance)
                .ThenBy(s => s.ProcessingTime);

            foreach (var processor in processors)
            {
                var imgPath = Path.Combine("results", $"{processor.Name}.00.{stat.ImageName}.png");

                AppendRow(processor.Words
                    .Select(s => s.ToString() ?? string.Empty)
                    .Prepend(HtmlImage(imgPath, 150, 150))
                    .Prepend(processor.Words.Average(s => s.Distance).ToString("F2"))
                    .Prepend($"{processor.Distance * 100:F1}%")
                    // ProcessingTime already is in milliseconds, no scaling needed
                    .Prepend($"{processor.ProcessingTime:F1}ms")
                    .Prepend(processor.Name)
                );
            }

            _sb.AppendLine();
            _sb.AppendParagraph(
                $"*Comparison data generated based on {stat.Reference.Count} tagged words.*"
            );
        }

        return this;
    }

    /// <summary>
    /// Appends rankings of the processors by average time, word error and
    /// character error across all images.
    /// </summary>
    /// <param name="title">Heading of the section</param>
    /// <param name="context">
    /// Number of entries shown from the top and from the bottom of each
    /// ranking; rankings with at most twice that many entries are shown in full
    /// </param>
    public ReportGenerator WithBestOf(string title, int context = 5)
    {
        _sb.AppendHeading(2, title);

        var lookup = Images
            .SelectMany(s => s.Processors)
            .ToLookup(p => p.Name);

        // Compare time across all images (ProcessingTime is in milliseconds)
        var byTime = lookup
            .Select(g => (Name: g.Key, Value: g.Average(p => p.ProcessingTime)))
            .OrderBy(g => g.Value);

        // Compare WER across all images
        var byWer = lookup
            .Select(g => (Name: g.Key, Value: g.Average(p => p.Distance) * 100))
            .OrderBy(g => g.Value);

        // Compare CER across all images
        var byCer = lookup
            .Select(g => (Name: g.Key, Value: g.Average(p => p.Words.Average(w => w.Distance))))
            .OrderBy(g => g.Value);

        // Print
        AppendComparison(3, "Time", byTime, " ms");
        AppendComparison(3, "WER", byWer, " %");
        AppendComparison(3, "CER", byCer, " changes");

        return this;

        void AppendComparison(
            int level,
            string tableTitle,
            IEnumerable<(string, double)> values,
            string valueUnit = ""
        )
        {
            var ranked = values.ToArray();

            _sb.AppendHeading(level, tableTitle);

            AppendRow(new[] { "Processor", "Average" });
            AppendRowSeparator(2);

            if (ranked.Length <= 2 * context)
            {
                // Nothing to abbreviate: show every entry instead of silently
                // dropping the middle one (or the only one)
                AppendRows(ranked.Select(ToRow));
            }
            else
            {
                AppendRows(ranked.Take(context).Select(ToRow));
                AppendRowSeparator(2, "...");
                AppendRows(ranked.TakeLast(context).Select(ToRow));
            }

            // Blank line so the table is terminated before the next block
            _sb.AppendLine();

            string[] ToRow((string, double) entry) => new[]
            {
                entry.Item1,
                entry.Item2.ToString("F2") + valueUnit
            };
        }
    }

    #endregion

    #region Helpers

    /// <summary>
    /// Appends a single table row with the given cells
    /// </summary>
    private void AppendRow(IEnumerable<string> row)
    {
        const string separator = " | ";
        _sb.AppendLine(separator + string.Join(separator, row) + separator);
    }

    /// <summary>
    /// Appends one table row per element
    /// </summary>
    private void AppendRows(IEnumerable<IEnumerable<string>> rows)
    {
        foreach (var row in rows)
        {
            AppendRow(row);
        }
    }

    /// <summary>
    /// Builds an HTML image tag for the given path, limited to the given
    /// maximum size in pixels. A missing <c>.png</c> extension is appended.
    /// </summary>
    private static string HtmlImage(string path, int maxWidth, int maxHeight)
    {
        if (!path.EndsWith(".png", StringComparison.Ordinal))
        {
            path += ".png";
        }

        // Path.Combine yields backslashes on Windows; URLs use forward slashes
        var url = path.Replace('\\', '/');

        return $"<img src=\"{url}\" style=\"max-width: {maxWidth}px; max-height: {maxHeight}px;\" />";
    }

    /// <summary>
    /// Appends a row consisting of <paramref name="columns"/> identical cells,
    /// by default the Markdown header separator
    /// </summary>
    private void AppendRowSeparator(int columns, string content = "---") =>
        AppendRow(Enumerable.Range(0, columns).Select(_ => content));

    #endregion

    #region Factory Methods

    /// <summary>
    /// Creates a generator for the given image statistics
    /// </summary>
    public static ReportGenerator FromData(IEnumerable<ImageStats> stats) => new(stats);

    #endregion
}