diff --git a/Examples/ReportGenerator/Models/CharacterStats.cs b/Examples/ReportGenerator/Models/CharacterStats.cs
new file mode 100644
index 0000000..c2a10f4
--- /dev/null
+++ b/Examples/ReportGenerator/Models/CharacterStats.cs
@@ -0,0 +1,81 @@
+namespace ReportGenerator.Models;
+
+/// <summary>
+/// Pairs a reference word with a candidate word and the edit distance
+/// between the two.
+/// </summary>
+internal readonly struct CharacterStats
+{
+    /// <summary>The word the candidate is measured against.</summary>
+    public string Reference { get; }
+
+    /// <summary>The candidate word; empty when none was supplied.</summary>
+    public string Value { get; }
+
+    /// <summary>
+    /// Levenshtein distance between <see cref="Reference"/> and
+    /// <see cref="Value"/>; positive infinity when no candidate was supplied.
+    /// </summary>
+    public double CharacterError { get; }
+
+    /// <summary>Creates an entry that has no candidate word.</summary>
+    /// <param name="reference">The reference word.</param>
+    public CharacterStats(string reference)
+    {
+        CharacterError = double.PositiveInfinity;
+        Value = string.Empty;
+        Reference = reference;
+    }
+
+    /// <summary>Creates an entry and measures the candidate against the reference.</summary>
+    /// <param name="reference">The reference word.</param>
+    /// <param name="value">The candidate word.</param>
+    public CharacterStats(string reference, string value)
+    {
+        Reference = reference;
+        Value = value;
+        CharacterError = CalculateCer(reference, value);
+    }
+
+    /// <summary>
+    /// Computes the Levenshtein distance between two strings, charging one
+    /// edit per insertion, deletion or substitution. The raw edit count is
+    /// returned; it is not divided by the length of either string.
+    /// </summary>
+    private static double CalculateCer(string s1, string s2)
+    {
+        var width = s2.Length + 1;
+
+        // Only two rows of the distance table are needed at any time.
+        var above = new int[width];
+        var row = new int[width];
+
+        for (var col = 0; col < width; col++)
+        {
+            above[col] = col;
+        }
+
+        for (var line = 1; line <= s1.Length; line++)
+        {
+            row[0] = line;
+
+            for (var col = 1; col < width; col++)
+            {
+                var substitute = above[col - 1] + (s1[line - 1] == s2[col - 1] ? 0 : 1);
+                var insertOrDelete = Math.Min(above[col], row[col - 1]) + 1;
+                row[col] = Math.Min(substitute, insertOrDelete);
+            }
+
+            (above, row) = (row, above);
+        }
+
+        return above[width - 1];
+    }
+
+    /// <inheritdoc />
+    public override string ToString()
+    {
+        var shown = string.IsNullOrEmpty(Value) ? "`null`" : Value;
+        return $"{shown} ({CharacterError})";
+    }
+}
\ No newline at end of file
diff --git a/Examples/ReportGenerator/Models/ImageStats.cs b/Examples/ReportGenerator/Models/ImageStats.cs
new file mode 100644
index 0000000..2195731
--- /dev/null
+++ b/Examples/ReportGenerator/Models/ImageStats.cs
@@ -0,0 +1,83 @@
+namespace ReportGenerator.Models;
+
+/// <summary>
+/// Statistics for one image: the reference words and, per processor, how
+/// the scanned words compare against them.
+/// </summary>
+internal readonly struct ImageStats
+{
+    /// <summary>Name of the image these statistics belong to.</summary>
+    public string ImageName { get; } = string.Empty;
+
+    /// <summary>The reference words of the image.</summary>
+    public ICollection<string> Reference { get; } = Array.Empty<string>();
+
+    /// <summary>One entry per scan result passed to the constructor.</summary>
+    public ICollection<ProcessorStat> Stats { get; } = Array.Empty<ProcessorStat>();
+
+    /// <summary>
+    /// Compares every scan result against <paramref name="taggedWords"/>.
+    /// </summary>
+    /// <param name="imageName">Name of the image.</param>
+    /// <param name="taggedWords">Reference words of the image.</param>
+    /// <param name="scanResult">Scan results to evaluate.</param>
+    public ImageStats(
+        string imageName,
+        ICollection<string> taggedWords,
+        IEnumerable<ScannedResultInfo> scanResult
+    )
+    {
+        Reference = taggedWords;
+        ImageName = imageName;
+        Stats = scanResult
+            .Select(t => new ProcessorStat(t.ProcessorName, taggedWords, t.GetWords()))
+            .ToArray();
+    }
+
+    /// <summary>
+    /// Produces table rows with one column for the reference words and one
+    /// column per processor: header, separator, one row per reference word,
+    /// separator, then the CER average, CER sum and WER summary rows.
+    /// </summary>
+    /// <returns>The rows; every row has one cell more than there are processors.</returns>
+    public IEnumerable<IEnumerable<string>> ToTable()
+    {
+        // Work on a local copy so no lambda below has to reach into 'this'.
+        var stats = Stats;
+
+        // Title
+        yield return stats.Select(s => s.ProcessorName).Prepend("Reference");
+
+        // Spacer
+        yield return stats.Select(_ => "---").Prepend("---");
+
+        // Content
+        foreach (var reference in Reference)
+        {
+            // ToRow yields one entry per stat whose reference equals the word,
+            // so a word listed twice would add extra cells and shift the
+            // columns. Emit exactly one cell per processor instead.
+            yield return stats
+                .Select(s => s.ToRow(reference).FirstOrDefault() ?? string.Empty)
+                .Prepend(reference);
+        }
+
+        // Spacer
+        yield return stats.Select(_ => "---").Prepend("---");
+
+        // Summaries
+        yield return stats
+            .Select(s => s.CharacterStats
+                .Select(c => c.CharacterError)
+                .DefaultIfEmpty(double.NaN) // Average() throws on an empty sequence
+                .Average()
+                .ToString("F2"))
+            .Prepend("CER (avg)");
+        yield return stats
+            .Select(s => s.CharacterStats.Sum(c => c.CharacterError).ToString("F2"))
+            .Prepend("CER (sum)");
+        yield return stats
+            .Select(s => s.WordError.ToString("F2"))
+            .Prepend("WER");
+    }
+}
diff --git a/Examples/ReportGenerator/Models/ProcessorStat.cs b/Examples/ReportGenerator/Models/ProcessorStat.cs
new file mode 100644
index 0000000..be81390
--- /dev/null
+++ b/Examples/ReportGenerator/Models/ProcessorStat.cs
@@ -0,0 +1,137 @@
+namespace ReportGenerator.Models;
+
+/// <summary>
+/// Accuracy figures of one processor for one image: the word error rate
+/// and, for every reference word, the closest scanned word.
+/// </summary>
+internal readonly struct ProcessorStat
+{
+    /// <summary>Name of the processor that produced the scanned words.</summary>
+    public string ProcessorName { get; } = string.Empty;
+
+    /// <summary>Closest scanned word per reference word, in reference order.</summary>
+    public ICollection<CharacterStats> CharacterStats { get; } = Array.Empty<CharacterStats>();
+
+    /// <summary>Word error rate of the scanned words against the reference words.</summary>
+    public double WordError { get; } = double.PositiveInfinity;
+
+    /// <summary>Computes the word and character statistics of one processor.</summary>
+    /// <param name="processorName">Name of the processor.</param>
+    /// <param name="reference">The reference words.</param>
+    /// <param name="values">The words the processor reported.</param>
+    public ProcessorStat(
+        string processorName,
+        ICollection<string> reference,
+        ICollection<string> values
+    )
+    {
+        ProcessorName = processorName;
+
+        WordError = CalculateWer(
+            reference,
+            values
+        );
+
+        CharacterStats = GetCharacterStat(
+            reference,
+            values
+        ).ToArray();
+    }
+
+    /// <summary>
+    /// Returns the text of every character statistic whose reference equals
+    /// <paramref name="word"/>.
+    /// </summary>
+    public IEnumerable<string> ToRow(string word) => CharacterStats
+        .Where(s => string.Equals(s.Reference, word))
+        .Select(s => s.ToString());
+
+    /// <summary>
+    /// Finds the smallest possible CER by calculating the Levenshtein
+    /// distance to every word and returning the most similar combination.
+    /// </summary>
+    /// <returns>One result per reference word, in reference order.</returns>
+    private static IEnumerable<CharacterStats> GetCharacterStat(
+        IEnumerable<string> reference,
+        ICollection<string> values
+    )
+    {
+        foreach (var refValue in reference)
+        {
+            // Starts at positive infinity, so the first scanned word always wins.
+            CharacterStats result = new CharacterStats(refValue);
+
+            foreach (var value in values)
+            {
+                var stat = new CharacterStats(refValue, value);
+                if (stat.CharacterError > result.CharacterError)
+                {
+                    continue;
+                }
+
+                // On a tie the later word replaces the earlier one.
+                result = stat;
+
+                // Nothing can beat an exact match.
+                if (stat.CharacterError == 0)
+                {
+                    break;
+                }
+            }
+
+            yield return result;
+        }
+    }
+
+    /// <summary>
+    /// Computes the word error rate: the minimum number of word
+    /// substitutions, deletions and insertions that turn
+    /// <paramref name="actual"/> into <paramref name="expected"/>, divided
+    /// by the number of expected words.
+    /// </summary>
+    /// <returns>
+    /// The word error rate; 0 when both collections are empty and positive
+    /// infinity when words were scanned although none were expected.
+    /// </returns>
+    private static double CalculateWer(ICollection<string> expected, ICollection<string> actual)
+    {
+        if (expected.Count == 0)
+        {
+            return actual.Count == 0 ? 0 : double.PositiveInfinity;
+        }
+
+        var expectedWords = expected.ToArray();
+        var actualWords = actual.ToArray();
+
+        // The edits must come from one minimal alignment of both word lists,
+        // i.e. the Levenshtein distance over words (kept in two rolling rows).
+        // Comparing position by position instead would let a single extra or
+        // missing word turn every later word into an error.
+        var previous = new int[actualWords.Length + 1];
+        var current = new int[actualWords.Length + 1];
+
+        for (var j = 0; j <= actualWords.Length; j++)
+        {
+            previous[j] = j;
+        }
+
+        for (var i = 1; i <= expectedWords.Length; i++)
+        {
+            current[0] = i;
+
+            for (var j = 1; j <= actualWords.Length; j++)
+            {
+                var cost = string.Equals(expectedWords[i - 1], actualWords[j - 1]) ? 0 : 1;
+                var substitution = previous[j - 1] + cost;
+                var deletion = previous[j] + 1;
+                var insertion = current[j - 1] + 1;
+
+                current[j] = Math.Min(substitution, Math.Min(deletion, insertion));
+            }
+
+            (previous, current) = (current, previous);
+        }
+
+        return previous[actualWords.Length] / (double)expectedWords.Length;
+    }
+}
diff --git a/Examples/ReportGenerator/ScannedResultInfo.cs b/Examples/ReportGenerator/Models/ScannedResultInfo.cs
similarity index 96%
rename from Examples/ReportGenerator/ScannedResultInfo.cs
rename to Examples/ReportGenerator/Models/ScannedResultInfo.cs
index f83aa9b..63f7e28 100644
--- a/Examples/ReportGenerator/ScannedResultInfo.cs
+++ b/Examples/ReportGenerator/Models/ScannedResultInfo.cs
@@ -1,7 +1,7 @@
using System.Text.Json;
using System.Text.RegularExpressions;
-namespace ReportGenerator;
+namespace ReportGenerator.Models;
internal struct ScannedResultInfo
{
diff --git a/Examples/ReportGenerator/Models/TableInfo.cs b/Examples/ReportGenerator/Models/TableInfo.cs
new file mode 100644
index 0000000..9758f81
--- /dev/null
+++ b/Examples/ReportGenerator/Models/TableInfo.cs
@@ -0,0 +1,62 @@
+namespace ReportGenerator.Models;
+
+/// <summary>
+/// Renders rows of cells as text, wrapping every row and every cell in
+/// configurable start and end markers.
+/// </summary>
+internal readonly struct TableInfo
+{
+    /// <summary>The rows to render; each row is a sequence of cell texts.</summary>
+    public IEnumerable<IEnumerable<string>> Rows { get; } = Enumerable.Empty<IEnumerable<string>>();
+
+    /// <summary>Text written once before the first row.</summary>
+    public string Title { get; init; } = string.Empty;
+
+    /// <summary>Text written before the first cell of every row.</summary>
+    public string RowStart { get; init; } = string.Empty;
+
+    /// <summary>Text written after the last cell of every row.</summary>
+    public string RowEnd { get; init; } = string.Empty;
+
+    /// <summary>Text written before every cell.</summary>
+    public string ColumnStart { get; init; } = string.Empty;
+
+    /// <summary>Text written after every cell.</summary>
+    public string ColumnEnd { get; init; } = string.Empty;
+
+    /// <summary>Creates a table over the given rows.</summary>
+    /// <param name="rows">The rows to render.</param>
+    public TableInfo(IEnumerable<IEnumerable<string>> rows)
+    {
+        Rows = rows;
+    }
+
+    #region Overrides of ValueType
+
+    /// <summary>
+    /// Renders the title followed by every row; a row is its start marker,
+    /// each cell wrapped in the column markers, and its end marker.
+    /// </summary>
+    public override string ToString()
+    {
+        // A builder keeps rendering linear in the output size; repeated
+        // string concatenation copies the accumulated text on every append.
+        var result = new System.Text.StringBuilder(Title);
+
+        foreach (var row in Rows)
+        {
+            result.Append(RowStart);
+
+            foreach (var column in row)
+            {
+                result.Append(ColumnStart).Append(column).Append(ColumnEnd);
+            }
+
+            result.Append(RowEnd);
+        }
+
+        return result.ToString();
+    }
+
+    #endregion
+}
\ No newline at end of file
diff --git a/Examples/ReportGenerator/Models/TagFileInfo.cs b/Examples/ReportGenerator/Models/TagFileInfo.cs
new file mode 100644
index 0000000..390842f
--- /dev/null
+++ b/Examples/ReportGenerator/Models/TagFileInfo.cs
@@ -0,0 +1,49 @@
+using System.Text.Json;
+
+namespace ReportGenerator.Models;
+
+/// <summary>
+/// Points at a JSON tag file that lists the reference words of an image.
+/// </summary>
+internal struct TagFileInfo
+{
+    /// <summary>Path of the tag file.</summary>
+    public string Path { get; private init; }
+
+    /// <summary>
+    /// Name of the image; <see cref="FromPath"/> sets it to the file name
+    /// without its extension.
+    /// </summary>
+    public string ImageName { get; set; }
+
+    /// <summary>
+    /// Reads the string entries of the root "words" array of the tag file.
+    /// </summary>
+    /// <returns>The words in file order.</returns>
+    /// <exception cref="InvalidDataException">An entry of the array is null.</exception>
+    public ICollection<string> GetWords()
+    {
+        using var file = File.OpenRead(Path);
+
+        // JsonDocument is disposable and holds pooled buffers until it is
+        // disposed; ToArray() copies the strings out before that happens.
+        using var document = JsonDocument.Parse(file);
+
+        return document
+            .RootElement
+            .GetProperty("words")
+            .EnumerateArray()
+            .Select(w => w.GetString() ?? throw new InvalidDataException("Cannot parse null words"))
+            .ToArray();
+    }
+
+    /// <summary>Creates the info for the tag file at <paramref name="path"/>.</summary>
+    public static TagFileInfo FromPath(string path) => new()
+    {
+        Path = path,
+        ImageName = System.IO.Path.GetFileNameWithoutExtension(path),
+    };
+
+    /// <inheritdoc />
+    public override string ToString() => ImageName;
+}
diff --git a/Examples/ReportGenerator/Program.cs b/Examples/ReportGenerator/Program.cs
index 1fd2ccb..1c9782b 100644
--- a/Examples/ReportGenerator/Program.cs
+++ b/Examples/ReportGenerator/Program.cs
@@ -1,167 +1,52 @@
-namespace ReportGenerator;
+using ReportGenerator.Models;
-internal struct CharacterErrorInfo
-{
- public string TaggedWord { get; }
- public string? ScannedWord { get; set; } = null;
- public double CharacterError { get; set; } = double.PositiveInfinity;
-
- public CharacterErrorInfo(string taggedWord) => TaggedWord = taggedWord;
-}
-
-internal struct WordErrorInfo
-{
- public double WordError { get; set; } = double.PositiveInfinity;
-
- public ICollection Words { get; } = new List();
-
- public double CharacterErrorAvg => Words.Average(i => i.CharacterError);
-
- public WordErrorInfo()
- {
- }
-}
-
-internal struct ScanTable
-{
- public string ImageName { get; set; }
-
- public ICollection Scans { get; set; }
-}
-
-internal struct ScanTableRow
-{
- public string ScannerName { get; set; }
-
- public ICollection ScannedWords { get; set; }
-}
+namespace ReportGenerator;
internal static class Program
{
internal static void Main(string[] args)
{
- var errorInfos = new List<(ScannedResultInfo scan, WordErrorInfo error)>();
-
var tagFileInfos = GetTagFileInfos(args[0]);
- var scanFileInfos = GetScanFileInfos(args[1]).ToLookup(i => i.ImageName);
+ var scanFileInfos = GetScanFileInfos(args[1]);
Directory.CreateDirectory("reports");
- foreach (var tagFileInfo in tagFileInfos)
- {
- var taggedWords = tagFileInfo.GetWords();
+ var stats = Scan(tagFileInfos, scanFileInfos);
- foreach (var scanFileInfo in scanFileInfos[tagFileInfo.ImageName])
+ foreach (var stat in stats)
+ {
+ var tableFields = stat.ToTable();
+ var tableInfo = new TableInfo(tableFields)
{
- var scannedWords = scanFileInfo.GetWords();
- if (!scannedWords.Any())
- {
- continue;
- }
+ Title = stat.ImageName + Environment.NewLine,
+ RowStart = " | ",
+ RowEnd = Environment.NewLine,
+ ColumnEnd = " | "
+ };
- // Calculate WER by comparing all tagged with all scanned words
- var wordErrorInfo = new WordErrorInfo
- {
- WordError = CalculateWer(taggedWords, scannedWords),
- };
+ var tableStr = tableInfo.ToString();
- // Calculate CER for each tagged word
- foreach (var taggedWord in taggedWords)
- {
- var characterErrorInfo = new CharacterErrorInfo(taggedWord);
-
- foreach (var scannedWord in scannedWords)
- {
- // Calculates the levenshtein distance to every word and returns the most similar combination
- var err = CalculateCer(taggedWord, scannedWord);
-
- if (err < characterErrorInfo.CharacterError)
- {
- characterErrorInfo.ScannedWord = scannedWord;
- characterErrorInfo.CharacterError = err;
-
- if (err == 0)
- {
- break;
- }
- }
- }
-
- wordErrorInfo.Words.Add(characterErrorInfo);
- }
-
- errorInfos.Add((scanFileInfo, wordErrorInfo));
- }
+ Console.WriteLine();
+ Console.WriteLine();
+ Console.WriteLine(tableStr);
+ Console.WriteLine();
+ Console.WriteLine();
}
-
- // Somewhat off based on the amount of expected words
- // If a processor did scan nothing at all this value can be very low
- var bestCharErrorProcessor = errorInfos
- .GroupBy(e => e.scan.ProcessorName, e => e.error)
- .Select(g => (g.Key, g.Average(i => i.CharacterErrorAvg)))
- .OrderBy(g => g.Item2)
- .ToArray();
-
- // Same here but with less impact
- var bestWordErrorProcessor = errorInfos
- .GroupBy(e => e.scan.ProcessorName, e => e.error)
- .Select(g => (g.Key, g.Average(i => i.WordError)))
- .OrderBy(g => g.Item2)
- .ToArray();
}
- static double CalculateCer(string s1, string s2)
+ private static IEnumerable Scan(
+ IEnumerable tagFileInfos,
+ IEnumerable scanFileInfos
+ )
{
- if (string.IsNullOrEmpty(s1) || string.IsNullOrEmpty(s2))
- {
- return 0;
- }
-
- var distance = new int[s1.Length + 1, s2.Length + 1];
-
- for (var i = 0; i <= s1.Length; i++)
- {
- distance[i, 0] = i;
- }
-
- for (var j = 0; j <= s2.Length; j++)
- {
- distance[0, j] = j;
- }
-
- for (var i = 1; i <= s1.Length; i++)
- {
- for (var j = 1; j <= s2.Length; j++)
- {
- var cost = s2[j - 1] == s1[i - 1] ? 0 : 1;
-
- var c1 = Math.Min(distance[i - 1, j] + 1, distance[i, j - 1] + 1);
- var c2 = distance[i - 1, j - 1] + cost;
- distance[i, j] = Math.Min(c1, c2);
- }
- }
-
- return distance[s1.Length, s2.Length];
+ var scanFileLookup = scanFileInfos.ToLookup(i => i.ImageName);
+ return tagFileInfos.Select(i => new ImageStats(
+ i.ImageName,
+ i.GetWords(),
+ scanFileLookup[i.ImageName]
+ ));
}
- static double CalculateWer(ICollection expected, ICollection actual)
- {
- // Amount of words that need to be substituted to match the original
- int substitutions = expected
- .Zip(
- actual,
- (e, a) => string.Equals(e, a) ? 0 : 1
- )
- .Sum();
-
- // Amount of words dropped from the original
- int deletions = expected.Except(actual).Count();
-
- // Amount of extra words added compared to the original
- int insertions = actual.Except(expected).Count();
-
- return (substitutions + deletions + insertions) / (double)expected.Count;
- }
private static IEnumerable GetTagFileInfos(string dir)
{
diff --git a/Examples/ReportGenerator/ReportGenerator.csproj b/Examples/ReportGenerator/ReportGenerator.csproj
index 74abf5c..8c735f6 100644
--- a/Examples/ReportGenerator/ReportGenerator.csproj
+++ b/Examples/ReportGenerator/ReportGenerator.csproj
@@ -7,4 +7,8 @@
enable
+
+
+
+
diff --git a/Examples/ReportGenerator/TagFileInfo.cs b/Examples/ReportGenerator/TagFileInfo.cs
deleted file mode 100644
index 14a1d1b..0000000
--- a/Examples/ReportGenerator/TagFileInfo.cs
+++ /dev/null
@@ -1,31 +0,0 @@
-using System.Text.Json;
-
-namespace ReportGenerator;
-
-internal struct TagFileInfo
-{
- public string Path { get; private init; }
-
- public string ImageName { get; set; }
-
- public ICollection GetWords()
- {
- using var file = File.OpenRead(Path);
- return JsonDocument
- .Parse(file)
- .RootElement
- .GetProperty("words")
- .EnumerateArray()
- .Select(w => w.GetString() ?? throw new Exception("Cannot parse null words"))
- .ToArray();
- }
-
- public static TagFileInfo FromPath(string path) => new()
- {
- Path = path,
- ImageName = System.IO.Path.GetFileNameWithoutExtension(path),
- };
-
- ///
- public override string ToString() => ImageName;
-}