Checkpoint
This commit is contained in:
@@ -45,13 +45,13 @@ internal class EvaluationProcessor
|
||||
.Use(new DuplicateFilter())
|
||||
.Complete(new RegexFilter(WordRegex));
|
||||
|
||||
private readonly IProcessor<MagickImage, MagickImage> _thresholdProcessor;
|
||||
private readonly StopwatchProcessor<MagickImage, MagickImage> _thresholdProcessor;
|
||||
|
||||
public string OutputFolder { get; init; } = "results";
|
||||
|
||||
public EvaluationProcessor(IProcessor<MagickImage, MagickImage> thresholdProcessor)
|
||||
{
|
||||
_thresholdProcessor = thresholdProcessor;
|
||||
_thresholdProcessor = new StopwatchProcessor<MagickImage, MagickImage>(thresholdProcessor);
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
@@ -59,12 +59,21 @@ internal class EvaluationProcessor
|
||||
{
|
||||
Directory.CreateDirectory(OutputFolder);
|
||||
|
||||
var processor = MakeProcessor();
|
||||
var results = processor.Process(new[] { image }).Select(r => r.Word);
|
||||
var words = MakeProcessor()
|
||||
.Process(new[] { image })
|
||||
.Select(r => r.Word)
|
||||
.ToArray();
|
||||
|
||||
var result = new
|
||||
{
|
||||
Words = words.ToArray(),
|
||||
Elapsed = _thresholdProcessor.Elapsed?.TotalMilliseconds,
|
||||
};
|
||||
|
||||
var name = Path.GetFileNameWithoutExtension(image.FileName);
|
||||
await using var file = File.OpenWrite(Path.Combine(OutputFolder, $"{name}.{_thresholdProcessor}.json"));
|
||||
await JsonSerializer.SerializeAsync(file, results);
|
||||
var path = Path.Combine(OutputFolder, $"{name}.{_thresholdProcessor}.json");
|
||||
await using var file = File.OpenWrite(path);
|
||||
await JsonSerializer.SerializeAsync(file, result);
|
||||
});
|
||||
|
||||
private IProcessorChain<MagickImage, ScanResult> MakeProcessor()
|
||||
@@ -93,7 +102,8 @@ internal class EvaluationProcessor
|
||||
{
|
||||
var image = tImages[i].CloneImage();
|
||||
var name = Path.GetFileName(image.FileName);
|
||||
image.Write(Path.Combine(OutputFolder, $"{_thresholdProcessor}.{i:D2}.{name}"));
|
||||
var path = Path.Combine(OutputFolder, $"{_thresholdProcessor}.{i:D2}.{name}");
|
||||
image.Write(path);
|
||||
}
|
||||
|
||||
return tImages;
|
||||
|
||||
@@ -6,7 +6,7 @@
|
||||
},
|
||||
"Test all img": {
|
||||
"commandName": "Project",
|
||||
"commandLineArgs": "\"img/*\""
|
||||
"commandLineArgs": "\"img/*.png\""
|
||||
},
|
||||
"Test single img": {
|
||||
"commandName": "Project",
|
||||
|
||||
@@ -0,0 +1,32 @@
|
||||
using Process.Abstract;
|
||||
using Process.Interface;
|
||||
using System.Diagnostics;
|
||||
|
||||
/// <summary>
/// Decorates another <see cref="IProcessor{TInput, TOutput}"/> and records how long
/// its most recent processing action took.
/// </summary>
/// <typeparam name="TInput">Element type consumed by the wrapped processor</typeparam>
/// <typeparam name="TOutput">Element type produced by the wrapped processor</typeparam>
public class StopwatchProcessor<TInput, TOutput> : Processor<TInput, TOutput>
{
    private readonly IProcessor<TInput, TOutput> _processor;

    /// <summary>
    /// Execution time of the last processing action, or <c>null</c> when nothing has
    /// been processed yet. The value keeps growing while the sequence returned by
    /// <see cref="Process"/> is being enumerated and is final once it is exhausted.
    /// </summary>
    public TimeSpan? Elapsed { get; private set; }

    /// <summary>
    /// Creates a timing decorator around <paramref name="processor"/>.
    /// </summary>
    /// <param name="processor">The processor whose execution time is measured</param>
    public StopwatchProcessor(IProcessor<TInput, TOutput> processor)
    {
        _processor = processor;
    }

    /// <summary>
    /// Forwards <paramref name="inputs"/> to the wrapped processor and measures the
    /// time spent inside it.
    /// </summary>
    /// <param name="inputs">The elements handed to the wrapped processor</param>
    /// <returns>
    /// A lazily evaluated sequence yielding exactly the wrapped processor's results.
    /// </returns>
    public override IEnumerable<TOutput> Process(IEnumerable<TInput> inputs)
    {
        var stopwatch = Stopwatch.StartNew();
        var results = _processor.Process(inputs);
        stopwatch.Stop();

        // Covers processors that do all their work inside the call itself.
        Elapsed = stopwatch.Elapsed;

        // A processor that returns a deferred sequence does its real work only while
        // that sequence is enumerated, so the enumeration has to be timed as well.
        return Measure(results, stopwatch.Elapsed);
    }

    /// <summary>
    /// Re-yields <paramref name="results"/> while accumulating only the time spent
    /// inside the wrapped enumerator (not the time the consumer spends between items).
    /// </summary>
    private IEnumerable<TOutput> Measure(IEnumerable<TOutput> results, TimeSpan setupTime)
    {
        var stopwatch = Stopwatch.StartNew();
        using var enumerator = results.GetEnumerator();
        stopwatch.Stop();

        while (true)
        {
            // Stopwatch.Start resumes without resetting, so the intervals add up.
            stopwatch.Start();
            var hasNext = enumerator.MoveNext();
            stopwatch.Stop();

            Elapsed = setupTime + stopwatch.Elapsed;

            if (!hasNext)
            {
                yield break;
            }

            yield return enumerator.Current;
        }
    }

    /// <summary>
    /// Returns the string representation of the wrapped processor so the decorator
    /// stays transparent wherever the processor is formatted into text.
    /// </summary>
    public override string? ToString() => _processor.ToString();
}
|
||||
@@ -12,7 +12,6 @@
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\Lookup\Lookup.Memory\Lookup.Memory.csproj" />
|
||||
<ProjectReference Include="..\..\Ocr\Ocr.Processors\Ocr.Processors.csproj" />
|
||||
<ProjectReference Include="..\..\Ocr\Ocr.Tesseract.Screenshots\Ocr.Tesseract.Screenshots.csproj" />
|
||||
<ProjectReference Include="..\..\Ocr\Ocr.Tesseract\Ocr.Tesseract.csproj" />
|
||||
<ProjectReference Include="..\..\Process\Process.Interface\Process.Interface.csproj" />
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace Common.Distance;
|
||||
|
||||
/// <summary>
/// Edit-distance calculations for arbitrary sequences.
/// </summary>
public static class Calculator
{
    /// <summary>
    /// Calculates the Levenshtein distance between <paramref name="reference"/> and
    /// <paramref name="hypothesis"/>, i.e. the minimum number of element insertions,
    /// deletions and substitutions needed to turn one sequence into the other.
    /// Elements are compared with <see cref="object.Equals(object, object)"/>.
    /// </summary>
    /// <typeparam name="T">The sequence type, e.g. a string or a collection of words</typeparam>
    /// <param name="reference">The sequence that is known to be correct</param>
    /// <param name="hypothesis">
    /// The sequence to check; <c>null</c> is treated like an empty sequence
    /// </param>
    /// <returns>
    /// The number of edits; equals the length of <paramref name="reference"/> when
    /// <paramref name="hypothesis"/> is <c>null</c> or empty
    /// </returns>
    public static double GetDistance<T>(T reference, T? hypothesis)
        where T : IEnumerable
    {
        var refArr = reference.Cast<object>().ToArray();
        var hypArr = hypothesis?.Cast<object>().ToArray() ?? Array.Empty<object>();

        if (hypArr.Length == 0)
        {
            // An empty hypothesis costs one deletion per reference element.
            return refArr.Length;
        }

        // Element-wise comparison: both arrays are freshly allocated, so a plain
        // Equals(refArr, hypArr) would compare references and never be true.
        if (refArr.SequenceEqual(hypArr))
        {
            return 0;
        }

        // distance[x, y] = edits needed to turn the first x reference elements
        // into the first y hypothesis elements.
        var distance = new int[refArr.Length + 1, hypArr.Length + 1];

        for (var x = 0; x <= refArr.Length; x++)
        {
            distance[x, 0] = x;
        }

        for (var y = 0; y <= hypArr.Length; y++)
        {
            distance[0, y] = y;
        }

        for (var x = 0; x < refArr.Length; x++)
        {
            for (var y = 0; y < hypArr.Length; y++)
            {
                // Substitution is free when both elements are equal
                var cost = Equals(refArr[x], hypArr[y]) ? 0 : 1;

                var substitution = distance[x, y] + cost;
                var deletion = distance[x, y + 1] + 1;
                var insertion = distance[x + 1, y] + 1;

                distance[x + 1, y + 1] = Math.Min(substitution, Math.Min(deletion, insertion));
            }
        }

        return distance[refArr.Length, hypArr.Length];
    }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
using ReportGenerator.Models;
|
||||
using System.Collections;
|
||||
|
||||
namespace Common.Distance;
|
||||
|
||||
/// <summary>
/// Immutable pairing of a reference value with a hypothesis, together with the
/// distance between the two as computed by <see cref="Calculator.GetDistance{T}"/>.
/// </summary>
public readonly struct DistanceComparer<T> : IDistanceComparer<T>
    where T : IEnumerable
{
    /// <inheritdoc />
    public T Reference { get; }

    /// <inheritdoc />
    public T? Hypothesis { get; }

    /// <inheritdoc />
    public double Distance { get; }

    /// <summary>
    /// Creates a comparer that has no hypothesis yet.
    /// </summary>
    /// <param name="reference">The value known to be correct</param>
    public DistanceComparer(T reference) : this(reference, default)
    {
    }

    /// <summary>
    /// Creates a comparer and immediately calculates the distance between both values.
    /// </summary>
    /// <param name="reference">The value known to be correct</param>
    /// <param name="hypothesis">The value to check against the reference</param>
    public DistanceComparer(T reference, T? hypothesis)
    {
        Reference = reference;
        Hypothesis = hypothesis;
        Distance = Calculator.GetDistance(reference, hypothesis);
    }

    /// <summary>
    /// Renders the hypothesis for the report: a placeholder when it is missing, the
    /// plain value when it equals the reference, and highlighted markup carrying the
    /// distance otherwise.
    /// </summary>
    public override string ToString()
    {
        if (Hypothesis is null)
        {
            return "`null`";
        }

        if (Equals(Hypothesis, Reference))
        {
            return Hypothesis.ToString() ?? string.Empty;
        }

        return $"<strong style='color: orange;' title='CER: {Distance}'>{Hypothesis}</strong>";
    }
}
|
||||
@@ -0,0 +1,27 @@
|
||||
using System.Collections;
|
||||
|
||||
namespace ReportGenerator.Models;
|
||||
|
||||
/// <summary>
/// Non-generic view on a comparison result that exposes only the distance.
/// </summary>
public interface IDistanceComparer
{
    /// <summary>
    /// The distance that was calculated between
    /// <see cref="IDistanceComparer{T}.Reference"/> and
    /// <see cref="IDistanceComparer{T}.Hypothesis"/>
    /// </summary>
    double Distance { get; }
}
|
||||
|
||||
/// <summary>
/// A comparison result that additionally exposes the two compared values.
/// </summary>
/// <typeparam name="T">The type of the compared sequences</typeparam>
public interface IDistanceComparer<out T> : IDistanceComparer
    where T : IEnumerable
{
    /// <summary>
    /// The "known to be correct" value that the hypothesis is measured against
    /// </summary>
    T Reference { get; }

    /// <summary>
    /// The value whose correctness is checked against <see cref="Reference"/>
    /// </summary>
    T? Hypothesis { get; }
}
|
||||
@@ -0,0 +1,15 @@
|
||||
using System.Text;
|
||||
|
||||
namespace Common.Extensions;
|
||||
|
||||
/// <summary>
/// Markdown-oriented helpers for <see cref="StringBuilder"/>.
/// </summary>
public static class StringBuilderExtensions
{
    /// <summary>
    /// Appends a Markdown heading: <paramref name="level"/> '#' characters, a space,
    /// the <paramref name="text"/> and a trailing blank line.
    /// </summary>
    /// <returns>The same builder, to allow chaining</returns>
    public static StringBuilder AppendHeading(this StringBuilder sb, int level, string text)
    {
        var marker = new string('#', level);

        sb.Append(marker);
        sb.Append(' ');

        return sb.AppendParagraph(text);
    }

    /// <summary>
    /// Appends <paramref name="text"/> on its own line, followed by an empty line.
    /// </summary>
    /// <returns>The same builder, to allow chaining</returns>
    public static StringBuilder AppendParagraph(this StringBuilder sb, string text)
    {
        sb.AppendLine(text);
        sb.AppendLine();

        return sb;
    }
}
|
||||
@@ -1,14 +1,10 @@
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Common.Extensions;
|
||||
namespace Common.Extensions;
|
||||
|
||||
/// <summary>
|
||||
/// Extensions for the string object type
|
||||
/// </summary>
|
||||
public static class StringExtensions
|
||||
{
|
||||
private static readonly Regex patternRegex = new Regex(@"^\*$");
|
||||
|
||||
/// <summary>
|
||||
/// Determines whether this string contains the specified string. Not case sensitive.
|
||||
/// </summary>
|
||||
@@ -24,18 +20,20 @@ public static class StringExtensions
|
||||
/// </summary>
|
||||
/// <param name="self"></param>
|
||||
/// <returns></returns>
|
||||
public static ICollection<string> ExpandPath(this string self)
|
||||
public static IEnumerable<string> ExpandPath(this string self)
|
||||
{
|
||||
string pattern = Path.GetFileName(self);
|
||||
if (patternRegex.IsMatch(pattern))
|
||||
var parts = self.Split(Path.DirectorySeparatorChar);
|
||||
|
||||
var fileName = parts.Last();
|
||||
if (fileName.Contains('*') || fileName.Contains('?'))
|
||||
{
|
||||
return Directory.GetFiles(
|
||||
self.Substring(0, self.Length - pattern.Length),
|
||||
pattern,
|
||||
SearchOption.TopDirectoryOnly
|
||||
);
|
||||
// Path contains file pattern
|
||||
|
||||
var path = Path.Combine(parts.SkipLast(1).ToArray());
|
||||
return Directory.EnumerateFiles(path, fileName);
|
||||
}
|
||||
|
||||
// Path contains no pattern
|
||||
return new[] { self };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,61 +0,0 @@
|
||||
namespace ReportGenerator.Models;
|
||||
|
||||
/// <summary>
/// Pairs a reference word with a scanned value and the character error
/// (Levenshtein distance) between the two.
/// </summary>
internal readonly struct CharacterStats
{
    /// <summary>The word that is known to be correct</summary>
    public string Reference { get; }

    /// <summary>The scanned word, or an empty string when there is none</summary>
    public string Value { get; }

    /// <summary>Number of character edits between both words</summary>
    public double CharacterError { get; }

    /// <summary>
    /// Creates a stat without a scanned value; its error is positive infinity.
    /// </summary>
    public CharacterStats(string reference)
    {
        Reference = reference;
        Value = string.Empty;
        CharacterError = double.PositiveInfinity;
    }

    /// <summary>
    /// Creates a stat for the given pair and calculates its character error.
    /// </summary>
    public CharacterStats(string reference, string value)
    {
        Value = value;
        Reference = reference;

        CharacterError = CalculateCer(reference, value);
    }

    /// <summary>
    /// Levenshtein distance between <paramref name="s1"/> and <paramref name="s2"/>,
    /// computed row by row so only two rows of the matrix are kept.
    /// </summary>
    private static double CalculateCer(string s1, string s2)
    {
        var previous = new int[s2.Length + 1];
        var current = new int[s2.Length + 1];

        // Row 0: turning an empty prefix of s1 into the first `col` chars of s2
        for (var col = 0; col <= s2.Length; col++)
        {
            previous[col] = col;
        }

        for (var row = 1; row <= s1.Length; row++)
        {
            current[0] = row;

            for (var col = 1; col <= s2.Length; col++)
            {
                var substitution = previous[col - 1] + (s1[row - 1] == s2[col - 1] ? 0 : 1);
                var deletion = previous[col] + 1;
                var insertion = current[col - 1] + 1;

                current[col] = Math.Min(substitution, Math.Min(deletion, insertion));
            }

            (previous, current) = (current, previous);
        }

        // After the final swap the last computed row lives in `previous`
        return previous[s2.Length];
    }

    /// <summary>
    /// Formats the stat as "value (error)", using a placeholder when the value is
    /// null or empty.
    /// </summary>
    public override string ToString()
    {
        var shown = Value;

        if (string.IsNullOrEmpty(shown))
        {
            shown = "`null`";
        }

        return $"{shown} ({CharacterError})";
    }
}
|
||||
@@ -1,42 +1,27 @@
|
||||
namespace ReportGenerator.Models;
|
||||
|
||||
internal readonly struct ImageStats
|
||||
public readonly struct ImageStats
|
||||
{
|
||||
public string ImageName { get; } = string.Empty;
|
||||
public string ImageName { get; }
|
||||
|
||||
public ICollection<string> Reference { get; } = Array.Empty<string>();
|
||||
public ICollection<ProcessorStat> Stats { get; } = Array.Empty<ProcessorStat>();
|
||||
public ICollection<string> Reference { get; }
|
||||
|
||||
public ICollection<ProcessorStat> Processors { get; }
|
||||
|
||||
public ImageStats(
|
||||
string imageName,
|
||||
ICollection<string> taggedWords,
|
||||
IEnumerable<ScannedResultInfo> scanResult
|
||||
IEnumerable<ScanFileInfo> scanResult
|
||||
)
|
||||
{
|
||||
Reference = taggedWords;
|
||||
ImageName = imageName;
|
||||
Stats = scanResult
|
||||
.Select(t => new ProcessorStat(t.ProcessorName, taggedWords, t.GetWords()))
|
||||
Processors = scanResult
|
||||
.Select(t =>
|
||||
{
|
||||
var (elapsed, words) = t.GetData();
|
||||
return new ProcessorStat(t.ProcessorName, elapsed, taggedWords, words);
|
||||
})
|
||||
.ToArray();
|
||||
}
|
||||
|
||||
|
||||
public IEnumerable<IEnumerable<string>> ToTable()
|
||||
{
|
||||
// Title
|
||||
yield return Reference
|
||||
.Prepend("Processor")
|
||||
.Append("WER")
|
||||
.Append("CER (avg)")
|
||||
.Append("CER (sum)");
|
||||
|
||||
// Spacer
|
||||
yield return Enumerable.Range(0, Reference.Count + 4).Select(_ => "---");
|
||||
|
||||
// Content
|
||||
foreach (var stat in Stats)
|
||||
{
|
||||
yield return stat.ToRow();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,90 +1,82 @@
|
||||
namespace ReportGenerator.Models;
|
||||
using Common.Distance;
|
||||
|
||||
internal readonly struct ProcessorStat
|
||||
namespace ReportGenerator.Models;
|
||||
|
||||
public readonly struct ProcessorStat : IDistanceComparer<IEnumerable<string>>
|
||||
{
|
||||
public string ProcessorName { get; } = string.Empty;
|
||||
public ICollection<CharacterStats> CharacterStats { get; } = Array.Empty<CharacterStats>();
|
||||
public double WordError { get; } = double.PositiveInfinity;
|
||||
|
||||
public ProcessorStat(
|
||||
string processorName,
|
||||
ICollection<string> reference,
|
||||
ICollection<string> values
|
||||
)
|
||||
{
|
||||
ProcessorName = processorName;
|
||||
|
||||
WordError = CalculateWer(
|
||||
reference,
|
||||
values
|
||||
);
|
||||
|
||||
CharacterStats = GetCharacterStat(
|
||||
reference,
|
||||
values
|
||||
).ToArray();
|
||||
}
|
||||
|
||||
public IEnumerable<string> ToRow() => CharacterStats
|
||||
.Select(s => s.ToString())
|
||||
.Append(WordError.ToString("F2"))
|
||||
.Append(CharacterStats.Average(s => s.CharacterError).ToString("F2"))
|
||||
.Append(CharacterStats.Sum(s => s.CharacterError).ToString("F2"))
|
||||
.Prepend(ProcessorName);
|
||||
/// <summary>
|
||||
/// The name of the processor
|
||||
/// </summary>
|
||||
public string Name { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Finds the smallest possible CER by calculating the levenshtein
|
||||
/// distance to every word and returning the most similar combination
|
||||
/// The total milliseconds it took the processor to process the data
|
||||
/// </summary>
|
||||
/// <returns></returns>
|
||||
private static IEnumerable<CharacterStats> GetCharacterStat(
|
||||
IEnumerable<string> reference,
|
||||
ICollection<string> values
|
||||
public double ProcessingTime { get; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public IEnumerable<string> Reference { get; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public IEnumerable<string>? Hypothesis { get; }
|
||||
|
||||
/// <inheritdoc />
|
||||
public double Distance { get; }
|
||||
|
||||
/// <summary>
|
||||
/// Information about the processed values
|
||||
/// </summary>
|
||||
public ICollection<IDistanceComparer<string>> Words { get; }
|
||||
|
||||
public ProcessorStat(
|
||||
string name,
|
||||
double processingTime,
|
||||
ICollection<string> reference,
|
||||
ICollection<string> hypothesis
|
||||
)
|
||||
{
|
||||
foreach (var refValue in reference)
|
||||
Name = name;
|
||||
ProcessingTime = processingTime;
|
||||
|
||||
Reference = reference;
|
||||
Hypothesis = hypothesis;
|
||||
|
||||
Distance = Calculator.GetDistance(
|
||||
reference.OrderBy(s => s).ToArray(),
|
||||
hypothesis.OrderBy(s => s).ToArray()
|
||||
) / reference.Count;
|
||||
|
||||
Words = reference.Select(r => GetDistanceInfo(r, hypothesis)).ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compares the <paramref name="reference"/> with all given <paramref name="values"/>
|
||||
/// and determines the <see cref="IDistanceComparer{T}"/> with the lowest error
|
||||
/// </summary>
|
||||
private static IDistanceComparer<string> GetDistanceInfo(
|
||||
string reference, IEnumerable<string> values
|
||||
)
|
||||
{
|
||||
var result = new DistanceComparer<string>(reference);
|
||||
|
||||
// Determine character stat with lowest error
|
||||
foreach (var value in values)
|
||||
{
|
||||
CharacterStats result = new CharacterStats(refValue);
|
||||
|
||||
foreach (var value in values)
|
||||
var stat = new DistanceComparer<string>(reference, value);
|
||||
if (stat.Distance > result.Distance)
|
||||
{
|
||||
var stat = new CharacterStats(refValue, value);
|
||||
if (stat.CharacterError > result.CharacterError)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
result = stat;
|
||||
|
||||
if (stat.CharacterError == 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
yield return result;
|
||||
}
|
||||
}
|
||||
result = stat;
|
||||
|
||||
static double CalculateWer(ICollection<string> expected, ICollection<string> actual)
|
||||
{
|
||||
if (!actual.Any())
|
||||
{
|
||||
return double.PositiveInfinity;
|
||||
if (stat.Distance == 0)
|
||||
{
|
||||
// We cannot go lower than zero, break
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
// Amount of words that need to be substituted to match the original
|
||||
int substitutions = expected
|
||||
.Zip(actual, (e, a) => string.Equals(e, a) ? 0 : 1)
|
||||
.Sum();
|
||||
|
||||
// todo this isn't correct i think
|
||||
// Amount of words dropped from the original
|
||||
int deletions = expected.Except(actual).Count();
|
||||
|
||||
// Amount of extra words added compared to the original
|
||||
int insertions = actual.Except(expected).Count();
|
||||
|
||||
return (substitutions + deletions + insertions) / (double)expected.Count;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
||||
+13
-9
@@ -3,7 +3,7 @@ using System.Text.RegularExpressions;
|
||||
|
||||
namespace ReportGenerator.Models;
|
||||
|
||||
internal struct ScannedResultInfo
|
||||
public struct ScanFileInfo
|
||||
{
|
||||
public string Path { get; private init; }
|
||||
|
||||
@@ -16,22 +16,26 @@ internal struct ScannedResultInfo
|
||||
|
||||
public string ImageName { get; set; }
|
||||
|
||||
public ICollection<string> GetWords()
|
||||
public (double Elapsed, ICollection<string> Words) GetData()
|
||||
{
|
||||
using var file = File.OpenRead(Path);
|
||||
return JsonDocument
|
||||
.Parse(file)
|
||||
.RootElement
|
||||
var root = JsonDocument.Parse(file).RootElement;
|
||||
|
||||
var words = root
|
||||
.GetProperty("Words")
|
||||
.EnumerateArray()
|
||||
.Select(e =>
|
||||
e.GetProperty("Text").GetString() ?? throw new Exception("Cannot parse null words"))
|
||||
.Select(e => e.GetProperty("Text").GetString() ?? string.Empty)
|
||||
.ToArray();
|
||||
|
||||
var elapsed = root.GetProperty("Elapsed").GetDouble();
|
||||
|
||||
return (elapsed, words);
|
||||
}
|
||||
|
||||
public static ScannedResultInfo FromPath(string path)
|
||||
public static ScanFileInfo FromPath(string path)
|
||||
{
|
||||
var match = parseRegex.Match(System.IO.Path.GetFileName(path));
|
||||
return new ScannedResultInfo
|
||||
return new ScanFileInfo
|
||||
{
|
||||
Path = path,
|
||||
ProcessorName = match.Groups["processor"].Value,
|
||||
@@ -1,49 +0,0 @@
|
||||
namespace ReportGenerator.Models;
|
||||
|
||||
/// <summary>
/// Describes a text table: its rows plus the strings that are emitted around the
/// title, each row and each column when the table is rendered.
/// </summary>
internal readonly struct TableInfo
{
    /// <summary>The table content; every inner sequence is one row of columns</summary>
    public IEnumerable<IEnumerable<string>> Rows { get; } = Enumerable.Empty<IEnumerable<string>>();

    /// <summary>Text emitted once before the first row</summary>
    public string Title { get; init; } = string.Empty;

    /// <summary>Text emitted before every row</summary>
    public string RowStart { get; init; } = string.Empty;

    /// <summary>Text emitted after every row</summary>
    public string RowEnd { get; init; } = string.Empty;

    /// <summary>Text emitted before every column value</summary>
    public string ColumnStart { get; init; } = string.Empty;

    /// <summary>Text emitted after every column value</summary>
    public string ColumnEnd { get; init; } = string.Empty;

    /// <summary>
    /// Creates a table for the given <paramref name="rows"/>.
    /// </summary>
    public TableInfo(IEnumerable<IEnumerable<string>> rows)
    {
        Rows = rows;
    }

    #region Overrides of ValueType

    /// <summary>
    /// Renders the title followed by every row, wrapping rows and columns in the
    /// configured start and end strings.
    /// </summary>
    public override string ToString()
    {
        var output = new System.Text.StringBuilder();

        output.Append(Title);

        foreach (var cells in Rows)
        {
            output.Append(RowStart);

            foreach (var cell in cells)
            {
                output.Append(ColumnStart).Append(cell).Append(ColumnEnd);
            }

            output.Append(RowEnd);
        }

        return output.ToString();
    }

    #endregion
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using ReportGenerator.Models;
|
||||
using System.Text;
|
||||
|
||||
namespace ReportGenerator;
|
||||
|
||||
@@ -6,45 +7,45 @@ internal static class Program
|
||||
{
|
||||
internal static void Main(string[] args)
|
||||
{
|
||||
// Retrieve data
|
||||
|
||||
Console.WriteLine("Getting data");
|
||||
var tagFileInfos = GetTagFileInfos(args[0]);
|
||||
var scanFileInfos = GetScanFileInfos(args[1]);
|
||||
|
||||
Directory.CreateDirectory("reports");
|
||||
// Parse
|
||||
|
||||
var stats = Scan(tagFileInfos, scanFileInfos);
|
||||
Console.WriteLine("Evaluating");
|
||||
var scans = Scan(tagFileInfos, scanFileInfos);
|
||||
var report = Table.ReportGenerator
|
||||
.FromData(scans)
|
||||
.WithTitle("OCR Report")
|
||||
.WithBestOf("Best of")
|
||||
.WithFullStatistic("Statistic")
|
||||
.ToString();
|
||||
|
||||
foreach (var stat in stats)
|
||||
{
|
||||
var tableFields = stat.ToTable();
|
||||
var tableInfo = new TableInfo(tableFields)
|
||||
{
|
||||
Title = stat.ImageName + Environment.NewLine,
|
||||
RowStart = " | ",
|
||||
RowEnd = Environment.NewLine,
|
||||
ColumnEnd = " | "
|
||||
};
|
||||
// Generate output file
|
||||
|
||||
var tableStr = tableInfo.ToString();
|
||||
Console.WriteLine("Generating report");
|
||||
File.WriteAllText("Report.md", report, Encoding.UTF8);
|
||||
|
||||
Console.WriteLine();
|
||||
Console.WriteLine();
|
||||
Console.WriteLine(tableStr);
|
||||
Console.WriteLine();
|
||||
Console.WriteLine();
|
||||
}
|
||||
Console.WriteLine("Completed");
|
||||
}
|
||||
|
||||
private static IEnumerable<ImageStats> Scan(
|
||||
IEnumerable<TagFileInfo> tagFileInfos,
|
||||
IEnumerable<ScannedResultInfo> scanFileInfos
|
||||
IEnumerable<ScanFileInfo> scanFileInfos
|
||||
)
|
||||
{
|
||||
var scanFileLookup = scanFileInfos.ToLookup(i => i.ImageName);
|
||||
return tagFileInfos.Select(i => new ImageStats(
|
||||
i.ImageName,
|
||||
i.GetWords().OrderBy(w => w).ToArray(),
|
||||
scanFileLookup[i.ImageName]
|
||||
));
|
||||
foreach (var i in tagFileInfos)
|
||||
{
|
||||
yield return new ImageStats(
|
||||
i.ImageName,
|
||||
i.GetWords().Distinct().OrderBy(w => w).ToArray(),
|
||||
scanFileLookup[i.ImageName]
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -58,13 +59,13 @@ internal static class Program
|
||||
return Directory.EnumerateFiles(dir, "*.json").Select(TagFileInfo.FromPath);
|
||||
}
|
||||
|
||||
private static IEnumerable<ScannedResultInfo> GetScanFileInfos(string dir)
|
||||
private static IEnumerable<ScanFileInfo> GetScanFileInfos(string dir)
|
||||
{
|
||||
if (!Directory.Exists(dir))
|
||||
{
|
||||
throw new ArgumentException($"Invalid scan results directory '{dir}'");
|
||||
}
|
||||
|
||||
return Directory.EnumerateFiles(dir, "*.json").Select(ScannedResultInfo.FromPath);
|
||||
return Directory.EnumerateFiles(dir, "*.json").Select(ScanFileInfo.FromPath);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -11,4 +11,8 @@
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="7.0.5" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
|
||||
@@ -0,0 +1,171 @@
|
||||
using Common.Extensions;
|
||||
using ReportGenerator.Models;
|
||||
using System.Text;
|
||||
|
||||
namespace ReportGenerator.Table
{
    /// <summary>
    /// Fluent builder that renders the collected <see cref="ImageStats"/> as a
    /// Markdown report. Every <c>With…</c> call appends one section to an internal
    /// buffer; <see cref="ToString"/> returns everything appended so far.
    /// </summary>
    public class ReportGenerator
    {
        private const string ColumnSeparator = " | ";

        private ICollection<ImageStats> Images { get; }

        private readonly StringBuilder _sb = new();

        private ReportGenerator(IEnumerable<ImageStats> stats) => Images = stats.ToArray();

        /// <summary>
        /// Returns the Markdown generated so far.
        /// </summary>
        public override string ToString() => _sb.ToString();

        #region Fluent definition

        /// <summary>
        /// Appends the top-level report heading.
        /// </summary>
        /// <param name="text">The heading text</param>
        /// <returns>This generator, to allow chaining</returns>
        public ReportGenerator WithTitle(string text)
        {
            _sb.AppendHeading(1, text);
            return this;
        }

        /// <summary>
        /// Appends one table per image that lists every processor with its elapsed
        /// time, WER, average CER, result image and best match per reference word.
        /// Processors are ordered by WER, then by processing time.
        /// </summary>
        /// <param name="title">The section heading</param>
        /// <returns>This generator, to allow chaining</returns>
        public ReportGenerator WithFullStatistic(string title)
        {
            _sb.AppendHeading(2, title);

            foreach (var stat in Images)
            {
                _sb.AppendHeading(3, stat.ImageName);
                _sb.AppendParagraph(HtmlImage(Path.Combine("img", stat.ImageName), 350, 350));

                AppendRow(stat
                    .Reference
                    .Prepend("Image")
                    .Prepend("CER (avg)")
                    .Prepend("WER")
                    .Prepend("Elapsed")
                    .Prepend("Processor")
                );

                // Five fixed columns precede the reference words
                AppendRowSeparator(stat.Reference.Count + 5);

                var processors = stat.Processors
                    .OrderBy(s => s.Distance)
                    .ThenBy(s => s.ProcessingTime);

                foreach (var processor in processors)
                {
                    var imgPath = Path.Combine("results", $"{processor.Name}.00.{stat.ImageName}.png");

                    AppendRow(processor.Words
                        .Select(s => s.ToString() ?? string.Empty)
                        .Prepend(HtmlImage(imgPath, 150, 150))
                        .Prepend(AverageCer(processor).ToString("F2"))
                        .Prepend($"{processor.Distance * 100:F1}%")
                        // ProcessingTime is already in milliseconds, no scaling needed
                        .Prepend($"{processor.ProcessingTime:F1}ms")
                        .Prepend(processor.Name)
                    );
                }

                _sb.AppendLine();
                _sb.AppendParagraph(
                    $"*Comparison data generated based on {stat.Reference.Count} tagged words.*"
                );
            }

            return this;
        }

        /// <summary>
        /// Appends ranking tables (time, WER, CER) that average every processor
        /// across all images and show the best and the worst entries.
        /// </summary>
        /// <param name="title">The section heading</param>
        /// <param name="context">
        /// Maximum number of entries shown at each end of a ranking; capped at half
        /// the number of processors
        /// </param>
        /// <returns>This generator, to allow chaining</returns>
        public ReportGenerator WithBestOf(string title, int context = 5)
        {
            _sb.AppendHeading(2, title);

            var lookup = Images
                .SelectMany(s => s.Processors)
                .ToLookup(p => p.Name);

            // Compare time across all images (ProcessingTime is already in milliseconds)
            var byTime = lookup
                .Select(g => (Name: g.Key, Value: g.Average(p => p.ProcessingTime)))
                .OrderBy(g => g.Value);

            // Compare WER across all images
            var byWer = lookup
                .Select(g => (Name: g.Key, Value: g.Average(p => p.Distance) * 100))
                .OrderBy(g => g.Value);

            // Compare CER across all images
            var byCer = lookup
                .Select(g => (Name: g.Key, Value: g.Average(AverageCer)))
                .OrderBy(g => g.Value);

            // Print
            AppendComparison(3, "Time", byTime, " ms");
            AppendComparison(3, "WER", byWer, " %");
            AppendComparison(3, "CER", byCer, " changes");

            return this;

            void AppendComparison(
                int level,
                string tableTitle,
                IEnumerable<(string, double)> values,
                string valueUnit = ""
            )
            {
                var tValues = values.ToArray();
                var tContext = Math.Min(tValues.Length / 2, context);

                _sb.AppendHeading(level, tableTitle);

                AppendRow(new[] { "Processor", "Average" });
                AppendRowSeparator(2);
                AppendRows(tValues.Take(tContext).Select(ToRow));
                AppendRowSeparator(2, "...");
                AppendRows(tValues.TakeLast(tContext).Select(ToRow));

                string[] ToRow((string, double) v) => new[]
                {
                    v.Item1,
                    v.Item2.ToString("F2") + valueUnit
                };
            }
        }

        #endregion

        #region Helpers

        /// <summary>
        /// Average distance over all words of a processor; 0 when there are no words,
        /// so an image without tagged words does not abort the whole report.
        /// </summary>
        private static double AverageCer(ProcessorStat processor) => processor.Words
            .Select(w => w.Distance)
            .DefaultIfEmpty()
            .Average();

        /// <summary>
        /// Appends one table row with the separator before, between and after the cells.
        /// </summary>
        private void AppendRow(IEnumerable<string> row)
        {
            _sb.AppendLine(ColumnSeparator + string.Join(ColumnSeparator, row) + ColumnSeparator);
        }

        /// <summary>
        /// Appends every row of <paramref name="rows"/> in order.
        /// </summary>
        private void AppendRows(IEnumerable<IEnumerable<string>> rows)
        {
            foreach (var row in rows)
            {
                AppendRow(row);
            }
        }

        /// <summary>
        /// Builds an HTML image tag limited to the given size; ".png" is appended to
        /// <paramref name="path"/> when it does not already end with it.
        /// </summary>
        private static string HtmlImage(string path, int maxWidth, int maxHeight)
        {
            if (!path.EndsWith(".png", StringComparison.Ordinal))
            {
                path += ".png";
            }

            return $"<img src=\"{path}\" style=\"max-width:{maxWidth}px;max-height:{maxHeight}px;\" />";
        }

        /// <summary>
        /// Appends a row in which each of the <paramref name="columns"/> cells contains
        /// <paramref name="content"/>; the default content is the Markdown header separator.
        /// </summary>
        private void AppendRowSeparator(int columns, string content = "---") =>
            AppendRow(Enumerable.Range(0, columns).Select(_ => content));

        #endregion

        #region Factory Methods

        /// <summary>
        /// Creates a generator for the given statistics; the sequence is enumerated once.
        /// </summary>
        public static ReportGenerator FromData(IEnumerable<ImageStats> stats) => new(stats);

        #endregion
    }
}
|
||||
Reference in New Issue
Block a user