Reworked distance info algo

This commit is contained in:
Simon Gruber
2023-12-04 18:22:08 +01:00
parent 9102161b7d
commit 0b01b41685
6 changed files with 97 additions and 76 deletions
+12 -5
View File
@@ -26,11 +26,18 @@ public readonly struct DistanceComparer<T> : IDistanceComparer<T>
Distance = Calculator.GetDistance(Reference, Hypothesis);
}
/// <inheritdoc />
/// <remarks>
/// Returns the plain hypothesis text when the hypothesis equals the reference.
/// Otherwise the hypothesis text (or "-" when there is none) is wrapped in an orange
/// <c>&lt;strong&gt;</c> element whose tooltip shows the reference and the distance.
/// </remarks>
public override string ToString()
{
    var text = Hypothesis?.ToString();
    if (Equals(Hypothesis, Reference))
    {
        // Exact match: emit the text without any highlighting.
        return text ?? string.Empty;
    }

    // Both values are arbitrary text that ends up inside markup. Encode them so that a
    // quote cannot terminate the single-quoted title attribute early and a '<' cannot
    // open a new element.
    var reference = System.Net.WebUtility.HtmlEncode(Reference?.ToString() ?? string.Empty);
    var hypothesis = System.Net.WebUtility.HtmlEncode(text ?? "-");

    return
        $"<strong style='color: orange;' title='Ref: {reference}, CER: {Distance}'>{hypothesis}</strong>";
}
}
@@ -80,39 +80,8 @@ public class HtmlDocumentGenerator : DocumentGeneratorBase
AppendParagraph(text, default);
/// <inheritdoc />
/// <remarks>
/// Emits an <c>&lt;h{level}&gt;</c> element and uses the heading text as its <c>id</c>,
/// so the heading can be targeted by a same-page link to that text.
/// </remarks>
public override IDocumentGenerator AppendHeading(int level, string text)
{
    // The text is placed in a double-quoted attribute and in element content; encoding
    // keeps characters such as '"', '<' and '&' from corrupting the generated markup.
    var encoded = System.Net.WebUtility.HtmlEncode(text);
    return Append($"<h{level} id=\"{encoded}\">{encoded}</h{level}>");
}
/// <inheritdoc />
protected override ITableGenerator MakeTable(int columns, Stream stream) =>
@@ -46,50 +46,40 @@ public readonly struct ProcessorStat : IDistanceComparer<IEnumerable<string>>
hypothesis.OrderBy(s => s).ToArray()
) / reference.Count;
Words = reference.Select(r => GetDistanceInfo(r, hypothesis)).ToArray();
Words = GetDistanceInfos(reference, hypothesis).ToArray();
// Words = reference.Select(r => GetDistanceInfo(r, hypothesis)).ToArray();
}
/// <summary>
/// Compares every entry of the <paramref name="referenceCollection"/> with all values in the
/// <paramref name="hypothesisCollection"/>, determining for each reference the
/// <see cref="IDistanceComparer{T}"/> with the lowest error
/// </summary>
/// <param name="referenceCollection">
/// The expected values; exactly one result is produced per entry, in enumeration order.
/// </param>
/// <param name="hypothesisCollection">
/// The candidate values every reference is compared against.
/// </param>
/// <returns>
/// A lazily evaluated sequence containing, per reference, the pairing with the smallest
/// distance, or a comparer built from the reference alone when there are no candidates.
/// </returns>
private static IEnumerable<IDistanceComparer<string>> GetDistanceInfos(
    ICollection<string> referenceCollection,
    ICollection<string> hypothesisCollection
)
{
    // todo: one hypothesis value can still be selected as the best match for several references
    foreach (var reference in referenceCollection)
    {
        // MinBy keeps the first of several equally distant candidates and returns null
        // for an empty candidate set, so no intermediate list, sort or lookup is needed
        // to find the closest pairing.
        var best = hypothesisCollection
            .Select(hypothesis =>
                (IDistanceComparer<string>)new DistanceComparer<string>(reference, hypothesis))
            .MinBy(candidate => candidate.Distance);

        yield return best ?? new DistanceComparer<string>(reference);
    }
}
}
+4 -2
View File
@@ -1,4 +1,5 @@
using ReportGenerator.Generator.Generator;
using Common.Extensions;
using ReportGenerator.Generator.Generator;
using ReportGenerator.Models;
namespace ReportGenerator;
@@ -24,7 +25,8 @@ internal static class Program
using var report = new ReportGenerator("OCR Report", document, scans)
.AddComparison("Processing summary (Average)", v => v.Average())
// .AddComparison("Processing summary (Cumulative)", v => v.Sum())
// .AddComparison("Processing summary (Median)", v => v.Median())
.AddComparison("Processing summary (Median)", v => v.Median())
// .AddProcessorStats("Processor Stats")
.AddImageStatsFull("Scan Results");
Console.WriteLine($"Saved report to '{path}'");
+55 -2
View File
@@ -39,7 +39,55 @@ public class ReportGenerator : IDisposable
{
Document.AppendHeading(2, title);
// todo show best/worst images per processor
// Group the images by the name of every processor that produced a result for them.
var processors = new Dictionary<string, ICollection<ImageStats>>();
foreach (var image in Images)
{
    // Distinct: when an image carries several results with the same processor name it
    // must still be listed only once for that processor (the distances of those
    // results are averaged below).
    foreach (var name in image.Processors.Select(p => p.Name).Distinct())
    {
        if (!processors.TryGetValue(name, out var images))
        {
            images = new List<ImageStats>();
            processors.Add(name, images);
        }

        images.Add(image);
    }
}

foreach (var (processor, images) in processors)
{
    // Pair each image with its average distance for this processor, ascending.
    var ordered = images
        .Select(i => (
            Stats: i,
            Distance: i.Processors
                .Where(p => p.Name.Equals(processor))
                .Select(p => p.Distance)
                .Average()
        ))
        .OrderBy(i => i.Distance)
        .ToArray();

    Document
        .AppendHeading(3, processor)
        // Three columns: the header and every row below contain three cells.
        // NOTE(review): assumes the first argument is the column count - confirm against AppendTable.
        .AppendTable(3, table =>
            {
                table.AppendHeader(new[] { "Image", "Preview", "Distance" });

                foreach (var (stats, distance) in ordered)
                {
                    var imgPath = Path.Combine("results", $"{processor}.00.{stats.ImageName}.png");
                    table.AppendRow(new[]
                    {
                        // Same-page link to the element whose id is the image name.
                        $"<a href=\"#{stats.ImageName}\">{stats.ImageName}</a>",
                        Document.FormatImage(imgPath, new Bounds(0, 150)),
                        distance.ToString("F2")
                    });
                }
            }
        );
}
return this;
}
@@ -75,10 +123,15 @@ public class ReportGenerator : IDisposable
{
var imgPath = Path.Combine("results", $"{processor.Name}.00.{stat.ImageName}.png");
string summarizedCer = string.Empty;
summarizedCer = processor.Words
.Average(s => s.Distance)
.ToString("F2");
table.AppendRow(processor.Words
.Select(s => s.ToString() ?? string.Empty)
.Prepend(Document.FormatImage(imgPath, new Bounds(0, 150)))
.Prepend(processor.Words.Average(s => s.Distance).ToString("F2"))
.Prepend(summarizedCer)
.Prepend($"{processor.Distance * 100:F1}%")
.Prepend($"{processor.ProcessingTime * 1000:F1}ms")
.Prepend(processor.Name)
+1 -1
View File
File diff suppressed because one or more lines are too long