Reworked distance info algo
This commit is contained in:
@@ -26,11 +26,18 @@ public readonly struct DistanceComparer<T> : IDistanceComparer<T>
|
||||
Distance = Calculator.GetDistance(Reference, Hypothesis);
|
||||
}
|
||||
|
||||
|
||||
/// <inheritdoc />
|
||||
public override string ToString()
{
    // Render the hypothesis once; this stays null when there is no hypothesis.
    var str = Hypothesis?.ToString();

    // Hypothesis and reference are equal: emit the plain text without any markup.
    if (Equals(Hypothesis, Reference))
    {
        return str ?? string.Empty;
    }

    // Mismatch: highlight the hypothesis (or "-" when it is missing) and expose the
    // reference value and the distance through the tooltip.
    // NOTE(review): Reference and the hypothesis text are interpolated into the markup
    // unescaped - confirm the values can never contain quotes or angle brackets.
    return
        $"<strong style='color: orange;' title='Ref: {Reference}, CER: {Distance}'>{str ?? "-"}</strong>";
}
|
||||
}
|
||||
|
||||
@@ -80,39 +80,8 @@ public class HtmlDocumentGenerator : DocumentGeneratorBase
|
||||
AppendParagraph(text, default);
|
||||
|
||||
/// <inheritdoc />
|
||||
public override IDocumentGenerator AppendHeading(int level, string text)
{
    // The heading text doubles as the element id so that other parts of the
    // document can link to the heading via "#<text>".
    // NOTE(review): text is written unescaped, both as the id attribute and as the
    // heading content - confirm callers never pass quotes or markup they do not intend.
    var tag = $"h{level}";

    return Append($"<{tag} id=\"{text}\">{text}</{tag}>");
}
|
||||
|
||||
/// <inheritdoc />
|
||||
protected override ITableGenerator MakeTable(int columns, Stream stream) =>
|
||||
|
||||
@@ -46,50 +46,40 @@ public readonly struct ProcessorStat : IDistanceComparer<IEnumerable<string>>
|
||||
hypothesis.OrderBy(s => s).ToArray()
|
||||
) / reference.Count;
|
||||
|
||||
Words = reference.Select(r => GetDistanceInfo(r, hypothesis)).ToArray();
|
||||
Words = GetDistanceInfos(reference, hypothesis).ToArray();
|
||||
// Words = reference.Select(r => GetDistanceInfo(r, hypothesis)).ToArray();
|
||||
}
|
||||
|
||||
/// <summary>
/// Compares every entry of the <paramref name="referenceCollection"/> with all values in the
/// <paramref name="hypothesisCollection"/> and yields, for each reference, the
/// <see cref="IDistanceComparer{T}"/> with the lowest distance.
/// </summary>
/// <param name="referenceCollection">The expected values; one result is yielded per entry, in order.</param>
/// <param name="hypothesisCollection">The candidate values each reference is compared against.</param>
/// <returns>
/// A lazily evaluated sequence with one comparer per reference. When there are no hypotheses,
/// a comparer created from the reference alone is yielded instead.
/// </returns>
private static IEnumerable<IDistanceComparer<string>> GetDistanceInfos(
    ICollection<string> referenceCollection,
    ICollection<string> hypothesisCollection
)
{
    foreach (var reference in referenceCollection)
    {
        // todo avoid matching the same reference with a value multiple times
        // Pick the closest hypothesis for this reference directly. MinBy keeps the first
        // element on ties, which matches a stable sort followed by FirstOrDefault, without
        // materializing and sorting every reference/hypothesis pair.
        var best = hypothesisCollection
            .Select(hypothesis => (IDistanceComparer<string>)new DistanceComparer<string>(reference, hypothesis))
            .MinBy(candidate => candidate.Distance);

        // No hypotheses at all: fall back to a comparer that only knows the reference.
        yield return best ?? new DistanceComparer<string>(reference);
    }
}
|
||||
}
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using ReportGenerator.Generator.Generator;
|
||||
using Common.Extensions;
|
||||
using ReportGenerator.Generator.Generator;
|
||||
using ReportGenerator.Models;
|
||||
|
||||
namespace ReportGenerator;
|
||||
@@ -24,7 +25,8 @@ internal static class Program
|
||||
using var report = new ReportGenerator("OCR Report", document, scans)
|
||||
.AddComparison("Processing summary (Average)", v => v.Average())
|
||||
// .AddComparison("Processing summary (Cumulative)", v => v.Sum())
|
||||
// .AddComparison("Processing summary (Median)", v => v.Median())
|
||||
.AddComparison("Processing summary (Median)", v => v.Median())
|
||||
// .AddProcessorStats("Processor Stats")
|
||||
.AddImageStatsFull("Scan Results");
|
||||
|
||||
Console.WriteLine($"Saved report to '{path}'");
|
||||
|
||||
@@ -39,7 +39,55 @@ public class ReportGenerator : IDisposable
|
||||
{
|
||||
Document.AppendHeading(2, title);
|
||||
|
||||
// todo show best/worst images per processor
|
||||
var processors = new Dictionary<string, ICollection<ImageStats>>();
|
||||
foreach (var image in Images)
|
||||
{
|
||||
foreach (var processor in image.Processors)
|
||||
{
|
||||
if (processors.TryGetValue(processor.Name, out var images))
|
||||
{
|
||||
images.Add(image);
|
||||
}
|
||||
else
|
||||
{
|
||||
processors.Add(processor.Name, new List<ImageStats> { image });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach (var (processor, images) in processors)
|
||||
{
|
||||
var ordered = images
|
||||
.Select(i => (Stats: i, Distance: i
|
||||
.Processors
|
||||
.Where(p => p.Name.Equals(processor))
|
||||
.Select(p => p.Distance)
|
||||
.Average()
|
||||
))
|
||||
.OrderBy(i => i.Distance)
|
||||
.ToArray();
|
||||
|
||||
Document
|
||||
.AppendHeading(3, processor)
|
||||
.AppendTable(2, table =>
|
||||
{
|
||||
table.AppendHeader(new[] { "Image", "Preview", "Distance" });
|
||||
|
||||
foreach (var (stats, distance) in ordered)
|
||||
{
|
||||
var imgPath = Path.Combine("results", $"{processor}.00.{stats.ImageName}.png");
|
||||
|
||||
table.AppendRow(new[]
|
||||
{
|
||||
$"<a href=\"#{stats.ImageName}\">{stats.ImageName}</a>",
|
||||
Document.FormatImage(imgPath, new Bounds(0, 150)),
|
||||
distance.ToString("F2")
|
||||
});
|
||||
}
|
||||
}
|
||||
);
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
@@ -75,10 +123,15 @@ public class ReportGenerator : IDisposable
|
||||
{
|
||||
var imgPath = Path.Combine("results", $"{processor.Name}.00.{stat.ImageName}.png");
|
||||
|
||||
string summarizedCer = string.Empty;
|
||||
summarizedCer = processor.Words
|
||||
.Average(s => s.Distance)
|
||||
.ToString("F2");
|
||||
|
||||
table.AppendRow(processor.Words
|
||||
.Select(s => s.ToString() ?? string.Empty)
|
||||
.Prepend(Document.FormatImage(imgPath, new Bounds(0, 150)))
|
||||
.Prepend(processor.Words.Average(s => s.Distance).ToString("F2"))
|
||||
.Prepend(summarizedCer)
|
||||
.Prepend($"{processor.Distance * 100:F1}%")
|
||||
.Prepend($"{processor.ProcessingTime * 1000:F1}ms")
|
||||
.Prepend(processor.Name)
|
||||
|
||||
Vendored
+1
-1
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user