Renamed Examples to Implementation
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
namespace Ocr.Report.Models;
|
||||
|
||||
/// <summary>
/// Bundles, for one image, the tagged reference words and one
/// <see cref="ProcessorStat"/> per scan result that was produced for that image.
/// </summary>
public readonly struct ImageStats
{
    /// <summary>
    /// The image name handed to the constructor
    /// </summary>
    public string ImageName { get; }

    /// <summary>
    /// The tagged words handed to the constructor; every processor is compared against them
    /// </summary>
    public ICollection<string> Reference { get; }

    /// <summary>
    /// One entry per scan result, in the order the scan results were enumerated
    /// </summary>
    public ICollection<ProcessorStat> Processors { get; }

    /// <summary>
    /// Reads every scan result and compares its words with <paramref name="taggedWords"/>.
    /// </summary>
    /// <param name="imageName">Name of the image</param>
    /// <param name="taggedWords">Reference words of the image</param>
    /// <param name="scanResult">Scan result files belonging to the image; each one is read via <c>GetData</c></param>
    public ImageStats(
        string imageName,
        ICollection<string> taggedWords,
        IEnumerable<ScanFileInfo> scanResult
    )
    {
        var collected = new List<ProcessorStat>();

        foreach (var scan in scanResult)
        {
            // GetData reads the scan file from disk, so this loop performs I/O.
            var data = scan.GetData();
            collected.Add(new ProcessorStat(scan.ProcessorName, data.Elapsed, taggedWords, data.Words));
        }

        ImageName = imageName;
        Reference = taggedWords;
        Processors = collected.ToArray();
    }
}
|
||||
@@ -0,0 +1,85 @@
|
||||
using Common.Distance;
|
||||
|
||||
namespace Ocr.Report.Models;
|
||||
|
||||
/// <summary>
/// Result of comparing the words recognised by one processor with the reference words of an image.
/// </summary>
public readonly struct ProcessorStat : IDistanceComparer<IEnumerable<string>>
{
    /// <summary>
    /// The name of the processor
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// The time it took the processor to process the data, exactly as passed to the constructor.
    /// NOTE(review): this was documented as milliseconds, but the report code multiplies the value
    /// by 1000 and labels the product "ms" - presumably the unit is seconds; confirm with the
    /// producer of the "Elapsed" value.
    /// </summary>
    public double ProcessingTime { get; }

    /// <inheritdoc />
    public IEnumerable<string> Reference { get; }

    /// <inheritdoc />
    public IEnumerable<string>? Hypothesis { get; }

    /// <inheritdoc />
    public double Distance { get; }

    /// <summary>
    /// Information about the processed values: one entry per reference word, holding the
    /// hypothesis word with the smallest distance to it
    /// </summary>
    public ICollection<IDistanceComparer<string>> Words { get; }

    /// <summary>
    /// Computes the collection-level distance and the per-word best matches.
    /// </summary>
    /// <param name="name">The name of the processor</param>
    /// <param name="processingTime">The processing time reported for the processor</param>
    /// <param name="reference">The expected words</param>
    /// <param name="hypothesis">The words produced by the processor</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reference"/> or <paramref name="hypothesis"/> is <c>null</c>
    /// </exception>
    public ProcessorStat(
        string name,
        double processingTime,
        ICollection<string> reference,
        ICollection<string> hypothesis
    )
    {
        ArgumentNullException.ThrowIfNull(reference);
        ArgumentNullException.ThrowIfNull(hypothesis);

        Name = name;
        ProcessingTime = processingTime;

        Reference = reference;
        Hypothesis = hypothesis;

        // Both sides are sorted so that the comparison does not depend on the order in which the
        // words were detected. The explicit conversion keeps the division in floating point even if
        // GetDistance yields an integral edit distance; with an empty reference the result is
        // NaN or infinity rather than an exception.
        Distance = (double)Calculator.GetDistance(
            reference.OrderBy(s => s).ToArray(),
            hypothesis.OrderBy(s => s).ToArray()
        ) / reference.Count;

        Words = GetDistanceInfos(reference, hypothesis).ToArray();
    }

    /// <summary>
    /// Compares each value of the <paramref name="referenceCollection"/> with all given values in the
    /// <paramref name="hypothesisCollection"/>, determining the <see cref="IDistanceComparer{T}"/>
    /// with the lowest error
    /// </summary>
    /// <returns>
    /// One comparer per reference value, in reference order. When there is no hypothesis at all, a
    /// comparer created from the reference value alone is returned instead.
    /// </returns>
    private static IEnumerable<IDistanceComparer<string>> GetDistanceInfos(
        ICollection<string> referenceCollection,
        ICollection<string> hypothesisCollection
    )
    {
        foreach (var reference in referenceCollection)
        {
            // MinBy keeps the first candidate on ties and yields null for an empty hypothesis
            // collection, so no global sort or lookup over all pairs is needed.
            var closest = hypothesisCollection
                .Select(hypothesis => (IDistanceComparer<string>)new DistanceComparer<string>(reference, hypothesis))
                .MinBy(candidate => candidate.Distance);

            yield return closest ?? new DistanceComparer<string>(reference);
        }
    }
}
|
||||
@@ -0,0 +1,48 @@
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace Ocr.Report.Models;
|
||||
|
||||
/// <summary>
/// Describes one scan result file: where it lives and which image / processor its file name refers to.
/// </summary>
public struct ScanFileInfo
{
    // Expected file name layout: "<image>.<processor>.<extension>". Both name groups are greedy,
    // so the image part keeps every dot except the last two.
    private static readonly Regex parseRegex = new(
        @"(?'image'.+)\.(?'processor'.+)\..+",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase
    );

    /// <summary>
    /// The path handed to <see cref="FromPath"/>
    /// </summary>
    public string Path { get; private init; }

    /// <summary>
    /// The processor part of the file name (empty when the file name does not match the layout)
    /// </summary>
    public string ProcessorName { get; set; }

    /// <summary>
    /// The image part of the file name (empty when the file name does not match the layout)
    /// </summary>
    public string ImageName { get; set; }

    /// <summary>
    /// Reads the scan result file and extracts the elapsed time and the recognised words.
    /// </summary>
    /// <returns>
    /// The number stored in the "Elapsed" property and the "Text" of every element of the "Words"
    /// array; a JSON <c>null</c> text is returned as an empty string.
    /// </returns>
    public (double Elapsed, ICollection<string> Words) GetData()
    {
        using var file = File.OpenRead(Path);

        // JsonDocument rents pooled buffers and must be disposed. Everything that is returned is
        // copied out of the document (ToArray / GetDouble) before the using scope ends.
        using var document = JsonDocument.Parse(file);
        var root = document.RootElement;

        var words = root
            .GetProperty("Words")
            .EnumerateArray()
            .Select(e => e.GetProperty("Text").GetString() ?? string.Empty)
            .ToArray();

        var elapsed = root.GetProperty("Elapsed").GetDouble();

        return (elapsed, words);
    }

    /// <summary>
    /// Creates the info from a file path by parsing the file name; the file itself is not touched.
    /// </summary>
    /// <param name="path">Path of the scan result file</param>
    public static ScanFileInfo FromPath(string path)
    {
        // A non-matching file name is tolerated on purpose: the groups of a failed match are empty
        // strings, so such a file simply ends up with empty names.
        var match = parseRegex.Match(System.IO.Path.GetFileName(path));

        return new ScanFileInfo
        {
            Path = path,
            ProcessorName = match.Groups["processor"].Value,
            ImageName = match.Groups["image"].Value
        };
    }

    /// <inheritdoc />
    public override string ToString() => ImageName;
}
|
||||
@@ -0,0 +1,31 @@
|
||||
using System.Text.Json;
|
||||
|
||||
namespace Ocr.Report.Models;
|
||||
|
||||
/// <summary>
/// Describes one tag file, i.e. the JSON file holding the reference words of an image.
/// </summary>
internal struct TagFileInfo
{
    /// <summary>
    /// The path handed to <see cref="FromPath"/>
    /// </summary>
    public string Path { get; private init; }

    /// <summary>
    /// The file name of the tag file without its last extension
    /// </summary>
    public string ImageName { get; set; }

    /// <summary>
    /// Reads the tag file and returns the strings of its "words" array.
    /// </summary>
    /// <exception cref="InvalidDataException">The array contains a JSON <c>null</c></exception>
    public ICollection<string> GetWords()
    {
        using var file = File.OpenRead(Path);

        // JsonDocument rents pooled buffers and must be disposed; ToArray copies the strings out
        // before the using scope ends.
        using var document = JsonDocument.Parse(file);

        return document
            .RootElement
            .GetProperty("words")
            .EnumerateArray()
            .Select(w => w.GetString() ?? throw new InvalidDataException("Cannot parse null words"))
            .ToArray();
    }

    /// <summary>
    /// Creates the info from a file path; the file itself is not touched.
    /// </summary>
    /// <param name="path">Path of the tag file</param>
    public static TagFileInfo FromPath(string path) => new()
    {
        Path = path,
        ImageName = System.IO.Path.GetFileNameWithoutExtension(path),
    };

    /// <inheritdoc />
    public override string ToString() => ImageName;
}
|
||||
@@ -0,0 +1,24 @@
|
||||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
|
||||
<PropertyGroup>
|
||||
<OutputType>Exe</OutputType>
|
||||
<TargetFramework>net6.0</TargetFramework>
|
||||
<ImplicitUsings>enable</ImplicitUsings>
|
||||
<Nullable>enable</Nullable>
|
||||
</PropertyGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<None Remove="Properties\htmldocument-style.css" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="7.0.5" />
|
||||
</ItemGroup>
|
||||
|
||||
<ItemGroup>
|
||||
<ProjectReference Include="..\..\ReportGeneration\ReportGeneration.Generators\ReportGeneration.Generators.csproj" />
|
||||
<ProjectReference Include="..\..\ReportGeneration\ReportGeneration.Interface\ReportGeneration.Interface.csproj" />
|
||||
<ProjectReference Include="..\Common\Common.csproj" />
|
||||
</ItemGroup>
|
||||
|
||||
</Project>
|
||||
@@ -0,0 +1,78 @@
|
||||
using Common.Extensions;
|
||||
using Ocr.Report.Models;
|
||||
using ReportGeneration.Generators;
|
||||
|
||||
namespace Ocr.Report;
|
||||
|
||||
/// <summary>
/// Command line entry point: reads the tagged data and the scan results and writes "report.html"
/// into the current working directory.
/// </summary>
internal static class Program
{
    /// <summary>
    /// Generates the report.
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c>: directory with the tag files, <c>args[1]</c>: directory with the scan results
    /// </param>
    internal static void Main(string[] args)
    {
        // Both directories are mandatory; fail with a usage hint instead of an index error.
        if (args.Length < 2)
        {
            Console.Error.WriteLine("Usage: <tagged data directory> <scan results directory>");
            Environment.ExitCode = 1;
            return;
        }

        // Retrieve data

        Console.WriteLine("Getting data");
        var tagFileInfos = GetTagFileInfos(args[0]);
        var scanFileInfos = GetScanFileInfos(args[1]);

        // Parse

        Console.WriteLine("Generating report");
        var scans = Scan(tagFileInfos, scanFileInfos);

        var path = Path.GetFullPath("report.html");

        // The report disposes the document as well, so the document is disposed twice at the end
        // of this method. The outer using is kept so that the document is still released when
        // constructing the report fails. NOTE(review): relies on the generator tolerating a
        // repeated Dispose - confirm.
        using var document = new HtmlDocumentGenerator(path);
        using var report = new ReportGenerator("OCR Report", document, scans)
            .AddComparison("Processing summary (Average)", v =>
            {
                var result = v.Average(out var deviation);
                return (result, deviation);
            })
            .AddComparison("Processing summary (Median)", v =>
            {
                var result = v.Median(out var deviation);
                return (result, deviation);
            })
            // ReportGenerator.AddProcessorStats is available but currently not part of the report.
            .AddImageStatsFull("Scan Results");

        Console.WriteLine($"Saved report to '{path}'");
    }

    /// <summary>
    /// Lazily pairs every tag file with the scan results that carry the same image name.
    /// </summary>
    private static IEnumerable<ImageStats> Scan(
        IEnumerable<TagFileInfo> tagFileInfos,
        IEnumerable<ScanFileInfo> scanFileInfos
    )
    {
        var scanFileLookup = scanFileInfos.ToLookup(i => i.ImageName);
        foreach (var i in tagFileInfos)
        {
            yield return new ImageStats(
                i.ImageName,
                // Duplicates are dropped and the words are sorted before they are used as reference.
                i.GetWords().Distinct().OrderBy(w => w).ToArray(),
                scanFileLookup[i.ImageName]
            );
        }
    }

    /// <summary>
    /// Enumerates the "*.json" files of the tagged data directory.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="dir"/> does not exist</exception>
    private static IEnumerable<TagFileInfo> GetTagFileInfos(string dir)
    {
        if (!Directory.Exists(dir))
        {
            throw new ArgumentException($"Invalid tagged data directory '{dir}'", nameof(dir));
        }

        return Directory.EnumerateFiles(dir, "*.json").Select(TagFileInfo.FromPath);
    }

    /// <summary>
    /// Enumerates the "*.json" files of the scan results directory.
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="dir"/> does not exist</exception>
    private static IEnumerable<ScanFileInfo> GetScanFileInfos(string dir)
    {
        if (!Directory.Exists(dir))
        {
            throw new ArgumentException($"Invalid scan results directory '{dir}'", nameof(dir));
        }

        return Directory.EnumerateFiles(dir, "*.json").Select(ScanFileInfo.FromPath);
    }
}
|
||||
@@ -0,0 +1,9 @@
|
||||
{
|
||||
"profiles": {
|
||||
"ReportGenerator": {
|
||||
"commandName": "Project",
|
||||
"commandLineArgs": "\"img\" \"results\"",
|
||||
"workingDirectory": "D:\\git\\BA\\Examples\\testdata"
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,263 @@
|
||||
using Ocr.Report.Models;
|
||||
using ReportGeneration.Abstract.Model;
|
||||
using ReportGeneration.Interface;
|
||||
|
||||
namespace Ocr.Report;
|
||||
|
||||
public class ReportGenerator : IDisposable
|
||||
{
|
||||
private IDocumentGenerator Document { get; }
|
||||
|
||||
private ICollection<ImageStats> Images { get; }
|
||||
|
||||
/// <summary>
/// Materialises <paramref name="data"/> and opens <paramref name="document"/> for writing.
/// </summary>
/// <param name="document">The document the report is written to; it is disposed together with this instance</param>
/// <param name="data">The image statistics to report on; enumerated exactly once</param>
/// <exception cref="ArgumentNullException">An argument is <c>null</c></exception>
public ReportGenerator(
    IDocumentGenerator document,
    IEnumerable<ImageStats> data
)
{
    // Validate before doing any work: enumerating the data may be expensive, and a missing
    // document would otherwise only surface afterwards as a NullReferenceException.
    ArgumentNullException.ThrowIfNull(document);
    ArgumentNullException.ThrowIfNull(data);

    Images = data.ToArray();

    Document = document;
    document.Open();
}

/// <summary>
/// Same as the two-argument constructor, followed by <see cref="AddTitle"/>.
/// </summary>
/// <param name="title">Text of the top-level heading</param>
/// <param name="document">The document the report is written to</param>
/// <param name="data">The image statistics to report on</param>
public ReportGenerator(
    string title,
    IDocumentGenerator document,
    IEnumerable<ImageStats> data
) : this(document, data) => AddTitle(title);
|
||||
|
||||
#region Writing
|
||||
|
||||
/// <summary>
/// Appends <paramref name="text"/> as a level 1 heading.
/// </summary>
/// <returns>This instance, to allow chaining</returns>
public ReportGenerator AddTitle(string text)
{
    _ = Document.AppendHeading(1, text);

    return this;
}
|
||||
|
||||
/// <summary>
/// Appends one section per processor that lists the images handled by it, ordered by ascending
/// distance, each with a link, a preview image and the distance.
/// </summary>
/// <param name="title">Text of the level 2 heading that introduces the sections</param>
/// <returns>This instance, to allow chaining</returns>
public ReportGenerator AddProcessorStats(string title)
{
    // Cells per row ("Image", "Preview", "Distance"). The other tables written by this class pass
    // their cell count to AppendTable, so this one has to pass 3 rather than 2.
    const int columnCount = 3;

    Document.AppendHeading(2, title);

    // Processor name -> images that carry an entry of that processor (one item per entry).
    var imagesByProcessor = new Dictionary<string, ICollection<ImageStats>>();
    foreach (var image in Images)
    {
        foreach (var processor in image.Processors)
        {
            if (!imagesByProcessor.TryGetValue(processor.Name, out var bucket))
            {
                bucket = new List<ImageStats>();
                imagesByProcessor.Add(processor.Name, bucket);
            }

            bucket.Add(image);
        }
    }

    foreach (var (processor, images) in imagesByProcessor)
    {
        // Distance of an image = average over its entries of the current processor.
        var ranked = images
            .Select(image => (
                Stats: image,
                Distance: image.Processors
                    .Where(p => p.Name.Equals(processor))
                    .Average(p => p.Distance)
            ))
            .OrderBy(entry => entry.Distance)
            .ToArray();

        Document
            .AppendHeading(3, processor)
            .AppendTable(columnCount, table =>
            {
                table.AppendHeader(new[] { "Image", "Preview", "Distance" });

                foreach (var (stats, distance) in ranked)
                {
                    var imgPath = Path.Combine("results", $"{processor}.00.{stats.ImageName}.png");

                    table.AppendRow(new[]
                    {
                        $"<a href=\"#{stats.ImageName}\">{stats.ImageName}</a>",
                        Document.FormatImage(imgPath, new Bounds(0, 150)),
                        distance.ToString("F2")
                    });
                }
            });
    }

    return this;
}
|
||||
|
||||
/// <summary>
/// Appends one section per image: the image itself, a table with one row per processor (ordered by
/// distance, then by processing time) and one column per reference word, and a closing remark with
/// the number of reference words.
/// </summary>
/// <param name="title">Text of the level 2 heading that introduces the sections</param>
/// <returns>This instance, to allow chaining</returns>
public ReportGenerator AddImageStatsFull(string title)
{
    // Fixed columns in display order; the per-word columns follow them.
    var leadingColumns = new[] { "Processor", "Elapsed", "WER", "CER (avg)", "Image" };

    Document.AppendHeading(2, title);

    foreach (var stat in Images)
    {
        Document
            .AppendHeading(3, stat.ImageName)
            .AppendParagraph(
                Document.FormatImage(Path.Combine("img", stat.ImageName), new Bounds(0, 350))
            )
            .AppendTable(
                leadingColumns.Length + stat.Reference.Count,
                table =>
                {
                    table.AppendHeader(leadingColumns.Concat(stat.Reference));

                    var processors = stat.Processors
                        .OrderBy(s => s.Distance)
                        .ThenBy(s => s.ProcessingTime);

                    foreach (var processor in processors)
                    {
                        var imgPath = Path.Combine("results", $"{processor.Name}.00.{stat.ImageName}.png");

                        // Average throws on an empty sequence, which happens for an image without
                        // any reference words; show a placeholder in that case.
                        var averageCer = processor.Words.Count > 0
                            ? processor.Words.Average(s => s.Distance).ToString("F2")
                            : "n/a";

                        var leadingCells = new[]
                        {
                            processor.Name,
                            // NOTE(review): scales the stored processing time by 1000 before
                            // labelling it "ms" - presumably the stored unit is seconds; confirm.
                            $"{processor.ProcessingTime * 1000:F1}ms",
                            $"{processor.Distance * 100:F1}%",
                            averageCer,
                            Document.FormatImage(imgPath, new Bounds(0, 150))
                        };

                        table.AppendRow(leadingCells.Concat(
                            processor.Words.Select(s => s.ToString() ?? string.Empty)
                        ));
                    }
                })
            .AppendParagraph(
                $"Comparison data generated based on {stat.Reference.Count} tagged words."
            );
    }

    return this;
}
|
||||
|
||||
/// <summary>
/// Appends a section that ranks the processors three times: by word error ("WER"), by per-word
/// distance ("CER") and by processing time ("Time").
/// </summary>
/// <param name="title">Text of the level 2 heading of the section</param>
/// <param name="evaluationFunc">
/// Reduces the samples of one processor to the pair (value, deviation); the processors are ordered
/// by ascending value
/// </param>
/// <returns>This instance, to allow chaining</returns>
public ReportGenerator AddComparison(
    string title,
    Func<IEnumerable<double>, (double, double)> evaluationFunc
)
{
    var statsByProcessor = Images
        .SelectMany(image => image.Processors)
        .ToLookup(stat => stat.Name);

    Document.AppendHeading(2, title);

    Document.AppendHeading(3, "WER");
    AppendComparison(
        ("Error", "%"),
        Rank(stats => stats.Select(stat => stat.Distance * 100))
    );

    Document.AppendHeading(3, "CER");
    AppendComparison(
        ("Changes", string.Empty),
        Rank(stats => stats.SelectMany(stat => stat.Words).Select(word => word.Distance))
    );

    Document.AppendHeading(3, "Time");
    AppendComparison(
        ("Time", "ms"),
        Rank(stats => stats.Select(stat => stat.ProcessingTime * 1000))
    );

    return this;

    // Evaluates the samples of every processor and orders the processors by the resulting value.
    // The query is lazy; it runs when AppendComparison enumerates it.
    IEnumerable<(string, double, double)> Rank(
        Func<IEnumerable<ProcessorStat>, IEnumerable<double>> samples
    ) =>
        statsByProcessor
            .Select(group =>
            {
                var (value, deviation) = evaluationFunc(samples(group));
                return (Name: group.Key, Value: value, Deviation: deviation);
            })
            .OrderBy(entry => entry.Value);
}
|
||||
|
||||
|
||||
/// <summary>
/// Appends a three column table (processor, value, deviation) for the given values. Up to
/// 2 * 5 rows are written completely; for longer inputs only the first and the last 5 rows are
/// written, separated by a "..." row.
/// </summary>
/// <param name="valueInfo">
/// Header text of the value column and the unit appended to value and deviation; without it the
/// header is "Value" and no unit is appended
/// </param>
/// <param name="values">Rows as (processor, value, deviation), already in display order</param>
private void AppendComparison(
    (string name, string unit)? valueInfo,
    IEnumerable<(string, double, double)> values
)
{
    const int context = 5;

    var rows = values.ToArray();

    Document.AppendTable(3, table =>
    {
        table.AppendHeader(new[]
        {
            "Processor",
            valueInfo?.name ?? "Value",
            "Deviation"
        });

        // Only elide when rows are actually left out. Halving the row count instead would hide the
        // only row of a single-processor report and the middle row of short odd-sized inputs.
        if (rows.Length <= 2 * context)
        {
            table.AppendRows(rows.Select(MakeRow));
            return;
        }

        table
            .AppendRows(rows.Take(context).Select(MakeRow))
            .AppendRow("...")
            .AppendRows(rows.TakeLast(context).Select(MakeRow));

        return;

        string[] MakeRow((string, double, double) v) =>
            new[]
            {
                v.Item1,
                v.Item2.ToString("F2") + valueInfo?.unit,
                v.Item3.ToString("F2") + valueInfo?.unit
            };
    });
}
|
||||
|
||||
#endregion
|
||||
|
||||
#region IDisposable
|
||||
|
||||
/// <summary>
/// Disposes the document when called from <see cref="Dispose()"/>.
/// </summary>
/// <param name="disposing"><c>true</c> when invoked through <see cref="Dispose()"/></param>
protected virtual void Dispose(bool disposing)
{
    if (!disposing)
    {
        return;
    }

    Document.Dispose();
}

/// <inheritdoc />
public void Dispose()
{
    Dispose(disposing: true);

    // Tell the GC that a finalizer, should a derived class declare one, no longer has to run.
    GC.SuppressFinalize(this);
}
|
||||
|
||||
#endregion
|
||||
}
|
||||
Reference in New Issue
Block a user