Renamed Examples to Implementation

This commit is contained in:
Simon Gruber
2024-01-08 16:23:24 +01:00
parent b17044f959
commit 6c554e444f
1240 changed files with 0 additions and 0 deletions
@@ -0,0 +1,27 @@
namespace Ocr.Report.Models;
/// <summary>
/// Evaluation data for a single image: the tagged reference words and one
/// <see cref="ProcessorStat"/> for every scan result handed to the constructor.
/// </summary>
public readonly struct ImageStats
{
    /// <summary>
    /// The name of the image the statistics belong to
    /// </summary>
    public string ImageName { get; }

    /// <summary>
    /// The tagged words each scan result is compared against
    /// </summary>
    public ICollection<string> Reference { get; }

    /// <summary>
    /// One entry per scan result, in the order the scan results were enumerated
    /// </summary>
    public ICollection<ProcessorStat> Processors { get; }

    /// <summary>
    /// Loads the data of every entry in <paramref name="scanResult"/> and compares it
    /// with <paramref name="taggedWords"/>
    /// </summary>
    /// <param name="imageName">The name of the image</param>
    /// <param name="taggedWords">The reference words of the image</param>
    /// <param name="scanResult">The scan result files that belong to the image</param>
    public ImageStats(
        string imageName,
        ICollection<string> taggedWords,
        IEnumerable<ScanFileInfo> scanResult
    )
    {
        ImageName = imageName;
        Reference = taggedWords;

        // Each scan file is read exactly once, right before its stat is built.
        var processorStats =
            from scan in scanResult
            let data = scan.GetData()
            select new ProcessorStat(scan.ProcessorName, data.Elapsed, taggedWords, data.Words);

        Processors = processorStats.ToArray();
    }
}
@@ -0,0 +1,85 @@
using Common.Distance;
namespace Ocr.Report.Models;
/// <summary>
/// The result of one processor for one image, compared against the reference words
/// of that image.
/// </summary>
public readonly struct ProcessorStat : IDistanceComparer<IEnumerable<string>>
{
    /// <summary>
    /// The name of the processor
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// The total milliseconds it took the processor to process the data
    /// </summary>
    /// <remarks>
    /// NOTE(review): the report code multiplies this value by 1000 before labelling it
    /// "ms", which suggests the value is actually stored in seconds - confirm the unit.
    /// </remarks>
    public double ProcessingTime { get; }

    /// <inheritdoc />
    public IEnumerable<string> Reference { get; }

    /// <inheritdoc />
    public IEnumerable<string>? Hypothesis { get; }

    /// <summary>
    /// The value <see cref="Calculator.GetDistance"/> returns for the sorted reference and
    /// hypothesis words, divided by the number of reference words. When there are no
    /// reference words the value is left undivided instead of dividing by zero.
    /// </summary>
    public double Distance { get; }

    /// <summary>
    /// Information about the processed values: one entry per reference word, holding the
    /// comparison with the hypothesis word that has the lowest distance
    /// </summary>
    public ICollection<IDistanceComparer<string>> Words { get; }

    /// <summary>
    /// Compares <paramref name="hypothesis"/> with <paramref name="reference"/> as a whole
    /// (<see cref="Distance"/>) and word by word (<see cref="Words"/>)
    /// </summary>
    /// <param name="name">The name of the processor</param>
    /// <param name="processingTime">The time the processor needed</param>
    /// <param name="reference">The expected words</param>
    /// <param name="hypothesis">The words the processor produced</param>
    public ProcessorStat(
        string name,
        double processingTime,
        ICollection<string> reference,
        ICollection<string> hypothesis
    )
    {
        Name = name;
        ProcessingTime = processingTime;
        Reference = reference;
        Hypothesis = hypothesis;

        // Widen to double before dividing so an integral distance is not truncated.
        double totalDistance = Calculator.GetDistance(
            reference.OrderBy(s => s).ToArray(),
            hypothesis.OrderBy(s => s).ToArray()
        );

        // An empty reference would otherwise produce NaN/Infinity (or a division by zero).
        Distance = totalDistance / Math.Max(reference.Count, 1);
        Words = GetDistanceInfos(reference, hypothesis).ToArray();
    }

    /// <summary>
    /// Compares each value of the <paramref name="referenceCollection"/> with all given values in the
    /// <paramref name="hypothesisCollection"/>, determining the <see cref="IDistanceComparer{T}"/>
    /// with the lowest error
    /// </summary>
    /// <returns>
    /// One comparer per reference value, in reference order. If several hypotheses share the
    /// lowest distance the first one wins; if there is no hypothesis at all, a comparer created
    /// from the reference alone is returned.
    /// </returns>
    private static IEnumerable<IDistanceComparer<string>> GetDistanceInfos(
        ICollection<string> referenceCollection,
        ICollection<string> hypothesisCollection
    )
    {
        foreach (var reference in referenceCollection)
        {
            // MinBy yields null for an empty sequence of a reference type, which covers
            // the "no hypothesis" case without sorting every candidate pair.
            var closest = hypothesisCollection
                .Select(hypothesis =>
                    (IDistanceComparer<string>)new DistanceComparer<string>(reference, hypothesis))
                .MinBy(candidate => candidate.Distance);

            yield return closest ?? new DistanceComparer<string>(reference);
        }
    }
}
@@ -0,0 +1,48 @@
using System.Text.Json;
using System.Text.RegularExpressions;
namespace Ocr.Report.Models;
/// <summary>
/// Describes one scan result file. The image and processor names are taken from the
/// file name; the recognised words and the elapsed time are read from the file on demand.
/// </summary>
public struct ScanFileInfo
{
    /// <summary>
    /// The path this instance was created from; <see cref="GetData"/> reads this file
    /// </summary>
    public string Path { get; private init; }

    // Splits a file name of the form "<image>.<processor>.<extension>". The image group is
    // greedy, so when the name contains more than two dots the extra ones stay in the image part.
    private static readonly Regex parseRegex = new(
        @"(?'image'.+)\.(?'processor'.+)\..+",
        RegexOptions.Compiled | RegexOptions.CultureInvariant | RegexOptions.IgnoreCase
    );

    /// <summary>
    /// The processor part of the file name
    /// </summary>
    public string ProcessorName { get; set; }

    /// <summary>
    /// The image part of the file name
    /// </summary>
    public string ImageName { get; set; }

    /// <summary>
    /// Reads the file at <see cref="Path"/> and extracts the "Elapsed" number and the
    /// "Text" of every entry in the "Words" array
    /// </summary>
    /// <returns>
    /// The elapsed value and the word texts in file order; a <c>null</c> text becomes an
    /// empty string
    /// </returns>
    public (double Elapsed, ICollection<string> Words) GetData()
    {
        using var file = File.OpenRead(Path);

        // JsonDocument works on pooled buffers; dispose it once the values are copied out.
        using var document = JsonDocument.Parse(file);
        var root = document.RootElement;

        var words = root
            .GetProperty("Words")
            .EnumerateArray()
            .Select(e => e.GetProperty("Text").GetString() ?? string.Empty)
            .ToArray();
        var elapsed = root.GetProperty("Elapsed").GetDouble();

        return (elapsed, words);
    }

    /// <summary>
    /// Creates the info for the file at <paramref name="path"/>
    /// </summary>
    /// <remarks>
    /// If the file name does not match the expected pattern, <see cref="ProcessorName"/>
    /// and <see cref="ImageName"/> are empty strings.
    /// </remarks>
    public static ScanFileInfo FromPath(string path)
    {
        var match = parseRegex.Match(System.IO.Path.GetFileName(path));

        return new ScanFileInfo
        {
            Path = path,
            ProcessorName = match.Groups["processor"].Value,
            ImageName = match.Groups["image"].Value
        };
    }

    /// <inheritdoc />
    public override string ToString() => ImageName;
}
@@ -0,0 +1,31 @@
using System.Text.Json;
namespace Ocr.Report.Models;
/// <summary>
/// Describes one tag file: a JSON document whose "words" array holds the reference
/// words of an image. The image name is the file name without its extension.
/// </summary>
internal struct TagFileInfo
{
    /// <summary>
    /// The path this instance was created from; <see cref="GetWords"/> reads this file
    /// </summary>
    public string Path { get; private init; }

    /// <summary>
    /// The file name of <see cref="Path"/> without its extension
    /// </summary>
    public string ImageName { get; set; }

    /// <summary>
    /// Reads the file at <see cref="Path"/> and returns the entries of its "words" array
    /// in file order
    /// </summary>
    /// <exception cref="JsonException">An entry of the "words" array is <c>null</c></exception>
    public ICollection<string> GetWords()
    {
        using var file = File.OpenRead(Path);

        // JsonDocument works on pooled buffers; ToArray copies the strings out before
        // the document is disposed at the end of the method.
        using var document = JsonDocument.Parse(file);

        return document
            .RootElement
            .GetProperty("words")
            .EnumerateArray()
            .Select(w => w.GetString() ?? throw new JsonException("Cannot parse null words"))
            .ToArray();
    }

    /// <summary>
    /// Creates the info for the file at <paramref name="path"/>
    /// </summary>
    public static TagFileInfo FromPath(string path) => new()
    {
        Path = path,
        ImageName = System.IO.Path.GetFileNameWithoutExtension(path),
    };

    /// <inheritdoc />
    public override string ToString() => ImageName;
}
@@ -0,0 +1,24 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Console executable targeting .NET 6; implicit usings and nullable reference types are enabled. -->
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net6.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<!-- Takes Properties\htmldocument-style.css out of the None items of this project. -->
<ItemGroup>
<None Remove="Properties\htmldocument-style.css" />
</ItemGroup>
<!-- NOTE(review): none of the sources seen next to this project file use Entity Framework Core; confirm this package is still required. -->
<ItemGroup>
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="7.0.5" />
</ItemGroup>
<!-- Projects from the same repository that this executable is built against. -->
<ItemGroup>
<ProjectReference Include="..\..\ReportGeneration\ReportGeneration.Generators\ReportGeneration.Generators.csproj" />
<ProjectReference Include="..\..\ReportGeneration\ReportGeneration.Interface\ReportGeneration.Interface.csproj" />
<ProjectReference Include="..\Common\Common.csproj" />
</ItemGroup>
</Project>
+78
View File
@@ -0,0 +1,78 @@
using Common.Extensions;
using Ocr.Report.Models;
using ReportGeneration.Generators;
namespace Ocr.Report;
/// <summary>
/// Entry point of the report tool: pairs the tag files of one directory with the scan
/// result files of another and writes the comparison to "report.html".
/// </summary>
internal static class Program
{
    /// <summary>
    /// Generates the report
    /// </summary>
    /// <param name="args">
    /// <c>args[0]</c>: directory with the tagged data (*.json);
    /// <c>args[1]</c>: directory with the scan results (*.json)
    /// </param>
    internal static void Main(string[] args)
    {
        // Both directories are required; fail with a usage hint instead of an
        // IndexOutOfRangeException when one is missing.
        if (args.Length < 2)
        {
            Console.Error.WriteLine("Usage: <tagged data directory> <scan results directory>");
            Environment.ExitCode = 1;
            return;
        }

        // Retrieve data
        Console.WriteLine("Getting data");
        var tagFileInfos = GetTagFileInfos(args[0]);
        var scanFileInfos = GetScanFileInfos(args[1]);

        // Parse
        Console.WriteLine("Generating report");
        var scans = Scan(tagFileInfos, scanFileInfos);
        var path = Path.GetFullPath("report.html");

        using var document = new HtmlDocumentGenerator(path);
        using var report = new ReportGenerator("OCR Report", document, scans)
            .AddComparison("Processing summary (Average)", v =>
            {
                var result = v.Average(out var deviation);
                return (result, deviation);
            })
            // .AddComparison("Processing summary (Cumulative)", v => v.Sum())
            .AddComparison("Processing summary (Median)", v =>
            {
                var result = v.Median(out var deviation);
                return (result, deviation);
            })
            // .AddProcessorStats("Processor Stats")
            .AddImageStatsFull("Scan Results");

        Console.WriteLine($"Saved report to '{path}'");
    }

    /// <summary>
    /// Builds the <see cref="ImageStats"/> of every tag file from the scan files that carry
    /// the same image name. The reference words are de-duplicated and sorted first.
    /// </summary>
    private static IEnumerable<ImageStats> Scan(
        IEnumerable<TagFileInfo> tagFileInfos,
        IEnumerable<ScanFileInfo> scanFileInfos
    )
    {
        var scanFileLookup = scanFileInfos.ToLookup(i => i.ImageName);

        foreach (var i in tagFileInfos)
        {
            yield return new ImageStats(
                i.ImageName,
                i.GetWords().Distinct().OrderBy(w => w).ToArray(),
                scanFileLookup[i.ImageName]
            );
        }
    }

    /// <summary>
    /// Lists the tag files (*.json) in <paramref name="dir"/>
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="dir"/> does not exist</exception>
    private static IEnumerable<TagFileInfo> GetTagFileInfos(string dir) =>
        EnumerateJsonFiles(dir, "tagged data").Select(TagFileInfo.FromPath);

    /// <summary>
    /// Lists the scan result files (*.json) in <paramref name="dir"/>
    /// </summary>
    /// <exception cref="ArgumentException"><paramref name="dir"/> does not exist</exception>
    private static IEnumerable<ScanFileInfo> GetScanFileInfos(string dir) =>
        EnumerateJsonFiles(dir, "scan results").Select(ScanFileInfo.FromPath);

    /// <summary>
    /// Enumerates the *.json files directly inside <paramref name="dir"/>;
    /// <paramref name="description"/> names the directory in the error message
    /// </summary>
    private static IEnumerable<string> EnumerateJsonFiles(string dir, string description)
    {
        if (!Directory.Exists(dir))
        {
            throw new ArgumentException($"Invalid {description} directory '{dir}'");
        }

        return Directory.EnumerateFiles(dir, "*.json");
    }
}
@@ -0,0 +1,9 @@
{
"profiles": {
"ReportGenerator": {
"commandName": "Project",
"commandLineArgs": "\"img\" \"results\"",
"workingDirectory": "D:\\git\\BA\\Examples\\testdata"
}
}
}
@@ -0,0 +1,263 @@
using Ocr.Report.Models;
using ReportGeneration.Abstract.Model;
using ReportGeneration.Interface;
namespace Ocr.Report;
/// <summary>
/// Writes the sections of the OCR report into an <see cref="IDocumentGenerator"/>. Every
/// <c>Add*</c> method appends one section and returns the generator so calls can be chained.
/// Disposing the generator disposes the document.
/// </summary>
/// <remarks>
/// The first argument of <c>AppendTable</c> is treated as the column count here: every call
/// passes the number of header cells it writes. NOTE(review): confirm against
/// <see cref="IDocumentGenerator"/>.
/// </remarks>
public class ReportGenerator : IDisposable
{
    /// <summary>
    /// Number of rows shown at the top and at the bottom of a comparison table before the
    /// rows in between are replaced by an ellipsis row
    /// </summary>
    private const int ComparisonContext = 5;

    private IDocumentGenerator Document { get; }

    private ICollection<ImageStats> Images { get; }

    /// <summary>
    /// Materialises <paramref name="data"/> and opens <paramref name="document"/>
    /// </summary>
    public ReportGenerator(
        IDocumentGenerator document,
        IEnumerable<ImageStats> data
    )
    {
        Images = data.ToArray();
        Document = document;
        document.Open();
    }

    /// <summary>
    /// Same as <see cref="ReportGenerator(IDocumentGenerator, IEnumerable{ImageStats})"/>,
    /// then writes <paramref name="title"/> as the top-level heading
    /// </summary>
    public ReportGenerator(
        string title,
        IDocumentGenerator document,
        IEnumerable<ImageStats> data
    ) : this(document, data) => AddTitle(title);

    #region Writing

    /// <summary>
    /// Appends <paramref name="text"/> as a level 1 heading
    /// </summary>
    public ReportGenerator AddTitle(string text)
    {
        Document.AppendHeading(1, text);
        return this;
    }

    /// <summary>
    /// Appends one table per processor that lists the images it processed, ordered by the
    /// processor's distance on that image (lowest first)
    /// </summary>
    public ReportGenerator AddProcessorStats(string title)
    {
        Document.AppendHeading(2, title);

        // Group the images by the processors that produced a result for them.
        var processors = new Dictionary<string, ICollection<ImageStats>>();
        foreach (var image in Images)
        {
            foreach (var processor in image.Processors)
            {
                if (processors.TryGetValue(processor.Name, out var images))
                {
                    images.Add(image);
                }
                else
                {
                    processors.Add(processor.Name, new List<ImageStats> { image });
                }
            }
        }

        foreach (var (processor, images) in processors)
        {
            var ordered = images
                .Select(i => (Stats: i, Distance: i
                    .Processors
                    .Where(p => p.Name.Equals(processor))
                    .Select(p => p.Distance)
                    .Average()
                ))
                .OrderBy(i => i.Distance)
                .ToArray();

            Document
                .AppendHeading(3, processor)
                // Three columns: the header below has three cells.
                .AppendTable(3, table =>
                {
                    table.AppendHeader(new[] { "Image", "Preview", "Distance" });
                    foreach (var (stats, distance) in ordered)
                    {
                        var imgPath = Path.Combine("results", $"{processor}.00.{stats.ImageName}.png");
                        table.AppendRow(new[]
                        {
                            $"<a href=\"#{stats.ImageName}\">{stats.ImageName}</a>",
                            Document.FormatImage(imgPath, new Bounds(0, 150)),
                            distance.ToString("F2")
                        });
                    }
                });
        }

        return this;
    }

    /// <summary>
    /// Appends one section per image: the image itself, a table with one row per processor
    /// (ordered by distance, then processing time) and the number of tagged words
    /// </summary>
    public ReportGenerator AddImageStatsFull(string title)
    {
        Document.AppendHeading(2, title);

        foreach (var stat in Images)
        {
            // Five fixed columns followed by one column per reference word.
            var fixedHeader = new[] { "Processor", "Elapsed", "WER", "CER (avg)", "Image" };

            Document
                .AppendHeading(3, stat.ImageName)
                .AppendParagraph(
                    Document.FormatImage(Path.Combine("img", stat.ImageName), new Bounds(0, 350))
                )
                .AppendTable(
                    stat.Reference.Count + fixedHeader.Length,
                    table =>
                    {
                        table.AppendHeader(fixedHeader.Concat(stat.Reference));

                        var processors = stat.Processors
                            .OrderBy(s => s.Distance)
                            .ThenBy(s => s.ProcessingTime);
                        foreach (var processor in processors)
                        {
                            var imgPath = Path.Combine("results", $"{processor.Name}.00.{stat.ImageName}.png");
                            var preview = Document.FormatImage(imgPath, new Bounds(0, 150));

                            // Average throws on an empty sequence, which happens when the
                            // image has no reference words.
                            var averageCer = processor.Words.Count == 0
                                ? "n/a"
                                : processor.Words.Average(s => s.Distance).ToString("F2");

                            var fixedCells = new[]
                            {
                                processor.Name,
                                $"{processor.ProcessingTime * 1000:F1}ms",
                                $"{processor.Distance * 100:F1}%",
                                averageCer,
                                preview
                            };

                            table.AppendRow(fixedCells.Concat(
                                processor.Words.Select(s => s.ToString() ?? string.Empty)
                            ));
                        }
                    })
                .AppendParagraph(
                    $"Comparison data generated based on {stat.Reference.Count} tagged words."
                );
        }

        return this;
    }

    /// <summary>
    /// Appends three ranking tables (WER, CER, time) that compare the processors over all images
    /// </summary>
    /// <param name="title">The heading of the section</param>
    /// <param name="evaluationFunc">
    /// Reduces all values of one processor to the value shown in the table and its deviation
    /// </param>
    public ReportGenerator AddComparison(
        string title,
        Func<IEnumerable<double>, (double, double)> evaluationFunc
    )
    {
        var lookup = Images
            .SelectMany(s => s.Processors)
            .ToLookup(p => p.Name);

        Document.AppendHeading(2, title);

        Document.AppendHeading(3, "WER");
        AppendComparison(
            ("Error", "%"),
            Rank(lookup, g => g.Select(p => p.Distance * 100), evaluationFunc)
        );

        Document.AppendHeading(3, "CER");
        AppendComparison(
            ("Changes", string.Empty),
            Rank(lookup, g => g.SelectMany(p => p.Words, (_, word) => word.Distance), evaluationFunc)
        );

        Document.AppendHeading(3, "Time");
        AppendComparison(
            ("Time", "ms"),
            Rank(lookup, g => g.Select(p => p.ProcessingTime * 1000), evaluationFunc)
        );

        return this;
    }

    /// <summary>
    /// Evaluates <paramref name="metric"/> for every processor and orders the processors by
    /// the resulting value, lowest first
    /// </summary>
    private static IEnumerable<(string Name, double Value, double Deviation)> Rank(
        ILookup<string, ProcessorStat> processors,
        Func<IGrouping<string, ProcessorStat>, IEnumerable<double>> metric,
        Func<IEnumerable<double>, (double, double)> evaluationFunc
    )
    {
        return processors
            .Select(g =>
            {
                var (value, deviation) = evaluationFunc(metric(g));
                return (
                    Name: g.Key,
                    Value: value,
                    Deviation: deviation
                );
            })
            .OrderBy(g => g.Value);
    }

    /// <summary>
    /// Appends a three-column table (processor, value, deviation) for <paramref name="values"/>.
    /// Up to twice <see cref="ComparisonContext"/> rows are written completely; longer lists
    /// show the first and the last <see cref="ComparisonContext"/> rows with a "..." row in between.
    /// </summary>
    private void AppendComparison(
        (string name, string unit)? valueInfo,
        IEnumerable<(string, double, double)> values
    )
    {
        var tValues = values.ToArray();

        // Only elide when rows would actually be hidden; otherwise short lists lose
        // entries (a single processor would not be shown at all).
        var elide = tValues.Length > 2 * ComparisonContext;

        Document.AppendTable(3, table =>
        {
            var body = table.AppendHeader(new[]
            {
                "Processor",
                valueInfo?.name ?? "Value",
                "Deviation"
            });

            if (!elide)
            {
                body.AppendRows(tValues.Select(MakeRow));
                return;
            }

            body
                .AppendRows(tValues
                    .Take(ComparisonContext)
                    .Select(MakeRow))
                .AppendRow("...")
                .AppendRows(tValues
                    .TakeLast(ComparisonContext)
                    .Select(MakeRow));
        });

        string[] MakeRow((string, double, double) v) =>
            new[]
            {
                v.Item1,
                v.Item2.ToString("F2") + valueInfo?.unit,
                v.Item3.ToString("F2") + valueInfo?.unit
            };
    }

    #endregion

    #region IDisposable

    /// <summary>
    /// Disposes the document when called from <see cref="Dispose()"/>
    /// </summary>
    protected virtual void Dispose(bool disposing)
    {
        if (disposing)
        {
            Document.Dispose();
        }
    }

    /// <inheritdoc />
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    #endregion
}