91 lines
2.3 KiB
C#
91 lines
2.3 KiB
C#
namespace ReportGenerator.Models;
|
|
|
|
/// <summary>
/// Immutable summary of how one processor's output compares against a
/// reference word list: one <see cref="CharacterStats"/> entry per reference
/// string plus an overall word error rate.
/// </summary>
/// <remarks>
/// The property initializers only run through the explicit constructor;
/// <c>default(ProcessorStat)</c> bypasses them and leaves the reference-typed
/// properties <c>null</c>.
/// </remarks>
internal readonly struct ProcessorStat
{
    /// <summary>Name passed to the constructor for this processor.</summary>
    public string ProcessorName { get; } = string.Empty;

    /// <summary>
    /// Best-matching character statistics, one entry per reference string,
    /// in the order the reference strings were supplied.
    /// </summary>
    public ICollection<CharacterStats> CharacterStats { get; } = Array.Empty<CharacterStats>();

    /// <summary>
    /// Word error rate of the processor output against the reference, or
    /// <see cref="double.PositiveInfinity"/> when it cannot be computed.
    /// </summary>
    public double WordError { get; } = double.PositiveInfinity;

    /// <summary>
    /// Computes the word error rate and the per-reference character
    /// statistics for one processor.
    /// </summary>
    /// <param name="processorName">Name stored in <see cref="ProcessorName"/>.</param>
    /// <param name="reference">The expected strings.</param>
    /// <param name="values">The strings produced by the processor.</param>
    public ProcessorStat(
        string processorName,
        ICollection<string> reference,
        ICollection<string> values
    )
    {
        ProcessorName = processorName;

        WordError = CalculateWer(
            reference,
            values
        );

        // Materialize the lazy iterator once so the statistics are not
        // recomputed every time the collection is enumerated.
        CharacterStats = GetCharacterStat(
            reference,
            values
        ).ToArray();
    }

    /// <summary>
    /// Renders this statistic as one row of cells: the processor name, each
    /// character statistic (via <c>ToString()</c>), the word error, and the
    /// average and total character error, all numbers formatted as "F2".
    /// </summary>
    /// <returns>The cells of the row, in display order.</returns>
    public IEnumerable<string> ToRow()
    {
        // Enumerable.Average throws InvalidOperationException on an empty
        // sequence, which happens whenever the reference list was empty.
        // An average over nothing is undefined, so report NaN instead.
        string averageError = CharacterStats.Count == 0
            ? double.NaN.ToString("F2")
            : CharacterStats.Average(s => s.CharacterError).ToString("F2");

        string totalError = CharacterStats.Sum(s => s.CharacterError).ToString("F2");

        return CharacterStats
            .Select(s => s.ToString())
            .Append(WordError.ToString("F2"))
            .Append(averageError)
            .Append(totalError)
            .Prepend(ProcessorName);
    }

    /// <summary>
    /// For every reference string, compares it against each candidate in
    /// <paramref name="values"/> and keeps the candidate whose
    /// <see cref="CharacterStats"/> has the lowest <c>CharacterError</c>.
    /// </summary>
    /// <param name="reference">The expected strings.</param>
    /// <param name="values">The candidate strings to match against each reference string.</param>
    /// <returns>
    /// A lazily evaluated sequence with exactly one entry per reference
    /// string, in reference order.
    /// </returns>
    private static IEnumerable<CharacterStats> GetCharacterStat(
        IEnumerable<string> reference,
        ICollection<string> values
    )
    {
        foreach (var refValue in reference)
        {
            // Baseline used when no candidate does at least as well.
            // NOTE(review): presumably the single-argument constructor
            // represents "no match found" - confirm against CharacterStats.
            CharacterStats result = new CharacterStats(refValue);

            foreach (var value in values)
            {
                var stat = new CharacterStats(refValue, value);
                if (stat.CharacterError > result.CharacterError)
                {
                    continue;
                }

                // Equal or better: on a tie the later candidate wins.
                result = stat;

                // A zero error cannot be improved on, so stop searching.
                if (stat.CharacterError == 0)
                {
                    break;
                }
            }

            yield return result;
        }
    }

    /// <summary>
    /// Calculates the word error rate: the minimum number of word
    /// substitutions, deletions and insertions needed to turn
    /// <paramref name="actual"/> into <paramref name="expected"/>, divided by
    /// the number of expected words. Words are compared ordinally.
    /// </summary>
    /// <param name="expected">The reference words, in order.</param>
    /// <param name="actual">The produced words, in order.</param>
    /// <returns>
    /// The word error rate (0 for identical sequences; may exceed 1 when
    /// <paramref name="actual"/> is longer than <paramref name="expected"/>),
    /// or <see cref="double.PositiveInfinity"/> when either collection is empty.
    /// </returns>
    private static double CalculateWer(ICollection<string> expected, ICollection<string> actual)
    {
        // No output at all, or nothing to normalize by: the rate is
        // undefined, reported as the same sentinel WordError defaults to.
        if (actual.Count == 0 || expected.Count == 0)
        {
            return double.PositiveInfinity;
        }

        string[] referenceWords = expected.ToArray();
        string[] hypothesisWords = actual.ToArray();

        // Two-row Levenshtein distance over words. Counting substitutions,
        // deletions and insertions independently (positional mismatches plus
        // set differences) counts a single wrong word up to three times and
        // misses dropped duplicates, because set differences are distinct.
        int[] previous = new int[hypothesisWords.Length + 1];
        int[] current = new int[hypothesisWords.Length + 1];

        for (int j = 0; j <= hypothesisWords.Length; j++)
        {
            // Matching an empty reference prefix needs j insertions.
            previous[j] = j;
        }

        for (int i = 1; i <= referenceWords.Length; i++)
        {
            // Matching an empty hypothesis prefix needs i deletions.
            current[0] = i;

            for (int j = 1; j <= hypothesisWords.Length; j++)
            {
                bool sameWord = string.Equals(
                    referenceWords[i - 1],
                    hypothesisWords[j - 1],
                    StringComparison.Ordinal);

                int substitution = previous[j - 1] + (sameWord ? 0 : 1);
                int deletion = previous[j] + 1;
                int insertion = current[j - 1] + 1;

                current[j] = Math.Min(substitution, Math.Min(deletion, insertion));
            }

            (previous, current) = (current, previous);
        }

        // After the final swap the completed row lives in "previous".
        int editDistance = previous[hypothesisWords.Length];

        return editDistance / (double)referenceWords.Length;
    }
}
|