This repository has been archived on 2024-06-04. You can view files and clone it. You cannot open issues or pull requests or push a commit.
Files
thesis-src/Examples/ReportGenerator/Models/ProcessorStat.cs
T
2023-11-20 14:50:23 +01:00

91 lines
2.3 KiB
C#

namespace ReportGenerator.Models;
/// <summary>
/// Accuracy statistics of a single processor: its word error rate (WER) and,
/// for every reference word, the best-matching character statistics.
/// </summary>
internal readonly struct ProcessorStat
{
    /// <summary>Display name of the processor these statistics belong to.</summary>
    public string ProcessorName { get; } = string.Empty;

    /// <summary>
    /// One entry per reference word, holding the candidate comparison with the
    /// lowest character error that was found among the processor's output words.
    /// </summary>
    public ICollection<CharacterStats> CharacterStats { get; } = Array.Empty<CharacterStats>();

    /// <summary>
    /// Word error rate of the processor output compared to the reference.
    /// <see cref="double.PositiveInfinity"/> when the processor produced no output
    /// or the reference is empty.
    /// </summary>
    public double WordError { get; } = double.PositiveInfinity;

    /// <summary>
    /// Computes the word and character statistics for one processor.
    /// </summary>
    /// <param name="processorName">Display name of the processor.</param>
    /// <param name="reference">The expected (ground truth) words, in order.</param>
    /// <param name="values">The words produced by the processor, in order.</param>
    public ProcessorStat(
        string processorName,
        ICollection<string> reference,
        ICollection<string> values
    )
    {
        ProcessorName = processorName;
        WordError = CalculateWer(
            reference,
            values
        );
        // Materialize once: the iterator would otherwise be re-evaluated on every enumeration.
        CharacterStats = GetCharacterStat(
            reference,
            values
        ).ToArray();
    }

    /// <summary>
    /// Builds the report row for this processor.
    /// </summary>
    /// <returns>
    /// The processor name, followed by the string form of every entry in
    /// <see cref="CharacterStats"/>, the word error rate, the average character
    /// error and the summed character error (numbers formatted with "F2").
    /// </returns>
    public IEnumerable<string> ToRow()
    {
        // Enumerable.Average throws InvalidOperationException on an empty sequence
        // (which happens when the reference had no words), so report "NaN" instead
        // of crashing the whole report.
        string averageError = CharacterStats.Count > 0
            ? CharacterStats.Average(s => s.CharacterError).ToString("F2")
            : double.NaN.ToString("F2");

        return CharacterStats
            .Select(s => s.ToString())
            .Append(WordError.ToString("F2"))
            .Append(averageError)
            .Append(CharacterStats.Sum(s => s.CharacterError).ToString("F2"))
            .Prepend(ProcessorName);
    }

    /// <summary>
    /// For every reference word, compares it against every output word and yields
    /// the comparison with the smallest character error.
    /// </summary>
    /// <param name="reference">The expected words.</param>
    /// <param name="values">The words produced by the processor.</param>
    /// <returns>One <see cref="CharacterStats"/> per reference word, in reference order.</returns>
    private static IEnumerable<CharacterStats> GetCharacterStat(
        IEnumerable<string> reference,
        ICollection<string> values
    )
    {
        foreach (var refValue in reference)
        {
            // Baseline used when no candidate is at least as good.
            // NOTE(review): presumably the single-argument constructor models
            // "nothing matched" - confirm in CharacterStats.
            CharacterStats result = new CharacterStats(refValue);
            foreach (var value in values)
            {
                var stat = new CharacterStats(refValue, value);
                if (stat.CharacterError > result.CharacterError)
                {
                    continue;
                }

                // Candidates that tie with the current best replace it as well.
                result = stat;
                if (stat.CharacterError == 0)
                {
                    // A zero-error candidate cannot be beaten, stop searching.
                    break;
                }
            }

            yield return result;
        }
    }

    /// <summary>
    /// Calculates the word error rate: the minimum number of word substitutions,
    /// deletions and insertions needed to turn <paramref name="actual"/> into
    /// <paramref name="expected"/> (word-level Levenshtein distance), divided by
    /// the number of expected words.
    /// </summary>
    /// <param name="expected">The reference words, in order.</param>
    /// <param name="actual">The recognized words, in order.</param>
    /// <returns>
    /// The word error rate; <see cref="double.PositiveInfinity"/> when
    /// <paramref name="actual"/> is empty (no output at all) or when
    /// <paramref name="expected"/> is empty (division by zero).
    /// </returns>
    private static double CalculateWer(ICollection<string> expected, ICollection<string> actual)
    {
        if (actual.Count == 0)
        {
            return double.PositiveInfinity;
        }

        string[] expectedWords = expected.ToArray();
        string[] actualWords = actual.ToArray();

        // Two-row dynamic programming table: previous[j] holds the edit distance
        // between the first i - 1 expected words and the first j actual words.
        int[] previous = new int[actualWords.Length + 1];
        int[] current = new int[actualWords.Length + 1];
        for (int j = 0; j <= actualWords.Length; j++)
        {
            // Turning j actual words into nothing takes j edits.
            previous[j] = j;
        }

        for (int i = 1; i <= expectedWords.Length; i++)
        {
            // Producing i expected words from no actual words takes i edits.
            current[0] = i;
            for (int j = 1; j <= actualWords.Length; j++)
            {
                int substitutionCost = string.Equals(
                    expectedWords[i - 1],
                    actualWords[j - 1],
                    StringComparison.Ordinal
                )
                    ? 0
                    : 1;

                int deletion = previous[j] + 1;
                int insertion = current[j - 1] + 1;
                int substitution = previous[j - 1] + substitutionCost;
                current[j] = Math.Min(Math.Min(deletion, insertion), substitution);
            }

            (previous, current) = (current, previous);
        }

        // After the final swap the completed row lives in 'previous'.
        int distance = previous[actualWords.Length];
        return distance / (double)expectedWords.Length;
    }
}