namespace ReportGenerator.Models;

/// <summary>
/// Accuracy statistics of a single processor measured against a reference
/// word list: one <see cref="CharacterStats"/> entry per reference word plus
/// the overall word error rate.
/// </summary>
internal readonly struct ProcessorStat
{
    /// <summary>Name of the processor these statistics belong to.</summary>
    public string ProcessorName { get; } = string.Empty;

    /// <summary>Best character-level match found for every reference word.</summary>
    public ICollection<CharacterStats> CharacterStats { get; } = Array.Empty<CharacterStats>();

    /// <summary>
    /// Word error rate of the processor output;
    /// <see cref="double.PositiveInfinity"/> when it could not be computed.
    /// </summary>
    public double WordError { get; } = double.PositiveInfinity;

    /// <summary>
    /// Computes the word error rate and the per-word character statistics
    /// of <paramref name="values"/> against <paramref name="reference"/>.
    /// </summary>
    /// <param name="processorName">Name of the processor that produced <paramref name="values"/>.</param>
    /// <param name="reference">The expected words.</param>
    /// <param name="values">The words the processor actually produced.</param>
    public ProcessorStat(
        string processorName,
        ICollection<string> reference,
        ICollection<string> values
    )
    {
        ProcessorName = processorName;
        WordError = CalculateWer(reference, values);
        CharacterStats = GetCharacterStat(reference, values).ToArray();
    }

    /// <summary>
    /// Renders the statistics as one table row: the processor name, one cell
    /// per reference word, the word error rate, and finally the average and
    /// the sum of the character errors.
    /// </summary>
    /// <returns>The cells of the row, in display order.</returns>
    public IEnumerable<string> ToRow()
    {
        // Average() throws on an empty sequence; report 0 instead so the
        // average stays consistent with the sum, which is 0 in that case.
        var averageCharacterError = CharacterStats.Count == 0
            ? 0
            : CharacterStats.Average(s => s.CharacterError);
        var totalCharacterError = CharacterStats.Sum(s => s.CharacterError);

        return CharacterStats
            .Select(s => s.ToString())
            .Append(WordError.ToString("F2"))
            .Append(averageCharacterError.ToString("F2"))
            .Append(totalCharacterError.ToString("F2"))
            .Prepend(ProcessorName);
    }

    /// <summary>
    /// Finds the smallest possible CER for every reference word by comparing
    /// it against every candidate word and keeping the most similar one.
    /// </summary>
    /// <param name="reference">The expected words.</param>
    /// <param name="values">The candidate words to match against.</param>
    /// <returns>One statistic per reference word, in reference order.</returns>
    private static IEnumerable<CharacterStats> GetCharacterStat(
        IEnumerable<string> reference,
        ICollection<string> values
    )
    {
        foreach (var refValue in reference)
        {
            // Baseline used when no candidate scores at least as well.
            // NOTE(review): presumably this is the "no match" statistic; confirm against CharacterStats.
            CharacterStats best = new CharacterStats(refValue);

            foreach (var value in values)
            {
                var candidate = new CharacterStats(refValue, value);

                // Candidates that tie with the current best replace it, so the
                // last of several equally good candidates wins.
                if (candidate.CharacterError > best.CharacterError)
                {
                    continue;
                }

                best = candidate;

                // A perfect match cannot be improved upon.
                if (candidate.CharacterError == 0)
                {
                    break;
                }
            }

            yield return best;
        }
    }

    /// <summary>
    /// Calculates the word error rate: the minimum number of word
    /// substitutions, deletions and insertions needed to turn
    /// <paramref name="actual"/> into <paramref name="expected"/>
    /// (word-level Levenshtein distance), divided by the number of expected words.
    /// </summary>
    /// <param name="expected">The reference words.</param>
    /// <param name="actual">The words that were actually produced.</param>
    /// <returns>
    /// The word error rate, or <see cref="double.PositiveInfinity"/> when
    /// either collection is empty.
    /// </returns>
    private static double CalculateWer(ICollection<string> expected, ICollection<string> actual)
    {
        // No output at all is reported with the "not computed" sentinel.
        if (actual.Count == 0)
        {
            return double.PositiveInfinity;
        }

        // Every produced word is an insertion against an empty reference, so
        // the rate is unbounded; returning early also avoids dividing by zero.
        if (expected.Count == 0)
        {
            return double.PositiveInfinity;
        }

        string[] referenceWords = expected.ToArray();
        string[] producedWords = actual.ToArray();

        // Two-row dynamic programming: previousRow[j] holds the edit distance
        // between the reference words handled so far and the first j produced words.
        int[] previousRow = new int[producedWords.Length + 1];
        int[] currentRow = new int[producedWords.Length + 1];

        for (int j = 0; j <= producedWords.Length; j++)
        {
            previousRow[j] = j;
        }

        for (int i = 1; i <= referenceWords.Length; i++)
        {
            currentRow[0] = i;

            for (int j = 1; j <= producedWords.Length; j++)
            {
                bool sameWord = string.Equals(
                    referenceWords[i - 1],
                    producedWords[j - 1],
                    StringComparison.Ordinal);

                int deletion = previousRow[j] + 1;
                int insertion = currentRow[j - 1] + 1;
                int substitution = previousRow[j - 1] + (sameWord ? 0 : 1);

                currentRow[j] = Math.Min(Math.Min(deletion, insertion), substitution);
            }

            (previousRow, currentRow) = (currentRow, previousRow);
        }

        // After the final swap the finished row lives in previousRow.
        return previousRow[producedWords.Length] / (double)referenceWords.Length;
    }
}