96 lines
2.4 KiB
C#
96 lines
2.4 KiB
C#
using Common.Distance;
|
|
|
|
namespace ReportGenerator.Models;
|
|
|
|
/// <summary>
/// Result of running a single processor: its name, its processing time and how far its
/// output (<see cref="Hypothesis"/>) is from the expected output (<see cref="Reference"/>).
/// </summary>
public readonly struct ProcessorStat : IDistanceComparer<IEnumerable<string>>
{
    /// <summary>
    /// Largest distance, relative to the length of the reference string, that a candidate
    /// may have and still be accepted as the match for that reference.
    /// </summary>
    // todo fine-tune threshold
    private const double MaxRelativeDistance = 0.6d;

    /// <summary>
    /// The name of the processor
    /// </summary>
    public string Name { get; }

    /// <summary>
    /// The total milliseconds it took the processor to process the data
    /// </summary>
    public double ProcessingTime { get; }

    /// <inheritdoc />
    public IEnumerable<string> Reference { get; }

    /// <inheritdoc />
    public IEnumerable<string>? Hypothesis { get; }

    /// <inheritdoc />
    public double Distance { get; }

    /// <summary>
    /// Information about the processed values: one entry per element of
    /// <see cref="Reference"/>, holding the candidate selected for it from the hypothesis
    /// </summary>
    public ICollection<IDistanceComparer<string>> Words { get; }

    /// <summary>
    /// Creates the statistics for one processor run.
    /// </summary>
    /// <param name="name">The name of the processor</param>
    /// <param name="processingTime">The total processing time in milliseconds</param>
    /// <param name="reference">The expected values</param>
    /// <param name="hypothesis">The values produced by the processor</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="reference"/> or <paramref name="hypothesis"/> is <c>null</c>
    /// </exception>
    public ProcessorStat(
        string name,
        double processingTime,
        ICollection<string> reference,
        ICollection<string> hypothesis
    )
    {
        // Fail with the actual parameter name instead of the "source" reported by OrderBy.
        if (reference is null)
        {
            throw new ArgumentNullException(nameof(reference));
        }

        if (hypothesis is null)
        {
            throw new ArgumentNullException(nameof(hypothesis));
        }

        Name = name;
        ProcessingTime = processingTime;

        Reference = reference;
        Hypothesis = hypothesis;

        // Both sides are sorted the same way before they are compared, so the order in
        // which the values were produced does not influence the distance.
        // The result is widened to double before normalising so the division below can
        // never truncate, whatever numeric type GetDistance returns.
        double rawDistance = Calculator.GetDistance(
            reference.OrderBy(s => s).ToArray(),
            hypothesis.OrderBy(s => s).ToArray()
        );

        // Normalise by the number of reference values. An empty reference would make this
        // a division by zero, so the raw distance is reported unscaled in that case.
        Distance = rawDistance / Math.Max(reference.Count, 1);

        Words = reference.Select(r => GetDistanceInfo(r, hypothesis)).ToArray();
    }

    /// <summary>
    /// Placeholder for matching all references against all hypothesis values at once.
    /// Not implemented yet; always throws.
    /// </summary>
    /// <exception cref="NotImplementedException">Always</exception>
    private static ICollection<IDistanceComparer<string>> GetDistanceInfos(
        ICollection<string> reference,
        ICollection<string> hypothesis
    )
    {
        // todo avoid matching the same reference with a value multiple times
        throw new NotImplementedException();
    }

    /// <summary>
    /// Compares the <paramref name="reference"/> with all given <paramref name="values"/>
    /// and determines the <see cref="IDistanceComparer{T}"/> with the lowest error
    /// </summary>
    /// <param name="reference">The expected value</param>
    /// <param name="values">The candidates to compare against the reference</param>
    /// <returns>
    /// The comparer of the accepted candidate with the lowest distance, or the comparer
    /// created from the reference alone when no candidate was accepted
    /// </returns>
    private static IDistanceComparer<string> GetDistanceInfo(
        string reference,
        IEnumerable<string> values
    )
    {
        // Baseline built from the reference only; its distance is the bar the first
        // candidate has to meet (presumably the "nothing matched" case - confirm in
        // DistanceComparer).
        var best = new DistanceComparer<string>(reference);

        // Determine character stat with lowest error
        foreach (var candidate in values)
        {
            // todo avoid matching the same reference with a value multiple times

            var stat = new DistanceComparer<string>(reference, candidate);

            // NOTE(review): for an empty reference the ratio divides by zero; with a
            // floating-point Distance this yields NaN/Infinity instead of throwing - confirm.
            var isWorseThanBest = stat.Distance > best.Distance;
            var isTooFarOff = (stat.Distance / reference.Length) > MaxRelativeDistance;
            if (isWorseThanBest || isTooFarOff)
            {
                continue;
            }

            // Candidates that tie with the current best replace it, so the last one wins.
            best = stat;

            if (stat.Distance <= 0)
            {
                // We cannot go lower than zero, break
                return best;
            }
        }

        return best;
    }
}
|