Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 120 additions & 0 deletions FuzzySharp.Benchmarks/BenchmarkAll.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
using BenchmarkDotNet.Attributes;
using FuzzySharp.Extractor;
using FuzzySharp.PreProcess;

namespace FuzzySharp.Benchmarks;

/// <summary>
/// BenchmarkDotNet suite with one benchmark per FuzzySharp entry point exercised here
/// (ratio scorers, <c>Process.ExtractOne</c>, and two edit-distance implementations).
/// Every benchmark runs against fixed literal inputs.
/// </summary>
[MemoryDiagnoser]
public class BenchmarkAll
{
    // Inputs that are shared by more than one benchmark.
    private const string MisspelledSimilar = "mysmilarstring";
    private const string WordsInOrder = "order words out of";
    private const string WordsShuffled = " words out of order";
    private const string FuzzyBear = "fuzzy was a bear";
    private const string FuzzyRepeated = "fuzzy fuzzy fuzzy bear";
    private const string Initialism = "NASA";
    private const string NasaWithAddress = "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899";
    private const string Abbreviated = "bl 420";
    private const string AbbreviationExpanded = "Baseline section 420";
    private const string CubsVsMets = "chicago cubs vs new york mets";
    private const string MetsVsCubs = "new york mets vs chicago cubs";

    /// <summary>Candidate rows searched by <see cref="ExtractOne"/>; element 0 of each row is the text that gets scored.</summary>
    private static readonly string[][] Events =
    [
        [CubsVsMets, "CitiField", "2011-05-11", "8pm"],
        ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
        ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"]
    ];

    /// <summary>Query row passed to <see cref="ExtractOne"/>.</summary>
    private static readonly string[] Query = [MetsVsCubs, "CitiField", "2017-03-19", "8pm"];

    /// <summary>Measures <c>Fuzz.Ratio</c> on a short string against a longer, misspelled one.</summary>
    [Benchmark]
    public int Ratio1() => Fuzz.Ratio(MisspelledSimilar, "myawfullysimilarstirng");

    /// <summary>Measures <c>Fuzz.Ratio</c> on two strings of nearly equal length.</summary>
    [Benchmark]
    public int Ratio2() => Fuzz.Ratio(MisspelledSimilar, "mysimilarstring");

    /// <summary>Measures <c>Fuzz.PartialRatio</c>.</summary>
    [Benchmark]
    public int PartialRatio() => Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring");

    /// <summary>Measures <c>Fuzz.TokenSortRatio</c> on the same words in a different order.</summary>
    [Benchmark]
    public int TokenSortRatio() => Fuzz.TokenSortRatio(WordsInOrder, WordsShuffled);

    /// <summary>Measures <c>Fuzz.PartialTokenSortRatio</c> on the same words in a different order.</summary>
    [Benchmark]
    public int PartialTokenSortRatio() => Fuzz.PartialTokenSortRatio(WordsInOrder, WordsShuffled);

    /// <summary>Measures <c>Fuzz.TokenSetRatio</c> on inputs containing repeated words.</summary>
    [Benchmark]
    public int TokenSetRatio() => Fuzz.TokenSetRatio(FuzzyBear, FuzzyRepeated);

    /// <summary>Measures <c>Fuzz.PartialTokenSetRatio</c> on inputs containing repeated words.</summary>
    [Benchmark]
    public int PartialTokenSetRatio() => Fuzz.PartialTokenSetRatio(FuzzyBear, FuzzyRepeated);

    /// <summary>Measures <c>Fuzz.WeightedRatio</c> on two sentences that differ by a few typos and letter case.</summary>
    [Benchmark]
    public int WeightedRatio() =>
        Fuzz.WeightedRatio(
            "The quick brown fox jimps ofver the small lazy dog",
            "the quick brown fox jumps over the small lazy dog");

    /// <summary>Measures <c>Fuzz.TokenInitialismRatio</c> where the long form includes the word "and".</summary>
    [Benchmark]
    public int TokenInitialismRatio1() =>
        Fuzz.TokenInitialismRatio(Initialism, "National Aeronautics and Space Administration");

    /// <summary>Measures <c>Fuzz.TokenInitialismRatio</c> where the long form has exactly four words.</summary>
    [Benchmark]
    public int TokenInitialismRatio2() =>
        Fuzz.TokenInitialismRatio(Initialism, "National Aeronautics Space Administration");

    /// <summary>Measures <c>Fuzz.TokenInitialismRatio</c> where the long form is followed by an address.</summary>
    [Benchmark]
    public int TokenInitialismRatio3() => Fuzz.TokenInitialismRatio(Initialism, NasaWithAddress);

    /// <summary>Measures <c>Fuzz.PartialTokenInitialismRatio</c> where the long form is followed by an address.</summary>
    [Benchmark]
    public int PartialTokenInitialismRatio() => Fuzz.PartialTokenInitialismRatio(Initialism, NasaWithAddress);

    /// <summary>Measures <c>Fuzz.TokenAbbreviationRatio</c> with <c>PreprocessMode.Full</c>.</summary>
    [Benchmark]
    public int TokenAbbreviationRatio() =>
        Fuzz.TokenAbbreviationRatio(Abbreviated, AbbreviationExpanded, PreprocessMode.Full);

    /// <summary>Measures <c>Fuzz.PartialTokenAbbreviationRatio</c> with <c>PreprocessMode.Full</c>.</summary>
    [Benchmark]
    public int PartialTokenAbbreviationRatio() =>
        Fuzz.PartialTokenAbbreviationRatio(Abbreviated, AbbreviationExpanded, PreprocessMode.Full);

    /// <summary>Measures <c>Process.ExtractOne</c> over <see cref="Events"/>, scoring each row by its first element.</summary>
    [Benchmark]
    public ExtractedResult<string[]> ExtractOne() =>
        Process.ExtractOne(Query, Events, static row => row[0]);

    /// <summary>Measures FuzzySharp's span-based <c>Levenshtein.EditDistance</c>.</summary>
    [Benchmark]
    public int LevenshteinDistance() =>
        Levenshtein.EditDistance(CubsVsMets.AsSpan(), MetsVsCubs.AsSpan());

    /// <summary>Measures <c>Fastenshtein.Levenshtein.Distance</c> on the same pair of strings as <see cref="LevenshteinDistance"/>.</summary>
    [Benchmark]
    public int FastenshteinDistance() =>
        Fastenshtein.Levenshtein.Distance(CubsVsMets, MetsVsCubs);
}
19 changes: 19 additions & 0 deletions FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<PackageReference Include="Fastenshtein" Version="1.0.10" />
</ItemGroup>

<ItemGroup>
<ProjectReference Include="..\FuzzySharp\FuzzySharp.csproj" />
</ItemGroup>

</Project>
6 changes: 6 additions & 0 deletions FuzzySharp.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
using BenchmarkDotNet.Running;
using FuzzySharp;

// Program entry point (top-level statements): hands this assembly to BenchmarkDotNet's runner.
BenchmarkRunner.Run(typeof(Program).Assembly);

// Disabled one-off call that would print a single WeightedRatio score to the console.
// NOTE(review): presumably kept for quick manual checks — confirm it is still wanted.
//Console.WriteLine(Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"));
28 changes: 14 additions & 14 deletions FuzzySharp.Test/EvaluationTests/EvaluationTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -36,20 +36,20 @@ public void Evaluate()



var h1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" });
var h2 = string.Join(", ", Process.ExtractTop("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, limit: 3));
var h3 = string.Join(", ", Process.ExtractAll("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }));
var h4 = string.Join(", ", Process.ExtractAll("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, cutoff: 40));
var h5 = string.Join(", ", Process.ExtractSorted("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }));

var i1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" }, s => s, ScorerCache.Get<DefaultRatioScorer>());

var events = new[]
{
new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" },
new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" },
new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" },
};
var h1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"]);
var h2 = string.Join(", ", Process.ExtractTop("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], limit: 3));
var h3 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"]));
var h4 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], cutoff: 40));
var h5 = string.Join(", ", Process.ExtractSorted("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"]));

var i1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"], s => s, ScorerCache.Get<DefaultRatioScorer>());

string[][] events =
[
["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"]
];
var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" };

var best = Process.ExtractOne(query, events, strings => strings[0]);
Expand Down
8 changes: 4 additions & 4 deletions FuzzySharp.Test/FuzzySharp.Test.csproj
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>

<TargetFrameworks>NET8.0;netcoreapp3.1;netframework4.7.2</TargetFrameworks>
<IsPackable>false</IsPackable>
<LangVersion>12.0</LangVersion>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="nunit" Version="3.12.0" />
<PackageReference Include="NUnit.Console" Version="3.11.1" />
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1">
<PackageReference Include="NUnit3TestAdapter" Version="4.6.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.4.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
</ItemGroup>

<ItemGroup>
Expand Down
21 changes: 7 additions & 14 deletions FuzzySharp.Test/FuzzyTests/RegressionTests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,37 @@ namespace FuzzySharp.Test.FuzzyTests
public class RegressionTests
{


/// <summary>
/// Test to ensure that all IRatioScorer implementations handle scoring empty strings and whitespace-only strings
/// </summary>
[Test]
public void TestScoringEmptyString()
{

var scorerType = typeof(IRatioScorer);
var assemblies = AppDomain.CurrentDomain.GetAssemblies().ToList();
var types = assemblies.SelectMany(s =>
{
Type[] types = new Type[] { }; ;
try
{
types = s.GetTypes();
return s.GetTypes();
}
catch {}
return types;
return [];
}).ToList();
var scorerTypes = types.Where(t => scorerType.IsAssignableFrom(t) && !t.IsAbstract && t.IsClass).ToList();
//var scorerTypes = AppDomain.CurrentDomain.GetAssemblies().SelectMany(s => s.GetTypes()).Where(p => scorerType.IsAssignableFrom(p) && p.IsClass && !p.IsAbstract);


MethodInfo getScorerCacheMethodInfo = typeof(ScorerCache).GetMethod("Get");


string nullString = null; //Null doesnt seem to be handled by any scorer
string emptyString = "";
string whitespaceString = " ";

string[] nullOrWhitespaceStrings = { emptyString, whitespaceString };
string[] nullOrWhitespaceStrings = [emptyString, whitespaceString];
MethodInfo getScorerCacheMethodInfo = typeof(ScorerCache).GetMethod("Get");

foreach (Type t in scorerTypes)
foreach (var t in scorerTypes)
{
System.Diagnostics.Debug.WriteLine($"Testing {t.Name}");
MethodInfo m = getScorerCacheMethodInfo.MakeGenericMethod(t);
IRatioScorer scorer = m.Invoke(this, new object[] { }) as IRatioScorer;
IRatioScorer scorer = m.Invoke(this, []) as IRatioScorer;

foreach(string s in nullOrWhitespaceStrings)
{
Expand Down Expand Up @@ -79,7 +73,6 @@ public void TestScoringEmptyString()

}


}

}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
using FuzzySharp.SimilarityRatio.Scorer;
using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;
using NUnit.Framework;
using System;
using System.Collections.Generic;
using System.Text;

namespace FuzzySharp.Test.FuzzyTests.ScorerTests
{
Expand Down
12 changes: 9 additions & 3 deletions FuzzySharp.sln
Original file line number Diff line number Diff line change
@@ -1,11 +1,13 @@

Microsoft Visual Studio Solution File, Format Version 12.00
# Visual Studio Version 16
VisualStudioVersion = 16.0.29806.167
# Visual Studio Version 17
VisualStudioVersion = 17.10.35122.118
MinimumVisualStudioVersion = 10.0.40219.1
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp", "FuzzySharp\FuzzySharp.csproj", "{348B90DA-DA44-45AD-B857-D3A69D05AE46}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzySharp.Test", "FuzzySharp.Test\FuzzySharp.Test.csproj", "{48F4C7CB-E669-410C-A455-DE3330347807}"
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp.Test", "FuzzySharp.Test\FuzzySharp.Test.csproj", "{48F4C7CB-E669-410C-A455-DE3330347807}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzySharp.Benchmarks", "FuzzySharp.Benchmarks\FuzzySharp.Benchmarks.csproj", "{480CAE39-ACA7-411A-BF6B-72E61ED6E129}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Expand All @@ -21,6 +23,10 @@ Global
{48F4C7CB-E669-410C-A455-DE3330347807}.Debug|Any CPU.Build.0 = Debug|Any CPU
{48F4C7CB-E669-410C-A455-DE3330347807}.Release|Any CPU.ActiveCfg = Release|Any CPU
{48F4C7CB-E669-410C-A455-DE3330347807}.Release|Any CPU.Build.0 = Release|Any CPU
{480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Debug|Any CPU.Build.0 = Debug|Any CPU
{480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Release|Any CPU.ActiveCfg = Release|Any CPU
{480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE
Expand Down
5 changes: 1 addition & 4 deletions FuzzySharp/Edits/MatchingBlock.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,6 @@ public class MatchingBlock
public int DestPos { get; set; }
public int Length { get; set; }

public override string ToString()
{
return $"({SourcePos},{DestPos},{Length})";
}
public override string ToString() => $"({SourcePos},{DestPos},{Length})";
}
}
67 changes: 67 additions & 0 deletions FuzzySharp/Extensions/StringExtensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
using System;
using System.Collections.Generic;

namespace FuzzySharp.Extensions
{
    /// <summary>
    /// Internal string helpers for pulling letter-only tokens out of text and for
    /// splitting, sorting and re-joining whitespace-separated words.
    /// </summary>
    internal static class StringExtensions
    {
        /// <summary>
        /// Collects every maximal run of consecutive letter characters (per <see cref="char.IsLetter(char)"/>)
        /// found in <paramref name="input"/>, in order of appearance.
        /// </summary>
        /// <param name="input">Text to scan; may be null or empty.</param>
        /// <returns>A new list of tokens; empty when <paramref name="input"/> is null, empty, or contains no letters.</returns>
        public static List<string> ExtractTokens(this string input)
        {
            var tokens = new List<string>();

            if (string.IsNullOrEmpty(input))
            {
                return tokens;
            }

            var position = 0;
            while (position < input.Length)
            {
                // Anything that is not a letter acts as a separator and is dropped.
                if (!char.IsLetter(input[position]))
                {
                    position++;
                    continue;
                }

                // Consume the whole run of letters that begins here.
                var runStart = position;
                do
                {
                    position++;
                }
                while (position < input.Length && char.IsLetter(input[position]));

                tokens.Add(input.AsSpan(runStart, position - runStart).ToString());
            }

            return tokens;
        }

        /// <summary>
        /// Splits <paramref name="input"/> on white-space characters, discarding empty entries.
        /// </summary>
        /// <param name="input">Text to split; may be null.</param>
        /// <returns>The words in their original order, or an empty array when <paramref name="input"/> is null, empty, or only white space.</returns>
        public static string[] SplitByAnySpace(this string input)
        {
            if (!string.IsNullOrWhiteSpace(input))
            {
                // An empty separator array makes string.Split treat white-space characters as the delimiters.
                return input.Split(EmptyArray<char>(), StringSplitOptions.RemoveEmptyEntries);
            }

            return [];
        }

        /// <summary>
        /// Splits <paramref name="input"/> with <see cref="SplitByAnySpace"/> and sorts the words in place
        /// using <see cref="Array.Sort(Array)"/> (the elements' default comparison).
        /// </summary>
        /// <param name="input">Text to split; may be null.</param>
        /// <returns>The sorted words; empty when there are none.</returns>
        public static string[] GetSortedWords(this string input)
        {
            var sorted = input.SplitByAnySpace();
            Array.Sort(sorted);
            return sorted;
        }

        /// <summary>
        /// Produces the words of <paramref name="input"/> in sorted order, joined by single spaces.
        /// </summary>
        /// <param name="input">Text to normalize; may be null.</param>
        /// <returns>The re-joined string; empty when <paramref name="input"/> has no words.</returns>
        public static string NormalizeSpacesAndSort(this string input) =>
            string.Join(" ", input.GetSortedWords());

        /// <summary>Returns an empty array of <typeparamref name="T"/>.</summary>
        private static T[] EmptyArray<T>() => [];
    }
}
2 changes: 1 addition & 1 deletion FuzzySharp/Extractor/ExtractedResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ public override string ToString()
{
return $"(string: {Value}, score: {Score}, index: {Index})";
}
return $"(value: {Value.ToString()}, score: {Score}, index: {Index})";
return $"(value: {Value}, score: {Score}, index: {Index})";
}
}
}
Loading