Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
cf17a0b
replace arrays with spans
ycherkes May 26, 2024
d4e0613
further optimizations
Jul 31, 2024
33daf54
code cleanup
Jul 31, 2024
f5073bd
further optimizations, code cleanup
Aug 2, 2024
48b4b45
Refactor and optimize codebase; update dependencies
Aug 6, 2024
1b39fb9
target lib to netstandard2.0-2.1
Aug 6, 2024
0b4f383
simplify test
Aug 7, 2024
bedb9b1
more optimization + benchmark
Aug 8, 2024
605d700
formatting
Aug 9, 2024
accca0a
revert some frameworks, fix unit tests
ycherkes Aug 10, 2024
e299031
little speedup
ycherkes Aug 10, 2024
4f07228
remove linq.max
ycherkes Aug 10, 2024
51e27ce
revert sealed classes back
ycherkes Aug 10, 2024
b813c58
remove unnecessary changes
ycherkes Aug 10, 2024
a83f39a
remove unnecessary vshistory from gitignore
ycherkes Aug 10, 2024
4fb074a
remove unnecessary conversion to span
ycherkes Aug 10, 2024
f43f4ee
replace concurrentdictionary with generic instance creation
ycherkes Aug 10, 2024
81bf39f
remove duplicate, format
Aug 12, 2024
b2ba8bb
remove unnecessary calls asspan in the cycle
Aug 12, 2024
dbd6231
Add prefix Raffinert to namespaces to be able to use and compare both…
ycherkes Aug 17, 2024
6c52200
reference classic fuzzysharp in benchmarks
ycherkes Aug 17, 2024
a0aade0
+ ExtractOne benchmark
ycherkes Aug 17, 2024
d0a80e5
feature: Extract method with (string query, IEnumerable<T> choices) s…
ycherkes Aug 18, 2024
e336f0d
readme
ycherkes Aug 18, 2024
0d20b55
add benchmark
ycherkes Aug 18, 2024
48ec28b
fix names, versions, formatting
ycherkes Aug 18, 2024
dc2b858
readme
ycherkes Aug 18, 2024
c2d5dde
remove readme duplicate
ycherkes Aug 18, 2024
b27faf3
readme
ycherkes Aug 18, 2024
9646f2b
fix a typo
ycherkes Aug 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
211 changes: 211 additions & 0 deletions FuzzySharp.Benchmarks/BenchmarkAll.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,211 @@
using BenchmarkDotNet.Attributes;
using Raffinert.FuzzySharp.Extractor;
using Raffinert.FuzzySharp.PreProcess;
using Classic = FuzzySharp;

namespace Raffinert.FuzzySharp.Benchmarks;

/// <summary>
/// BenchmarkDotNet suite that runs each <c>Raffinert.FuzzySharp</c> scorer next to the
/// same call on the referenced <c>FuzzySharp</c> package (aliased as <c>Classic</c>),
/// using identical literal inputs. <see cref="MemoryDiagnoserAttribute"/> adds
/// allocation columns to the report.
/// </summary>
[MemoryDiagnoser]
public class BenchmarkAll
{
    // Inputs shared by a benchmark and its "Classic" twin, so both sides are
    // guaranteed to score exactly the same strings.
    private const string RatioQuery = "mysmilarstring";
    private const string RatioDistantChoice = "myawfullysimilarstirng";
    private const string RatioCloseChoice = "mysimilarstring";

    private const string PartialNeedle = "similar";
    private const string PartialHaystack = "somewhresimlrbetweenthisstring";

    private const string SortLeft = "order words out of";
    private const string SortRight = " words out of order"; // leading space is part of the input

    private const string SetLeft = "fuzzy was a bear";
    private const string SetRight = "fuzzy fuzzy fuzzy bear";

    private const string WeightedLeft = "The quick brown fox jimps ofver the small lazy dog";
    private const string WeightedRight = "the quick brown fox jumps over the small lazy dog";

    private const string Initialism = "NASA";
    private const string NasaWithAnd = "National Aeronautics and Space Administration";
    private const string NasaShort = "National Aeronautics Space Administration";
    private const string NasaAddress = "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899";

    private const string Abbreviation = "bl 420";
    private const string AbbreviationExpanded = "Baseline section 420";

    private const string DistanceLeft = "chicago cubs vs new york mets";
    private const string DistanceRight = "new york mets vs chicago cubs";

    // Candidate rows for the ExtractOne benchmarks; element 0 of each row is the text that gets scored.
    private static readonly string[][] Events =
    [
        ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
        ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
        ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"]
    ];

    // Row searched for in Events by the ExtractOne benchmarks.
    private static readonly string[] Query = ["new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm"];

    // ---- Raffinert.FuzzySharp ----

    [Benchmark]
    public int Ratio1() => Fuzz.Ratio(RatioQuery, RatioDistantChoice);

    [Benchmark]
    public int Ratio2() => Fuzz.Ratio(RatioQuery, RatioCloseChoice);

    [Benchmark]
    public int PartialRatio() => Fuzz.PartialRatio(PartialNeedle, PartialHaystack);

    [Benchmark]
    public int TokenSortRatio() => Fuzz.TokenSortRatio(SortLeft, SortRight);

    [Benchmark]
    public int PartialTokenSortRatio() => Fuzz.PartialTokenSortRatio(SortLeft, SortRight);

    [Benchmark]
    public int TokenSetRatio() => Fuzz.TokenSetRatio(SetLeft, SetRight);

    [Benchmark]
    public int PartialTokenSetRatio() => Fuzz.PartialTokenSetRatio(SetLeft, SetRight);

    [Benchmark]
    public int WeightedRatio() => Fuzz.WeightedRatio(WeightedLeft, WeightedRight);

    [Benchmark]
    public int TokenInitialismRatio1() => Fuzz.TokenInitialismRatio(Initialism, NasaWithAnd);

    [Benchmark]
    public int TokenInitialismRatio2() => Fuzz.TokenInitialismRatio(Initialism, NasaShort);

    [Benchmark]
    public int TokenInitialismRatio3() => Fuzz.TokenInitialismRatio(Initialism, NasaAddress);

    [Benchmark]
    public int PartialTokenInitialismRatio() => Fuzz.PartialTokenInitialismRatio(Initialism, NasaAddress);

    [Benchmark]
    public int TokenAbbreviationRatio()
        => Fuzz.TokenAbbreviationRatio(Abbreviation, AbbreviationExpanded, PreprocessMode.Full);

    [Benchmark]
    public int PartialTokenAbbreviationRatio()
        => Fuzz.PartialTokenAbbreviationRatio(Abbreviation, AbbreviationExpanded, PreprocessMode.Full);

    // ---- Classic FuzzySharp package, same inputs ----

    [Benchmark]
    public int Ratio1Classic() => Classic.Fuzz.Ratio(RatioQuery, RatioDistantChoice);

    [Benchmark]
    public int Ratio2Classic() => Classic.Fuzz.Ratio(RatioQuery, RatioCloseChoice);

    [Benchmark]
    public int PartialRatioClassic() => Classic.Fuzz.PartialRatio(PartialNeedle, PartialHaystack);

    [Benchmark]
    public int TokenSortRatioClassic() => Classic.Fuzz.TokenSortRatio(SortLeft, SortRight);

    [Benchmark]
    public int PartialTokenSortRatioClassic() => Classic.Fuzz.PartialTokenSortRatio(SortLeft, SortRight);

    [Benchmark]
    public int TokenSetRatioClassic() => Classic.Fuzz.TokenSetRatio(SetLeft, SetRight);

    [Benchmark]
    public int PartialTokenSetRatioClassic() => Classic.Fuzz.PartialTokenSetRatio(SetLeft, SetRight);

    [Benchmark]
    public int WeightedRatioClassic() => Classic.Fuzz.WeightedRatio(WeightedLeft, WeightedRight);

    [Benchmark]
    public int TokenInitialismRatio1Classic() => Classic.Fuzz.TokenInitialismRatio(Initialism, NasaWithAnd);

    [Benchmark]
    public int TokenInitialismRatio2Classic() => Classic.Fuzz.TokenInitialismRatio(Initialism, NasaShort);

    [Benchmark]
    public int TokenInitialismRatio3Classic() => Classic.Fuzz.TokenInitialismRatio(Initialism, NasaAddress);

    [Benchmark]
    public int PartialTokenInitialismRatioClassic()
        => Classic.Fuzz.PartialTokenInitialismRatio(Initialism, NasaAddress);

    [Benchmark]
    public int TokenAbbreviationRatioClassic()
        => Classic.Fuzz.TokenAbbreviationRatio(Abbreviation, AbbreviationExpanded, Classic.PreProcess.PreprocessMode.Full);

    [Benchmark]
    public int PartialTokenAbbreviationRatioClassic()
        => Classic.Fuzz.PartialTokenAbbreviationRatio(Abbreviation, AbbreviationExpanded, Classic.PreProcess.PreprocessMode.Full);

    // ---- Extraction over string[] rows ----

    [Benchmark]
    public ExtractedResult<string[]> ExtractOne()
        => Process.ExtractOne(Query, Events, static row => row[0]);

    [Benchmark]
    public Classic.Extractor.ExtractedResult<string[]> ExtractOneClassic()
        => Classic.Process.ExtractOne(Query, Events, static row => row[0]);

    // ---- Raw edit distance: this library's Levenshtein vs. the Fastenshtein package ----

    [Benchmark]
    public int LevenshteinDistance()
        => Levenshtein.EditDistance(DistanceLeft.AsSpan(), DistanceRight.AsSpan());

    [Benchmark]
    public int FastenshteinDistance()
        => Fastenshtein.Levenshtein.Distance(DistanceLeft, DistanceRight);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
```

BenchmarkDotNet v0.14.0, Windows 11 (10.0.22631.4037/23H2/2023Update/SunValley3)
12th Gen Intel Core i7-1255U, 1 CPU, 12 logical and 10 physical cores
.NET SDK 8.0.400
[Host] : .NET 8.0.8 (8.0.824.36612), X64 RyuJIT AVX2
DefaultJob : .NET 8.0.8 (8.0.824.36612), X64 RyuJIT AVX2


```
| Method | Mean | Error | StdDev | Median | Gen0 | Gen1 | Allocated |
|------------------------------------- |-------------:|-------------:|-------------:|-------------:|-------:|-------:|----------:|
| Ratio1 | 206.81 ns | 2.409 ns | 2.136 ns | 207.29 ns | 0.0165 | - | 104 B |
| Ratio2 | 13.76 ns | 0.319 ns | 0.899 ns | 13.29 ns | - | - | - |
| PartialRatio | 723.24 ns | 22.148 ns | 59.498 ns | 692.31 ns | 0.3786 | 0.0010 | 2376 B |
| TokenSortRatio | 801.65 ns | 54.000 ns | 159.219 ns | 882.82 ns | 0.0896 | - | 568 B |
| PartialTokenSortRatio | 899.87 ns | 30.597 ns | 89.254 ns | 921.35 ns | 0.1154 | - | 728 B |
| TokenSetRatio | 1,093.68 ns | 28.071 ns | 80.993 ns | 1,096.79 ns | 0.3500 | - | 2200 B |
| PartialTokenSetRatio | 1,380.95 ns | 52.967 ns | 154.507 ns | 1,392.58 ns | 0.5112 | - | 3208 B |
| WeightedRatio | 12,561.44 ns | 767.193 ns | 2,225.766 ns | 13,232.62 ns | 0.7935 | - | 5072 B |
| TokenInitialismRatio1 | 294.56 ns | 6.757 ns | 18.946 ns | 297.41 ns | 0.0625 | - | 392 B |
| TokenInitialismRatio2 | 275.14 ns | 5.562 ns | 15.503 ns | 272.03 ns | 0.0548 | - | 344 B |
| TokenInitialismRatio3 | 542.62 ns | 10.893 ns | 29.635 ns | 541.23 ns | 0.1106 | - | 696 B |
| PartialTokenInitialismRatio | 749.64 ns | 15.039 ns | 32.373 ns | 744.13 ns | 0.1845 | - | 1160 B |
| TokenAbbreviationRatio | 1,270.08 ns | 24.756 ns | 41.361 ns | 1,255.59 ns | 0.2508 | - | 1576 B |
| PartialTokenAbbreviationRatio | 1,536.55 ns | 45.771 ns | 129.097 ns | 1,561.22 ns | 0.3357 | - | 2112 B |
| Ratio1Classic | 677.17 ns | 13.437 ns | 29.212 ns | 681.43 ns | 0.0505 | - | 320 B |
| Ratio2Classic | 104.42 ns | 2.102 ns | 3.626 ns | 105.17 ns | 0.0318 | - | 200 B |
| PartialRatioClassic | 2,249.40 ns | 44.588 ns | 118.242 ns | 2,274.26 ns | 0.5360 | 0.0019 | 3368 B |
| TokenSortRatioClassic | 3,071.78 ns | 92.892 ns | 266.524 ns | 3,143.59 ns | 0.3510 | - | 2216 B |
| PartialTokenSortRatioClassic | 3,317.62 ns | 64.881 ns | 82.054 ns | 3,327.15 ns | 0.4005 | - | 2536 B |
| TokenSetRatioClassic | 4,309.09 ns | 85.081 ns | 184.959 ns | 4,337.85 ns | 0.6905 | - | 4352 B |
| PartialTokenSetRatioClassic | 4,771.35 ns | 92.361 ns | 230.012 ns | 4,849.64 ns | 0.9308 | - | 5840 B |
| WeightedRatioClassic | 24,181.32 ns | 721.231 ns | 2,046.011 ns | 24,472.06 ns | 2.1362 | - | 13482 B |
| TokenInitialismRatio1Classic | 1,041.92 ns | 20.745 ns | 39.470 ns | 1,044.25 ns | 0.1440 | - | 904 B |
| TokenInitialismRatio2Classic | 824.97 ns | 26.765 ns | 75.051 ns | 844.97 ns | 0.1173 | - | 736 B |
| TokenInitialismRatio3Classic | 1,971.98 ns | 39.316 ns | 91.901 ns | 1,989.39 ns | 0.2460 | - | 1552 B |
| PartialTokenInitialismRatioClassic | 2,249.70 ns | 44.057 ns | 65.943 ns | 2,259.86 ns | 0.3414 | - | 2144 B |
| TokenAbbreviationRatioClassic | 2,727.98 ns | 84.791 ns | 241.914 ns | 2,779.33 ns | 0.4730 | - | 2984 B |
| PartialTokenAbbreviationRatioClassic | 3,162.92 ns | 88.249 ns | 247.460 ns | 3,193.32 ns | 0.6180 | - | 3896 B |
| ExtractOne | 33,770.23 ns | 1,260.134 ns | 3,595.234 ns | 34,371.46 ns | 1.8616 | - | 11728 B |
| ExtractOneClassic | 54,594.63 ns | 1,971.629 ns | 5,625.169 ns | 55,347.68 ns | 4.5776 | - | 29011 B |
| LevenshteinDistance | 2,096.37 ns | 58.508 ns | 167.872 ns | 2,141.95 ns | 0.0229 | - | 144 B |
| FastenshteinDistance | 1,533.82 ns | 38.323 ns | 108.715 ns | 1,564.52 ns | 0.0229 | - | 144 B |
22 changes: 22 additions & 0 deletions FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
<!-- Console project that hosts the BenchmarkDotNet benchmarks for the FuzzySharp library project. -->
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<!-- Built as an executable so the benchmarks can be launched directly. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Assembly name follows the project file name; the root namespace gets a "Raffinert." prefix. -->
<AssemblyName>$(MSBuildProjectName)</AssemblyName>
<RootNamespace>Raffinert.$(MSBuildProjectName.Replace(" ", "_"))</RootNamespace>
</PropertyGroup>

<ItemGroup>
<!-- Benchmark harness. -->
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
<!-- Third-party Levenshtein implementation used as a comparison point. -->
<PackageReference Include="Fastenshtein" Version="1.0.10" />
<!-- Published FuzzySharp package, benchmarked side by side with the project reference below. -->
<PackageReference Include="FuzzySharp" Version="2.0.2" />
</ItemGroup>

<ItemGroup>
<!-- The library under test, built from source in this repository. -->
<ProjectReference Include="..\FuzzySharp\FuzzySharp.csproj" />
</ItemGroup>

</Project>
38 changes: 38 additions & 0 deletions FuzzySharp.Benchmarks/Program.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
using BenchmarkDotNet.Running;
using Raffinert.FuzzySharp.Benchmarks;
//using Raffinert.FuzzySharp;
//using Raffinert.FuzzySharp.SimilarityRatio;
//using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite;
//using Classic = FuzzySharp;

// Entry point: run every benchmark class found in this assembly.
// The command-line arguments are forwarded so BenchmarkDotNet console options
// (for example `--filter *Ratio*` or `--list flat`) can narrow or configure a run
// instead of being silently ignored. With no arguments all benchmarks are run.
BenchmarkRunner.Run(typeof(Program).Assembly, args: args);

// Disabled ad-hoc check comparing WeightedRatioScorer results of the classic package
// and this library on a small sample. Re-enabling it also requires the commented-out
// using directives at the top of this file.
//var input1 = "+30.0% Damage to Close Enemies [30.01%";
//var input2Collection = new[]
//{
// "+#% Damage",
// "+#% Damage to Crowd Controlled Enemies",
// "+#% Damage to Close Enemies",
// "+#% Damage to Chilled Enemies",
// "+#% Damage to Poisoned Enemies",
// "#% Block Chance#% Blocked Damage Reduction",
// "#% Damage Reduction from Bleeding Enemies",
// "#% Damage Reduction",
// "+#% Cold Damage"
//};

//var classicScorer = Classic.SimilarityRatio.ScorerCache.Get<Classic.SimilarityRatio.Scorer.Composite.WeightedRatioScorer>();

//Func<string, int> classicScorerFunc = input2 => classicScorer.Score(input1, input2);

//var classicResult = input2Collection.Select(classicScorerFunc).ToList();

//var scorer = ScorerCache.Get<WeightedRatioScorer>();

//Func<string, int> scorerFunc = input2 => scorer.Score(input1, input2);

//var result = input2Collection.Select(scorerFunc).ToList();

//Console.WriteLine();

//Console.WriteLine(Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"));
36 changes: 18 additions & 18 deletions FuzzySharp.Test/EvaluationTests/EvaluationTests.cs
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
using FuzzySharp.PreProcess;
using FuzzySharp.SimilarityRatio;
using FuzzySharp.SimilarityRatio.Scorer.Composite;
using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;
using NUnit.Framework;
using NUnit.Framework;
using Raffinert.FuzzySharp.PreProcess;
using Raffinert.FuzzySharp.SimilarityRatio;
using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite;
using Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive;

namespace FuzzySharp.Test.EvaluationTests
namespace Raffinert.FuzzySharp.Test.EvaluationTests
{
[TestFixture]
public class EvaluationTests
Expand Down Expand Up @@ -36,20 +36,20 @@ public void Evaluate()



var h1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" });
var h2 = string.Join(", ", Process.ExtractTop("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, limit: 3));
var h3 = string.Join(", ", Process.ExtractAll("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }));
var h4 = string.Join(", ", Process.ExtractAll("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, cutoff: 40));
var h5 = string.Join(", ", Process.ExtractSorted("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }));
var h1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"]);
var h2 = string.Join(", ", Process.ExtractTop("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], limit: 3));
var h3 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"]));
var h4 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], cutoff: 40));
var h5 = string.Join(", ", Process.ExtractSorted("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"]));

var i1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" }, s => s, ScorerCache.Get<DefaultRatioScorer>());
var i1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"], s => s, ScorerCache.Get<DefaultRatioScorer>());

var events = new[]
{
new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" },
new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" },
new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" },
};
string[][] events =
[
["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"],
["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"],
["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"]
];
var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" };

var best = Process.ExtractOne(query, events, strings => strings[0]);
Expand Down
10 changes: 6 additions & 4 deletions FuzzySharp.Test/FuzzySharp.Test.csproj
Original file line number Diff line number Diff line change
@@ -1,19 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">

<PropertyGroup>
<TargetFramework>netcoreapp3.1</TargetFramework>

<TargetFrameworks>NET8.0;netcoreapp3.1;netframework4.7.2</TargetFrameworks>
<IsPackable>false</IsPackable>
<LangVersion>12.0</LangVersion>
<AssemblyName>Raffinert.$(MSBuildProjectName)</AssemblyName>
<RootNamespace>Raffinert.$(MSBuildProjectName.Replace(" ", "_"))</RootNamespace>
</PropertyGroup>

<ItemGroup>
<PackageReference Include="nunit" Version="3.12.0" />
<PackageReference Include="NUnit.Console" Version="3.11.1" />
<PackageReference Include="NUnit3TestAdapter" Version="3.16.1">
<PackageReference Include="NUnit3TestAdapter" Version="4.6.0">
<PrivateAssets>all</PrivateAssets>
<IncludeAssets>runtime; build; native; contentfiles; analyzers; buildtransitive</IncludeAssets>
</PackageReference>
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="16.4.0" />
<PackageReference Include="Microsoft.NET.Test.Sdk" Version="17.10.0" />
</ItemGroup>

<ItemGroup>
Expand Down
Loading