diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs b/FuzzySharp.Benchmarks/BenchmarkAll.cs new file mode 100644 index 0000000..2da09d3 --- /dev/null +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -0,0 +1,120 @@ +using BenchmarkDotNet.Attributes; +using FuzzySharp.Extractor; +using FuzzySharp.PreProcess; + +namespace FuzzySharp.Benchmarks; + +[MemoryDiagnoser] +public class BenchmarkAll +{ + [Benchmark] + public int Ratio1() + { + return Fuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); + } + + [Benchmark] + public int Ratio2() + { + return Fuzz.Ratio("mysmilarstring", "mysimilarstring"); + } + + [Benchmark] + public int PartialRatio() + { + return Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + } + + [Benchmark] + public int TokenSortRatio() + { + return Fuzz.TokenSortRatio("order words out of", " words out of order"); + } + + [Benchmark] + public int PartialTokenSortRatio() + { + return Fuzz.PartialTokenSortRatio("order words out of", " words out of order"); + } + + [Benchmark] + public int TokenSetRatio() + { + return Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + } + + [Benchmark] + public int PartialTokenSetRatio() + { + return Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + } + + [Benchmark] + public int WeightedRatio() + { + return Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); + } + + [Benchmark] + public int TokenInitialismRatio1() + { + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); + } + + [Benchmark] + public int TokenInitialismRatio2() + { + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); + } + + [Benchmark] + public int TokenInitialismRatio3() + { + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + } + + [Benchmark] + public int PartialTokenInitialismRatio() + { + return Fuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + } + + [Benchmark] + public int TokenAbbreviationRatio() + { + return Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + } + + [Benchmark] + public int PartialTokenAbbreviationRatio() + { + return Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + } + + private static readonly string[][] Events = + [ + ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"], + ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"], + ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"] + ]; + + private static readonly string[] Query = ["new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm"]; + + [Benchmark] + public ExtractedResult ExtractOne() + { + return Process.ExtractOne(Query, Events, static strings => strings[0]); + } + + [Benchmark] + public int LevenshteinDistance() + { + return Levenshtein.EditDistance("chicago cubs vs new york mets".AsSpan(), "new york mets vs chicago cubs".AsSpan()); + } + + [Benchmark] + public int FastenshteinDistance() + { + return Fastenshtein.Levenshtein.Distance("chicago cubs vs new york mets", "new york mets vs chicago cubs"); + } +} \ No newline at end of file diff --git a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj new file mode 100644 index 0000000..a6139a8 --- /dev/null +++ b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj @@ -0,0 +1,19 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + + + diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs new file mode 100644 index 0000000..6f75f6a --- /dev/null +++ b/FuzzySharp.Benchmarks/Program.cs @@ -0,0 +1,6 @@ +using BenchmarkDotNet.Running; +using FuzzySharp; + +BenchmarkRunner.Run(typeof(Program).Assembly); + +//Console.WriteLine(Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog")); \ No newline at end of file diff --git a/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs b/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs index eb22945..1ff2b5f 100644 --- a/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs +++ b/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs @@ -36,20 +36,20 @@ public void Evaluate() - var h1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" }); - var h2 = string.Join(", ", Process.ExtractTop("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, limit: 3)); - var h3 = string.Join(", ", Process.ExtractAll("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" })); - var h4 = string.Join(", ", Process.ExtractAll("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, cutoff: 40)); - var h5 = string.Join(", ", Process.ExtractSorted("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" })); - - var i1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" }, s => s, ScorerCache.Get()); - - var events = new[] - { - new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" }, - new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" }, - new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" }, - }; + var h1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"]); + var h2 = string.Join(", ", Process.ExtractTop("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], limit: 3)); + var h3 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"])); + var h4 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], cutoff: 40)); + var h5 = string.Join(", ", Process.ExtractSorted("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"])); + + var i1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"], s => s, ScorerCache.Get()); + + string[][] events = + [ + ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"], + ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"], + ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"] + ]; var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" }; var best = Process.ExtractOne(query, events, strings => strings[0]); diff --git a/FuzzySharp.Test/FuzzySharp.Test.csproj b/FuzzySharp.Test/FuzzySharp.Test.csproj index 6ea573a..4d977ff 100644 --- a/FuzzySharp.Test/FuzzySharp.Test.csproj +++ b/FuzzySharp.Test/FuzzySharp.Test.csproj @@ -1,19 +1,19 @@ - netcoreapp3.1 - + NET8.0;netcoreapp3.1;netframework4.7.2 false + 12.0 - + all runtime; build; native; contentfiles; analyzers; buildtransitive - + diff --git a/FuzzySharp.Test/FuzzyTests/RegressionTests.cs b/FuzzySharp.Test/FuzzyTests/RegressionTests.cs index 134d2ee..9fea3f8 100644 --- a/FuzzySharp.Test/FuzzyTests/RegressionTests.cs +++ b/FuzzySharp.Test/FuzzyTests/RegressionTests.cs @@ -12,43 +12,37 @@ namespace FuzzySharp.Test.FuzzyTests public class RegressionTests { - /// /// Test to ensure that all IRatioScorer implementations handle scoring empty strings & whitespace strings /// [Test] public void TestScoringEmptyString() { - var scorerType = typeof(IRatioScorer); var assemblies = AppDomain.CurrentDomain.GetAssemblies().ToList(); var types = assemblies.SelectMany(s => { - Type[] types = new Type[] { }; ; try { - types = s.GetTypes(); + return s.GetTypes(); } catch {} - return types; + return []; }).ToList(); var scorerTypes = types.Where(t => scorerType.IsAssignableFrom(t) && !t.IsAbstract && t.IsClass).ToList(); - //var scorerTypes = AppDomain.CurrentDomain.GetAssemblies().SelectMany(s => s.GetTypes()).Where(p => scorerType.IsAssignableFrom(p) && p.IsClass && !p.IsAbstract); - - - MethodInfo getScorerCacheMethodInfo = typeof(ScorerCache).GetMethod("Get"); - + string nullString = null; //Null doesnt seem to be handled by any scorer string emptyString = ""; string whitespaceString = " "; - string[] nullOrWhitespaceStrings = { emptyString, whitespaceString }; + string[] nullOrWhitespaceStrings = [emptyString, whitespaceString]; + MethodInfo getScorerCacheMethodInfo = typeof(ScorerCache).GetMethod("Get"); - foreach (Type t in scorerTypes) + foreach (var t in scorerTypes) { System.Diagnostics.Debug.WriteLine($"Testing {t.Name}"); MethodInfo m = getScorerCacheMethodInfo.MakeGenericMethod(t); - IRatioScorer scorer = m.Invoke(this, new object[] { }) as IRatioScorer; + IRatioScorer scorer = m.Invoke(this, []) as IRatioScorer; foreach(string s in nullOrWhitespaceStrings) { @@ -79,7 +73,6 @@ public void TestScoringEmptyString() } - } } diff --git a/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs b/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs index 4cf7d6f..481b1f2 100644 --- a/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs +++ b/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs @@ -2,9 +2,6 @@ using FuzzySharp.SimilarityRatio.Scorer; using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; using NUnit.Framework; -using System; -using System.Collections.Generic; -using System.Text; namespace FuzzySharp.Test.FuzzyTests.ScorerTests { diff --git a/FuzzySharp.sln b/FuzzySharp.sln index 78eed94..168d5a0 100644 --- a/FuzzySharp.sln +++ b/FuzzySharp.sln @@ -1,11 +1,13 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29806.167 +# Visual Studio Version 17 +VisualStudioVersion = 17.10.35122.118 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp", "FuzzySharp\FuzzySharp.csproj", "{348B90DA-DA44-45AD-B857-D3A69D05AE46}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzySharp.Test", "FuzzySharp.Test\FuzzySharp.Test.csproj", "{48F4C7CB-E669-410C-A455-DE3330347807}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp.Test", "FuzzySharp.Test\FuzzySharp.Test.csproj", "{48F4C7CB-E669-410C-A455-DE3330347807}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzySharp.Benchmarks", "FuzzySharp.Benchmarks\FuzzySharp.Benchmarks.csproj", "{480CAE39-ACA7-411A-BF6B-72E61ED6E129}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -21,6 +23,10 @@ Global {48F4C7CB-E669-410C-A455-DE3330347807}.Debug|Any CPU.Build.0 = Debug|Any CPU {48F4C7CB-E669-410C-A455-DE3330347807}.Release|Any CPU.ActiveCfg = Release|Any CPU {48F4C7CB-E669-410C-A455-DE3330347807}.Release|Any CPU.Build.0 = Release|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Debug|Any CPU.Build.0 = Debug|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Release|Any CPU.ActiveCfg = Release|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/FuzzySharp/Edits/MatchingBlock.cs b/FuzzySharp/Edits/MatchingBlock.cs index 585b6ad..16ea018 100644 --- a/FuzzySharp/Edits/MatchingBlock.cs +++ b/FuzzySharp/Edits/MatchingBlock.cs @@ -6,9 +6,6 @@ public class MatchingBlock public int DestPos { get; set; } public int Length { get; set; } - public override string ToString() - { - return $"({SourcePos},{DestPos},{Length})"; - } + public override string ToString() => $"({SourcePos},{DestPos},{Length})"; } } diff --git a/FuzzySharp/Extensions/StringExtensions.cs b/FuzzySharp/Extensions/StringExtensions.cs new file mode 100644 index 0000000..f05209a --- /dev/null +++ b/FuzzySharp/Extensions/StringExtensions.cs @@ -0,0 +1,67 @@ +using System; +using System.Collections.Generic; + +namespace FuzzySharp.Extensions +{ + internal static class StringExtensions + { + public static List ExtractTokens(this string input) + { + var result = new List(); + + if (string.IsNullOrEmpty(input)) + return result; + + var span = input.AsSpan(); + + var start = 0; + for (var i = 0; i < span.Length; i++) + { + if (char.IsLetter(span[i])) continue; + + if (i - start > 0) + { + result.Add(span[start..i].ToString()); + } + + start = i+1; + } + + if (span.Length - start > 0) + result.Add(span[start..].ToString()); + + return result; + } + + public static string[] SplitByAnySpace(this string input) + { + if (string.IsNullOrWhiteSpace(input)) + return []; + + var words = input.Split(EmptyArray(), StringSplitOptions.RemoveEmptyEntries); + + return words; + } + + public static string[] GetSortedWords(this string input) + { + var words = SplitByAnySpace(input); + + Array.Sort(words); + + return words; + } + + public static string NormalizeSpacesAndSort(this string input) + { + var words = GetSortedWords(input); + + return string.Join(" ", words); + } + + private static T[] EmptyArray() + { + return []; + } + } +} diff --git a/FuzzySharp/Extractor/ExtractedResult.cs b/FuzzySharp/Extractor/ExtractedResult.cs index 43f41e2..30aad9c 100644 --- a/FuzzySharp/Extractor/ExtractedResult.cs +++ b/FuzzySharp/Extractor/ExtractedResult.cs @@ -34,7 +34,7 @@ public override string ToString() { return $"(string: {Value}, score: {Score}, index: {Index})"; } - return $"(value: {Value.ToString()}, score: {Score}, index: {Index})"; + return $"(value: {Value}, score: {Score}, index: {Index})"; } } } diff --git a/FuzzySharp/Extractor/ResultExtractor.cs b/FuzzySharp/Extractor/ResultExtractor.cs index 66b7168..148e5ac 100644 --- a/FuzzySharp/Extractor/ResultExtractor.cs +++ b/FuzzySharp/Extractor/ResultExtractor.cs @@ -8,13 +8,12 @@ namespace FuzzySharp.Extractor { public static class ResultExtractor { - public static IEnumerable> ExtractWithoutOrder(T query, IEnumerable choices, Func processor, IRatioScorer scorer, int cutoff = 0) + public static IEnumerable> ExtractWithoutOrder(string query, IEnumerable choices, Func processor, IRatioScorer scorer, int cutoff = 0) { int index = 0; - var processedQuery = processor(query); foreach (var choice in choices) { - int score = scorer.Score(processedQuery, processor(choice)); + int score = scorer.Score(query, processor(choice)); if (score >= cutoff) { yield return new ExtractedResult(choice, score, index); @@ -23,6 +22,12 @@ public static IEnumerable> ExtractWithoutOrder(T query, IE } } + public static IEnumerable> ExtractWithoutOrder(T query, IEnumerable choices, Func processor, IRatioScorer scorer, int cutoff = 0) + { + var processedQuery = processor(query); + return ExtractWithoutOrder(processedQuery, choices, processor, scorer, cutoff); + } + public static ExtractedResult ExtractOne(T query, IEnumerable choices, Func processor, IRatioScorer calculator, int cutoff = 0) { return ExtractWithoutOrder(query, choices, processor, calculator, cutoff).Max(); diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index d2cee32..529ab21 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,38 +1,43 @@  - - netcoreapp2.0;netcoreapp2.1;netstandard1.6;netstandard2.0;netstandard2.1;net45;net46;net461 - true - Jacob Bayer - Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek - Fuzzy String Matching Comparison FuzzyWuzzy FuzzySharp - false - https://github.com/JakeBayer/FuzzySharp - MIT - git - - 2.0.2 - Include source link - true - https://github.com/JakeBayer/FuzzySharp - 1.0.4.0 - 1.0.4.0 - - true - true - snupkg + + 1.0.5.0 + Jacob Bayer + + Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek + 1.0.5.0 + true + true + 12.0 + MIT + https://github.com/JakeBayer/FuzzySharp + Include source link + false + Fuzzy String Matching Comparison FuzzyWuzzy FuzzySharp + true + git + https://github.com/JakeBayer/FuzzySharp + snupkg + netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;NET60;NET80 + 2.0.3 + + + + - + - - - - System - - - + + + diff --git a/FuzzySharp/Levenshtein.cs b/FuzzySharp/Levenshtein.cs index 5b620af..1660705 100644 --- a/FuzzySharp/Levenshtein.cs +++ b/FuzzySharp/Levenshtein.cs @@ -1,7 +1,5 @@ using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using FuzzySharp.Edits; namespace FuzzySharp @@ -10,21 +8,19 @@ public static class Levenshtein { private static EditOp[] GetEditOps(T[] arr1, T[] arr2) where T : IEquatable { - return GetEditOps(arr1.Length, arr1, arr2.Length, arr2); + return GetEditOps(arr1.Length, (ReadOnlySpan)arr1, arr2.Length, (ReadOnlySpan)arr2); } // Special Case - private static EditOp[] GetEditOps(string s1, string s2) + private static EditOp[] GetEditOps(ReadOnlySpan s1, ReadOnlySpan s2) { - return GetEditOps(s1.Length, s1.ToCharArray(), s2.Length, s2.ToCharArray()); + return GetEditOps(s1.Length, s1, s2.Length, s2); } - private static EditOp[] GetEditOps(int len1, T[] c1, int len2, T[] c2) where T : IEquatable + private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, ReadOnlySpan c2) where T : IEquatable { int i; - int[] matrix; - int p1 = 0; int p2 = 0; @@ -53,7 +49,7 @@ private static EditOp[] GetEditOps(int len1, T[] c1, int len2, T[] c2) where len1++; len2++; - matrix = new int[len2 * len1]; + int[] matrix = new int[len2 * len1]; for (i = 0; i < len2; i++) matrix[i] = i; @@ -98,13 +94,12 @@ private static EditOp[] GetEditOps(int len1, T[] c1, int len2, T[] c2) where } - return EditOpsFromCostMatrix(len1, c1, p1, len1o, len2, c2, p2, len2o, matrix); } - private static EditOp[] EditOpsFromCostMatrix(int len1, T[] c1, int p1, int o1, - int len2, T[] c2, int p2, int o2, + private static EditOp[] EditOpsFromCostMatrix(int len1, ReadOnlySpan c1, int p1, int o1, + int len2, ReadOnlySpan c2, int p2, int o2, int[] matrix) where T: IEquatable { @@ -232,7 +227,7 @@ public static MatchingBlock[] GetMatchingBlocks(T[] s1, T[] s2) where T : IEq } // Special Case - public static MatchingBlock[] GetMatchingBlocks(string s1, string s2) + public static MatchingBlock[] GetMatchingBlocks(ReadOnlySpan s1, ReadOnlySpan s2) { return GetMatchingBlocks(s1.Length, s2.Length, GetEditOps(s1, s2)); @@ -250,7 +245,7 @@ public static MatchingBlock[] GetMatchingBlocks(int len1, int len2, OpCode[] ops noOfMB = 0; - for (i = n; i-- != 0; o++) + for (i = n; i != 0; i--, o++) { if (ops[o].EditType == EditType.KEEP) { @@ -300,7 +295,7 @@ public static MatchingBlock[] GetMatchingBlocks(int len1, int len2, OpCode[] ops Debug.Assert(mb != noOfMB); - MatchingBlock finalBlock = new MatchingBlock + var finalBlock = new MatchingBlock { SourcePos = len1, DestPos = len2, @@ -328,7 +323,9 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op EditType type; - for (i = n; i != 0;) + i = n; + + while (i > 0) { @@ -383,9 +380,6 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op } while (i != 0 && ops[o].EditType == type && SourcePos == ops[o].SourcePos && DestPos == ops[o].DestPos); break; - - default: - break; } } @@ -400,8 +394,9 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op SourcePos = DestPos = 0; int mbIndex = 0; + i = n; - for (i = n; i != 0;) + while (i > 0) { while (ops[o].EditType == EditType.KEEP && --i != 0) @@ -412,11 +407,13 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op if (SourcePos < ops[o].SourcePos || DestPos < ops[o].DestPos) { - MatchingBlock mb = new MatchingBlock(); + var mb = new MatchingBlock + { + SourcePos = SourcePos, + DestPos = DestPos, + Length = ops[o].SourcePos - SourcePos + }; - mb.SourcePos = SourcePos; - mb.DestPos = DestPos; - mb.Length = ops[o].SourcePos - SourcePos; SourcePos = ops[o].SourcePos; DestPos = ops[o].DestPos; @@ -458,9 +455,6 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op } while (i != 0 && ops[o].EditType == type && SourcePos == ops[o].SourcePos && DestPos == ops[o].DestPos); break; - - default: - break; } } @@ -468,20 +462,24 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op { Debug.Assert(len1 -SourcePos == len2 - DestPos); - MatchingBlock mb = new MatchingBlock(); - mb.SourcePos = SourcePos; - mb.DestPos = DestPos; - mb.Length = len1 - SourcePos; + var mb = new MatchingBlock + { + SourcePos = SourcePos, + DestPos = DestPos, + Length = len1 - SourcePos + }; matchingBlocks[mbIndex++] = mb; } Debug.Assert(numberOfMatchingBlocks == mbIndex); - MatchingBlock finalBlock = new MatchingBlock(); - finalBlock.SourcePos = len1; - finalBlock.DestPos = len2; - finalBlock.Length = 0; + var finalBlock = new MatchingBlock + { + SourcePos = len1, + DestPos = len2, + Length = 0 + }; matchingBlocks[mbIndex] = finalBlock; @@ -499,7 +497,9 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) noOfBlocks = 0; SourcePos = DestPos = 0; - for (i = n; i != 0;) + i = n; + + while (i > 0) { while (ops[o].EditType == EditType.KEEP && --i != 0) @@ -555,9 +555,6 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) } while (i != 0 && ops[o].EditType == type && SourcePos == ops[o].SourcePos && DestPos == ops[o].DestPos); break; - - default: - break; } } @@ -570,7 +567,9 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) SourcePos = DestPos = 0; int oIndex = 0; - for (i = n; i != 0;) + i = n; + + while (i > 0) { while (ops[o].EditType == EditType.KEEP && --i != 0) @@ -648,16 +647,15 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) { Debug.Assert(len1 - SourcePos == len2 - DestPos); - if (opCodes[oIndex] == null) - opCodes[oIndex] = new OpCode(); - opCodes[oIndex].EditType = EditType.KEEP; - opCodes[oIndex].SourceBegin = SourcePos; - opCodes[oIndex].DestBegin = DestPos; - opCodes[oIndex].SourceEnd = len1; - opCodes[oIndex].DestEnd = len2; - oIndex++; + var opcode = opCodes[oIndex] ?? (opCodes[oIndex] = new OpCode()); + opcode.EditType = EditType.KEEP; + opcode.SourceBegin = SourcePos; + opcode.DestBegin = DestPos; + opcode.SourceEnd = len1; + opcode.DestEnd = len2; + oIndex++; } Debug.Assert(oIndex == noOfBlocks); @@ -665,13 +663,7 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) return opCodes; } - // Special Case - public static int EditDistance(string s1, string s2, int xcost = 0) - { - return EditDistance(s1.ToCharArray(), s2.ToCharArray(), xcost); - } - - public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEquatable + public static int EditDistance(ReadOnlySpan c1, ReadOnlySpan c2, int xcost = 0) where T: IEquatable { int i; @@ -720,7 +712,7 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua str1 = str2; str2 = temp; - T[] t = c2; + ReadOnlySpan t = c2; c2 = c1; c1 = t; @@ -733,10 +725,8 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua { return len2 + 1 - 2 * Memchr(c2, str2, c1[str1], len2); } - else - { - return len2 - Memchr(c2, str2, c1[str1], len2); - } + + return len2 - Memchr(c2, str2, c1[str1], len2); } len1++; @@ -773,7 +763,7 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua if (ch1.Equals(c2[c2p++])) { - x = --D; + x = D-1; } else { @@ -839,7 +829,7 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua /* main */ while (p <= end) { - int c3 = --D + (!ch1.Equals(c2[c2p++]) ? 1 : 0); + int c3 = D-1 + (!ch1.Equals(c2[c2p++]) ? 1 : 0); x++; if (x > c3) { @@ -856,7 +846,7 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua /* lower triangle sentinel */ if (i <= half) { - int c3 = --D + (!ch1.Equals(c2[c2p]) ? 1 : 0); + int c3 = D - 1 + (!ch1.Equals(c2[c2p]) ? 1 : 0); x++; if (x > c3) { @@ -873,7 +863,7 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua } - private static int Memchr(T[] haystack, int offset, T needle, int num) where T : IEquatable + private static int Memchr(ReadOnlySpan haystack, int offset, T needle, int num) where T : IEquatable { if (num != 0) @@ -899,20 +889,18 @@ public static double GetRatio(T[] input1, T[] input2) where T : IEquatable int len2 = input2.Length; int lensum = len1 + len2; - int editDistance = EditDistance(input1, input2, 1); + int editDistance = EditDistance(input1.AsSpan(), input2.AsSpan(), 1); return editDistance == 0 ? 1 : (lensum - editDistance) / (double)lensum; } - public static double GetRatio(IEnumerable input1, IEnumerable input2) where T : IEquatable + public static double GetRatio(ReadOnlySpan input1, ReadOnlySpan input2) where T : IEquatable { - var s1 = input1.ToArray(); - var s2 = input2.ToArray(); - int len1 = s1.Length; - int len2 = s2.Length; + int len1 = input1.Length; + int len2 = input2.Length; int lensum = len1 + len2; - int editDistance = EditDistance(s1, s2, 1); + int editDistance = EditDistance(input1, input2, 1); return editDistance == 0 ? 1 : (lensum - editDistance) / (double)lensum; } @@ -920,7 +908,7 @@ public static double GetRatio(IEnumerable input1, IEnumerable input2) w // Special Case public static double GetRatio(string s1, string s2) { - return GetRatio(s1.ToCharArray(), s2.ToCharArray()); + return GetRatio(s1.AsSpan(), s2.AsSpan()); } } } diff --git a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs index 0cc5647..e0aaa59 100644 --- a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs +++ b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs @@ -1,31 +1,35 @@ using System; -using System.Text.RegularExpressions; namespace FuzzySharp.PreProcess { - internal class StringPreprocessorFactory + internal static class StringPreprocessorFactory { - private static string pattern = "[^ a-zA-Z0-9]"; - private static string Default(string input) { - input = Regex.Replace(input, pattern, " "); - input = input.ToLower(); + if (string.IsNullOrWhiteSpace(input)) + { + return string.Empty; + } + + var result = new char[input.Length].AsSpan(); - return input.Trim(); + for (var i = 0; i < input.Length; i++) + { + var c = input[i]; + result[i] = char.IsLetterOrDigit(c) ? char.ToLower(c) : ' '; + } + + return ((ReadOnlySpan)result).Trim().ToString(); } public static Func GetPreprocessor(PreprocessMode mode) { - switch (mode) + return mode switch { - case PreprocessMode.Full: - return Default; - case PreprocessMode.None: - return s => s; - default: - throw new InvalidOperationException($"Invalid string preprocessor mode: {mode}"); - } + PreprocessMode.Full => Default, + PreprocessMode.None => static s => s, + _ => throw new InvalidOperationException($"Invalid string preprocessor mode: {mode}") + }; } } } diff --git a/FuzzySharp/Process.cs b/FuzzySharp/Process.cs index dbc5caf..9e33700 100644 --- a/FuzzySharp/Process.cs +++ b/FuzzySharp/Process.cs @@ -31,8 +31,8 @@ public static IEnumerable> ExtractAll( IRatioScorer scorer = null, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractWithoutOrder(query, choices, processor, scorer, cutoff); } @@ -53,7 +53,28 @@ public static IEnumerable> ExtractAll( IRatioScorer scorer = null, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; + return ResultExtractor.ExtractWithoutOrder(query, choices, processor, scorer, cutoff); + } + + /// + /// Creates a list of ExtractedResult which contain all the choices with + /// their corresponding score where higher is more similar + /// + /// + /// + /// + /// + /// + /// + public static IEnumerable> ExtractAll( + string query, + IEnumerable choices, + Func processor, + IRatioScorer scorer = null, + int cutoff = 0) + { + scorer ??= s_defaultScorer; return ResultExtractor.ExtractWithoutOrder(query, choices, processor, scorer, cutoff); } #endregion @@ -78,8 +99,8 @@ public static IEnumerable> ExtractTop( int limit = 5, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractTop(query, choices, processor, scorer, limit, cutoff); } @@ -103,7 +124,7 @@ public static IEnumerable> ExtractTop( int limit = 5, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractTop(query, choices, processor, scorer, limit, cutoff); } #endregion @@ -125,8 +146,8 @@ public static IEnumerable> ExtractSorted( IRatioScorer scorer = null, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractSorted(query, choices, processor, scorer, cutoff); } @@ -146,7 +167,7 @@ public static IEnumerable> ExtractSorted( IRatioScorer scorer = null, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractSorted(query, choices, processor, scorer, cutoff); } #endregion @@ -168,8 +189,8 @@ public static ExtractedResult ExtractOne( IRatioScorer scorer = null, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractOne(query, choices, processor, scorer, cutoff); } @@ -189,7 +210,7 @@ public static ExtractedResult ExtractOne( IRatioScorer scorer = null, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractOne(query, choices, processor, scorer, cutoff); } diff --git a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs index b3744ec..2700f15 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs @@ -1,5 +1,4 @@ using System; -using System.Linq; namespace FuzzySharp.SimilarityRatio.Scorer.Composite { @@ -38,16 +37,12 @@ public override int Score(string input1, string input2) double partialSor = Fuzz.TokenSortRatio(input1, input2) * unbaseScale * partialScale; double partialSet = Fuzz.TokenSetRatio(input1, input2) * unbaseScale * partialScale; - return (int) Math.Round(new[] { baseRatio, partial, partialSor, partialSet }.Max()); + return (int) Math.Round(Math.Max(baseRatio, Math.Max(partial, Math.Max(partialSor, partialSet)))); } - else - { - double tokenSort = Fuzz.TokenSortRatio(input1, input2) * unbaseScale; - double tokenSet = Fuzz.TokenSetRatio(input1, input2) * unbaseScale; - return (int) Math.Round(new[] { baseRatio, tokenSort, tokenSet }.Max()); - } + double tokenSort = Fuzz.TokenSortRatio(input1, input2) * unbaseScale; + double tokenSet = Fuzz.TokenSetRatio(input1, input2) * unbaseScale; + return (int) Math.Round(Math.Max(baseRatio, Math.Max(tokenSort, tokenSet))); } - } } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs index 98c95ce..8e5a9b1 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs @@ -1,6 +1,6 @@ -using System.Collections.Generic; -using System.Linq; -using System.Text.RegularExpressions; +using System; +using System.Collections.Generic; +using FuzzySharp.Extensions; using FuzzySharp.Utils; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive @@ -23,25 +23,25 @@ public override int Score(string input1, string input2) longer = input1; } - double lenRatio = ((double)longer.Length) / shorter.Length; + double lenRatio = (double)longer.Length / shorter.Length; // if longer isn't at least 1.5 times longer than the other, then its probably not an abbreviation if (lenRatio < 1.5) return 0; // numbers can't be abbreviations for other numbers, though that would be hilarious. "Yes, 4 - as in 4,238" - var tokensLonger = Regex.Matches(longer, @"[a-zA-Z]+").Cast().Select(m => m.Value).ToArray(); - var tokensShorter = Regex.Matches(shorter, @"[a-zA-Z]+").Cast().Select(m => m.Value).ToArray(); + var tokensLonger = longer.ExtractTokens(); + var tokensShorter = shorter.ExtractTokens(); // more than 4 tokens and it's probably not an abbreviation (and could get costly) - if (tokensShorter.Length > 4) + if (tokensShorter.Count > 4) { return 0; } - string[] moreTokens; - string[] fewerTokens; + List moreTokens; + List fewerTokens; - if (tokensLonger.Length > tokensShorter.Length) + if (tokensLonger.Count > tokensShorter.Count) { moreTokens = tokensLonger; fewerTokens = tokensShorter; @@ -52,26 +52,31 @@ public override int Score(string input1, string input2) fewerTokens = tokensLonger; } - var allPermutations = moreTokens.PermutationsOfSize(fewerTokens.Length); + var allPermutations = moreTokens.PermutationsOfSize(fewerTokens.Count); + + int maxScore = 0; - List allScores = new List(); foreach (var permutation in allPermutations) { double sum = 0; - for (int i = 0; i < fewerTokens.Length; i++) + for (int i = 0; i < fewerTokens.Count; i++) { var i1 = permutation[i]; var i2 = fewerTokens[i]; - if (StringContainsInOrder(i1, i2)) // must be at least twice as long + if (StringContainsInOrder(i1.AsSpan(), i2.AsSpan())) // must be at least twice as long { var score = Scorer(i1, i2); sum += score; } } - allScores.Add((int) (sum / fewerTokens.Length)); + var avgScore = (int) (sum / fewerTokens.Count); + if(avgScore > maxScore) + { + maxScore = avgScore; + } } - return allScores.Count==0?0:allScores.Max(); + return maxScore; } /// @@ -80,7 +85,7 @@ public override int Score(string input1, string input2) /// /// /// - private bool StringContainsInOrder(string s1, string s2) + private static bool StringContainsInOrder(ReadOnlySpan s1, ReadOnlySpan s2) { if (s1.Length < s2.Length) return false; int s2_idx = 0; diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs index 11036af..0e02491 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs @@ -1,5 +1,4 @@ -using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; using FuzzySharp.PreProcess; using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive.Generic; @@ -14,8 +13,8 @@ public override int Score(string[] input1, string[] input2) public int Score(string input1, string input2) { - var tokens1 = Regex.Split(input1, @"\s+").Where(s => s.Any()).OrderBy(s => s).ToArray(); - var tokens2 = Regex.Split(input2, @"\s+").Where(s => s.Any()).OrderBy(s => s).ToArray(); + var tokens1 = input1.GetSortedWords(); + var tokens2 = input2.GetSortedWords(); return Score(tokens1, tokens2); } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs index 10aa1af..4dd3a6b 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs @@ -1,5 +1,5 @@ using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { @@ -26,9 +26,9 @@ public override int Score(string input1, string input2) // if longer isn't at least 3 times longer than the other, then it's probably not an initialism if (lenRatio < 3) return 0; - var initials = Regex.Split(longer, @"\s+").Where(s => s.Any()).Select(s => s[0]); + var initials = longer.SplitByAnySpace().Select(s => s[0]).ToArray(); - return Scorer(string.Join("", initials), shorter); + return Scorer(new string(initials), shorter); } } } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 785de55..af61e86 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -1,7 +1,7 @@ using System; using System.Collections.Generic; using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { @@ -9,19 +9,36 @@ public abstract class TokenSetScorerBase : StrategySensitiveScorerBase { public override int Score(string input1, string input2) { - var tokens1 = new HashSet(Regex.Split(input1, @"\s+").Where(s => s.Any())); - var tokens2 = new HashSet(Regex.Split(input2, @"\s+").Where(s => s.Any())); + var tokens1 = new HashSet(input1.SplitByAnySpace()); + var tokens2 = new HashSet(input2.SplitByAnySpace()); - var sortedIntersection = String.Join(" ", tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); - var sortedDiff1To2 = (sortedIntersection + " " + String.Join(" ", tokens1.Except(tokens2).OrderBy(s => s))).Trim(); - var sortedDiff2To1 = (sortedIntersection + " " + String.Join(" ", tokens2.Except(tokens1).OrderBy(s => s))).Trim(); + var intersection = GetIntersectionAndExcept(tokens1, tokens2); - return new[] + var sortedIntersection = string.Join(" ", intersection.OrderBy(s => s)); + var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.OrderBy(s => s))).Trim(); + var sortedDiff2To1 = (sortedIntersection + " " + string.Join(" ", tokens2.OrderBy(s => s))).Trim(); + + var score1 = Scorer(sortedIntersection, sortedDiff1To2); + var score2 = Scorer(sortedIntersection, sortedDiff2To1); + var score3 = Scorer(sortedDiff1To2, sortedDiff2To1); + + return Math.Max(score1, Math.Max(score2, score3)); + } + + private static List GetIntersectionAndExcept(HashSet first, HashSet second) + { + List intersection = []; + + foreach (var item in first.ToArray()) { - Scorer(sortedIntersection, sortedDiff1To2), - Scorer(sortedIntersection, sortedDiff2To1), - Scorer(sortedDiff1To2, sortedDiff2To1) - }.Max(); + if (second.Remove(item)) + { + first.Remove(item); + intersection.Add(item); + } + } + + return intersection; } } } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs index dbfa10a..032b779 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs @@ -1,6 +1,4 @@ -using System; -using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { @@ -8,8 +6,8 @@ public abstract class TokenSortScorerBase : StrategySensitiveScorerBase { public override int Score(string input1, string input2) { - var sorted1 = String.Join(" ", Regex.Split(input1, @"\s+").Where(s => s.Any()).OrderBy(s => s)).Trim(); - var sorted2 = String.Join(" ", Regex.Split(input2, @"\s+").Where(s => s.Any()).OrderBy(s => s)).Trim(); + var sorted1 = input1.NormalizeSpacesAndSort(); + var sorted2 = input2.NormalizeSpacesAndSort(); return Scorer(sorted1, sorted2); } diff --git a/FuzzySharp/SimilarityRatio/ScorerCache.cs b/FuzzySharp/SimilarityRatio/ScorerCache.cs index 34b405e..15229bb 100644 --- a/FuzzySharp/SimilarityRatio/ScorerCache.cs +++ b/FuzzySharp/SimilarityRatio/ScorerCache.cs @@ -1,15 +1,17 @@ -using System; -using System.Collections.Concurrent; +using System.Runtime.CompilerServices; using FuzzySharp.SimilarityRatio.Scorer; namespace FuzzySharp.SimilarityRatio { public static class ScorerCache { - private static readonly ConcurrentDictionary s_scorerCache = new ConcurrentDictionary(); - public static IRatioScorer Get() where T : IRatioScorer, new() + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static IRatioScorer Get() where T : IRatioScorer, new() => GenericCache.Instance; + + private static class GenericCache + where T : IRatioScorer, new() { - return s_scorerCache.GetOrAdd(typeof(T), new T()); + public static readonly T Instance = new T(); } } -} +} \ No newline at end of file diff --git a/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs index 8e8fac2..72d6cd3 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs @@ -2,7 +2,7 @@ namespace FuzzySharp.SimilarityRatio.Strategy { - internal class DefaultRatioStrategy + internal static class DefaultRatioStrategy { public static int Calculate(string input1, string input2) { diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs index 2fdfb08..f6efd79 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs @@ -2,7 +2,7 @@ namespace FuzzySharp.SimilarityRatio.Strategy.Generic { - internal class DefaultRatioStrategy where T : IEquatable + internal static class DefaultRatioStrategy where T : IEquatable { public static int Calculate(T[] input1, T[] input2) { diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index a536da4..2f35fce 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -1,11 +1,9 @@ using System; -using System.Collections.Generic; -using System.Linq; using FuzzySharp.Edits; namespace FuzzySharp.SimilarityRatio.Strategy.Generic { - internal class PartialRatioStrategy where T : IEquatable + internal static class PartialRatioStrategy where T : IEquatable { public static int Calculate(T[] input1, T[] input2) { @@ -30,7 +28,7 @@ public static int Calculate(T[] input1, T[] input2) MatchingBlock[] matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); - List scores = new List(); + double maxScore = 0; foreach (var matchingBlock in matchingBlocks) { @@ -41,20 +39,22 @@ public static int Calculate(T[] input1, T[] input2) if (longEnd > longer.Length) longEnd = longer.Length; - var longSubstr = longer.Skip(longStart).Take(longEnd - longStart); + var longSubstr = longer.AsSpan()[longStart..longEnd]; - double ratio = Levenshtein.GetRatio(shorter, longSubstr); + double ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); if (ratio > .995) { return 100; } - scores.Add(ratio); - + if (ratio > maxScore) + { + maxScore = ratio; + } } - return (int)Math.Round(100 * scores.Max()); + return (int)Math.Round(100 * maxScore); } } } diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index 1d25991..20d9b26 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -1,36 +1,34 @@ using System; -using System.Collections.Generic; -using System.Linq; using FuzzySharp.Edits; namespace FuzzySharp.SimilarityRatio.Strategy { - internal class PartialRatioStrategy + internal static class PartialRatioStrategy { public static int Calculate(string input1, string input2) { - string shorter; - string longer; - if (input1.Length == 0 || input2.Length == 0) { return 0; } + ReadOnlySpan shorter; + ReadOnlySpan longer; + if (input1.Length < input2.Length) { - shorter = input1; - longer = input2; + shorter = input1.AsSpan(); + longer = input2.AsSpan(); } else { - shorter = input2; - longer = input1; + shorter = input2.AsSpan(); + longer = input1.AsSpan(); } MatchingBlock[] matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); - List scores = new List(); + double maxScore = 0; foreach (var matchingBlock in matchingBlocks) { @@ -41,7 +39,7 @@ public static int Calculate(string input1, string input2) if (longEnd > longer.Length) longEnd = longer.Length; - string longSubstr = longer.Substring(longStart, longEnd - longStart); + var longSubstr = longer[longStart..longEnd]; double ratio = Levenshtein.GetRatio(shorter, longSubstr); @@ -50,11 +48,13 @@ public static int Calculate(string input1, string input2) return 100; } - scores.Add(ratio); - + if (ratio > maxScore) + { + maxScore = ratio; + } } - return (int)Math.Round(100 * scores.Max()); + return (int)Math.Round(100 * maxScore); } } } diff --git a/FuzzySharp/Utils/Heap.cs b/FuzzySharp/Utils/Heap.cs index b890982..a732edb 100644 --- a/FuzzySharp/Utils/Heap.cs +++ b/FuzzySharp/Utils/Heap.cs @@ -11,13 +11,11 @@ public abstract class Heap : IEnumerable private const int GrowFactor = 2; private const int MinGrow = 1; - private int _capacity = InitialCapacity; private T[] _heap = new T[InitialCapacity]; - private int _tail = 0; - public int Count => _tail; + public int Count { get; private set; } - public int Capacity => _capacity; + public int Capacity { get; private set; } = InitialCapacity; protected Comparer Comparer { get; } protected abstract bool Dominates(T x, T y); @@ -26,7 +24,7 @@ protected Heap() : this(Comparer.Default) { } - protected Heap(Comparer comparer) : this(Enumerable.Empty(), comparer) + protected Heap(Comparer comparer) : this([], comparer) { } @@ -37,19 +35,18 @@ protected Heap(IEnumerable collection) protected Heap(IEnumerable collection, Comparer comparer) { - if (collection == null) throw new ArgumentNullException(nameof(collection)); - Comparer = comparer ?? throw new ArgumentNullException(nameof(comparer)); + _ = collection ?? throw new ArgumentNullException(nameof(collection)); foreach (var item in collection) { if (Count == Capacity) Grow(); - _heap[_tail++] = item; + _heap[Count++] = item; } - for (int i = Parent(_tail - 1); i >= 0; i--) + for (int i = Parent(Count - 1); i >= 0; i--) BubbleDown(i); } @@ -58,8 +55,8 @@ public void Add(T item) if (Count == Capacity) Grow(); - _heap[_tail++] = item; - BubbleUp(_tail - 1); + _heap[Count++] = item; + BubbleUp(Count - 1); } private void BubbleUp(int i) @@ -83,8 +80,8 @@ public T ExtractDominating() { if (Count == 0) throw new InvalidOperationException("Heap is empty"); T ret = _heap[0]; - _tail--; - Swap(_tail, 0); + Count--; + Swap(Count, 0); BubbleDown(0); return ret; } @@ -93,7 +90,7 @@ private void BubbleDown(int i) { while (true) { - int dominatingNode = Dominating(i); + var dominatingNode = Dominating(i); if (dominatingNode == i) return; Swap(i, dominatingNode); i = dominatingNode; @@ -111,17 +108,15 @@ private int Dominating(int i) private int GetDominating(int newNode, int dominatingNode) { - if (newNode < _tail && !Dominates(_heap[dominatingNode], _heap[newNode])) + if (newNode < Count && !Dominates(_heap[dominatingNode], _heap[newNode])) return newNode; - else - return dominatingNode; + + return dominatingNode; } private void Swap(int i, int j) { - T tmp = _heap[i]; - _heap[i] = _heap[j]; - _heap[j] = tmp; + (_heap[i], _heap[j]) = (_heap[j], _heap[i]); } private static int Parent(int i) @@ -141,11 +136,11 @@ private static int OldChild(int i) private void Grow() { - int newCapacity = _capacity * GrowFactor + MinGrow; + int newCapacity = Capacity * GrowFactor + MinGrow; var newHeap = new T[newCapacity]; - Array.Copy(_heap, newHeap, _capacity); + Array.Copy(_heap, newHeap, Capacity); _heap = newHeap; - _capacity = newCapacity; + Capacity = newCapacity; } public IEnumerator GetEnumerator() diff --git a/FuzzySharp/Utils/Permutation.cs b/FuzzySharp/Utils/Permutation.cs index e6c0976..d28a748 100644 --- a/FuzzySharp/Utils/Permutation.cs +++ b/FuzzySharp/Utils/Permutation.cs @@ -15,7 +15,7 @@ public Permutor(IEnumerable set) public List PermutationAt(long i) { - var set = new List(_set.OrderBy(e => e).ToList()); + var set = new List(_set.OrderBy(e => e)); for (long j = 0; j < i - 1; j++) { NextPermutation(set); @@ -62,22 +62,22 @@ public bool NextPermutation(List set) public static class Permutation { - public static List> AllPermutations(this IEnumerable seed) + private static IEnumerable> AllPermutations(this IEnumerable seed) { var set = new List(seed); - return Permute(set, 0, set.Count - 1).ToList(); + return Permute(set, 0, set.Count - 1); } - public static List> PermutationsOfSize(this IEnumerable seed, int size) + public static IEnumerable> PermutationsOfSize(this List seed, int size) { - if (seed.Count() < size) - { - return new List>(); - } - return seed.PermutationsOfSize(new List(), size).ToList(); + var result = seed.Count < size + ? [] + : seed.PermutationsOfSize([], size); + + return result; } - private static IEnumerable> PermutationsOfSize(this IEnumerable seed, List set, int size) + private static IEnumerable> PermutationsOfSize(this List seed, List set, int size) { if (size == 0) { @@ -85,17 +85,16 @@ private static IEnumerable> PermutationsOfSize(this IEnumerable se { yield return permutation; } + + yield break; } - else + + for (int i = 0; i < seed.Count; i++) { - var seedAsList = seed.ToList(); - for (int i = 0; i < seedAsList.Count; i++) + var newSet = new List(set) { seed[i] }; + foreach (var permutation in seed.Skip(i + 1).ToList().PermutationsOfSize(newSet, size - 1)) { - var newSet = new List(set) { seedAsList[i] }; - foreach (var permutation in seedAsList.Skip(i + 1).PermutationsOfSize(newSet, size - 1)) - { - yield return permutation; - } + yield return permutation; } } } @@ -104,7 +103,7 @@ private static IEnumerable> Permute(List set, int start, int end) { if (start == end) { - yield return new List(set); + yield return [..set]; } else { @@ -122,9 +121,7 @@ private static IEnumerable> Permute(List set, int start, int end) private static void Swap(List set, int a, int b) { - var temp = set[a]; - set[a] = set[b]; - set[b] = temp; + (set[a], set[b]) = (set[b], set[a]); } public static IEnumerable> Cycles(IEnumerable seed) @@ -132,8 +129,8 @@ public static IEnumerable> Cycles(IEnumerable seed) var set = new LinkedList(seed); for (int i = 0; i < set.Count; i++) { - yield return new List(set); - var top = set.First(); + yield return [..set]; + var top = set.First!; set.RemoveFirst(); set.AddLast(top); }