From cf17a0b6020a43aea074d6b763d80272aa22fd09 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 26 May 2024 14:29:43 +0200 Subject: [PATCH 01/30] replace arrays with spans --- FuzzySharp.Test/FuzzySharp.Test.csproj | 2 +- FuzzySharp/FuzzySharp.csproj | 7 +--- FuzzySharp/Levenshtein.cs | 42 +++++++------------ .../Strategy/Generic/PartialRatioStrategyT.cs | 4 +- 4 files changed, 21 insertions(+), 34 deletions(-) diff --git a/FuzzySharp.Test/FuzzySharp.Test.csproj b/FuzzySharp.Test/FuzzySharp.Test.csproj index 6ea573a..876916e 100644 --- a/FuzzySharp.Test/FuzzySharp.Test.csproj +++ b/FuzzySharp.Test/FuzzySharp.Test.csproj @@ -1,7 +1,7 @@ - netcoreapp3.1 + NET8.0 false diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index d2cee32..74ee9b4 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,7 +1,7 @@  - netcoreapp2.0;netcoreapp2.1;netstandard1.6;netstandard2.0;netstandard2.1;net45;net46;net461 + netstandard2.1 true Jacob Bayer Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek @@ -24,14 +24,11 @@ - + - - System - diff --git a/FuzzySharp/Levenshtein.cs b/FuzzySharp/Levenshtein.cs index 5b620af..80abb80 100644 --- a/FuzzySharp/Levenshtein.cs +++ b/FuzzySharp/Levenshtein.cs @@ -1,7 +1,5 @@ using System; -using System.Collections.Generic; using System.Diagnostics; -using System.Linq; using FuzzySharp.Edits; namespace FuzzySharp @@ -10,20 +8,20 @@ public static class Levenshtein { private static EditOp[] GetEditOps(T[] arr1, T[] arr2) where T : IEquatable { - return GetEditOps(arr1.Length, arr1, arr2.Length, arr2); + return GetEditOps(arr1.Length, (ReadOnlySpan)arr1, arr2.Length, (ReadOnlySpan)arr2); } // Special Case private static EditOp[] GetEditOps(string s1, string s2) { - return GetEditOps(s1.Length, s1.ToCharArray(), s2.Length, s2.ToCharArray()); + return GetEditOps(s1.Length, s1.AsSpan(), s2.Length, s2.AsSpan()); } - private static EditOp[] GetEditOps(int len1, T[] c1, int len2, T[] c2) 
where T : IEquatable + private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, ReadOnlySpan c2) where T : IEquatable { int i; - int[] matrix; + Span matrix; int p1 = 0; int p2 = 0; @@ -103,9 +101,9 @@ private static EditOp[] GetEditOps(int len1, T[] c1, int len2, T[] c2) where } - private static EditOp[] EditOpsFromCostMatrix(int len1, T[] c1, int p1, int o1, - int len2, T[] c2, int p2, int o2, - int[] matrix) + private static EditOp[] EditOpsFromCostMatrix(int len1, ReadOnlySpan c1, int p1, int o1, + int len2, ReadOnlySpan c2, int p2, int o2, + Span matrix) where T: IEquatable { @@ -665,13 +663,7 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) return opCodes; } - // Special Case - public static int EditDistance(string s1, string s2, int xcost = 0) - { - return EditDistance(s1.ToCharArray(), s2.ToCharArray(), xcost); - } - - public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEquatable + public static int EditDistance(ReadOnlySpan c1, ReadOnlySpan c2, int xcost = 0) where T: IEquatable { int i; @@ -720,7 +712,7 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua str1 = str2; str2 = temp; - T[] t = c2; + ReadOnlySpan t = c2; c2 = c1; c1 = t; @@ -873,7 +865,7 @@ public static int EditDistance(T[] c1, T[] c2, int xcost = 0) where T: IEqua } - private static int Memchr(T[] haystack, int offset, T needle, int num) where T : IEquatable + private static int Memchr(ReadOnlySpan haystack, int offset, T needle, int num) where T : IEquatable { if (num != 0) @@ -899,20 +891,18 @@ public static double GetRatio(T[] input1, T[] input2) where T : IEquatable int len2 = input2.Length; int lensum = len1 + len2; - int editDistance = EditDistance(input1, input2, 1); + int editDistance = EditDistance(input1.AsSpan(), input2.AsSpan(), 1); return editDistance == 0 ? 
1 : (lensum - editDistance) / (double)lensum; } - public static double GetRatio(IEnumerable input1, IEnumerable input2) where T : IEquatable + public static double GetRatio(ReadOnlySpan input1, ReadOnlySpan input2) where T : IEquatable { - var s1 = input1.ToArray(); - var s2 = input2.ToArray(); - int len1 = s1.Length; - int len2 = s2.Length; + int len1 = input1.Length; + int len2 = input2.Length; int lensum = len1 + len2; - int editDistance = EditDistance(s1, s2, 1); + int editDistance = EditDistance(input1, input2, 1); return editDistance == 0 ? 1 : (lensum - editDistance) / (double)lensum; } @@ -920,7 +910,7 @@ public static double GetRatio(IEnumerable input1, IEnumerable input2) w // Special Case public static double GetRatio(string s1, string s2) { - return GetRatio(s1.ToCharArray(), s2.ToCharArray()); + return GetRatio(s1.AsSpan(), s2.AsSpan()); } } } diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index a536da4..8983b4d 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -41,9 +41,9 @@ public static int Calculate(T[] input1, T[] input2) if (longEnd > longer.Length) longEnd = longer.Length; - var longSubstr = longer.Skip(longStart).Take(longEnd - longStart); + var longSubstr = longer.AsSpan().Slice(longStart, longEnd - longStart); - double ratio = Levenshtein.GetRatio(shorter, longSubstr); + double ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); if (ratio > .995) { From d4e0613ac30f7986f1067703de0a61733d7782c3 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Wed, 31 Jul 2024 17:14:46 +0200 Subject: [PATCH 02/30] further optimizations --- .../ScorerTests/TokenSetScorerBaseTest.cs | 3 - FuzzySharp/Extensions/StringExtensions.cs | 63 ++++++++++++++ FuzzySharp/Levenshtein.cs | 84 +++++++++---------- 
.../PreProcess/StringPreprocessorFactory.cs | 21 +++-- .../TokenAbbreviationScorerBase.cs | 22 ++--- .../TokenDifferenceScorerBase.cs | 7 +- .../TokenInitialismScorerBase.cs | 4 +- .../TokenSet/TokenSetScorerBase.cs | 15 ++-- .../TokenSort/TokenSortAlgorithm.cs | 8 +- .../Strategy/Generic/PartialRatioStrategyT.cs | 15 ++-- .../Strategy/PartialRatioStrategy.cs | 15 ++-- FuzzySharp/Utils/Heap.cs | 40 ++++----- FuzzySharp/Utils/Permutation.cs | 4 +- 13 files changed, 178 insertions(+), 123 deletions(-) create mode 100644 FuzzySharp/Extensions/StringExtensions.cs diff --git a/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs b/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs index 4cf7d6f..481b1f2 100644 --- a/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs +++ b/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs @@ -2,9 +2,6 @@ using FuzzySharp.SimilarityRatio.Scorer; using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; using NUnit.Framework; -using System; -using System.Collections.Generic; -using System.Text; namespace FuzzySharp.Test.FuzzyTests.ScorerTests { diff --git a/FuzzySharp/Extensions/StringExtensions.cs b/FuzzySharp/Extensions/StringExtensions.cs new file mode 100644 index 0000000..1ce26ad --- /dev/null +++ b/FuzzySharp/Extensions/StringExtensions.cs @@ -0,0 +1,63 @@ +using System; +using System.Collections.Generic; + +namespace FuzzySharp.Extensions +{ + internal static class StringExtensions + { + public static List ExtractLetterOnlyWords(this string input) + { + var result = new List(); + + if (string.IsNullOrEmpty(input)) + return result; + + var span = input.AsSpan(); + + int start = 0; + for (var i = 0; i < span.Length; i++) + { + if (!char.IsLetter(span[i])) + { + if (i - start > 0) + { + result.Add(span.Slice(start, i - start).ToString()); + } + + start = i+1; + } + } + + if (span.Length - start > 0) + result.Add(span.Slice(start, span.Length - start).ToString()); + + return 
result; + } + + public static string[] SplitByAnySpace(this string input) + { + if (string.IsNullOrWhiteSpace(input)) + return Array.Empty(); + + var words = input.Split(Array.Empty(), StringSplitOptions.RemoveEmptyEntries); + + return words; + } + + public static string[] GetSortedWords(this string input) + { + var words = SplitByAnySpace(input); + + Array.Sort(words); + + return words; + } + + public static string NormalizeSpacesAndSort(this string input) + { + var words = GetSortedWords(input); + + return string.Join(' ', words); + } + } +} diff --git a/FuzzySharp/Levenshtein.cs b/FuzzySharp/Levenshtein.cs index 80abb80..63d89a9 100644 --- a/FuzzySharp/Levenshtein.cs +++ b/FuzzySharp/Levenshtein.cs @@ -21,8 +21,6 @@ private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, Re { int i; - Span matrix; - int p1 = 0; int p2 = 0; @@ -51,7 +49,7 @@ private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, Re len1++; len2++; - matrix = new int[len2 * len1]; + Span matrix = new int[len2 * len1]; for (i = 0; i < len2; i++) matrix[i] = i; @@ -96,7 +94,6 @@ private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, Re } - return EditOpsFromCostMatrix(len1, c1, p1, len1o, len2, c2, p2, len2o, matrix); } @@ -248,7 +245,7 @@ public static MatchingBlock[] GetMatchingBlocks(int len1, int len2, OpCode[] ops noOfMB = 0; - for (i = n; i-- != 0; o++) + for (i = n; i != 0; i--, o++) { if (ops[o].EditType == EditType.KEEP) { @@ -298,7 +295,7 @@ public static MatchingBlock[] GetMatchingBlocks(int len1, int len2, OpCode[] ops Debug.Assert(mb != noOfMB); - MatchingBlock finalBlock = new MatchingBlock + var finalBlock = new MatchingBlock { SourcePos = len1, DestPos = len2, @@ -326,7 +323,9 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op EditType type; - for (i = n; i != 0;) + i = n; + + while (i > 0) { @@ -381,9 +380,6 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op } while (i 
!= 0 && ops[o].EditType == type && SourcePos == ops[o].SourcePos && DestPos == ops[o].DestPos); break; - - default: - break; } } @@ -398,8 +394,9 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op SourcePos = DestPos = 0; int mbIndex = 0; + i = n; - for (i = n; i != 0;) + while (i > 0) { while (ops[o].EditType == EditType.KEEP && --i != 0) @@ -410,11 +407,13 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op if (SourcePos < ops[o].SourcePos || DestPos < ops[o].DestPos) { - MatchingBlock mb = new MatchingBlock(); + var mb = new MatchingBlock + { + SourcePos = SourcePos, + DestPos = DestPos, + Length = ops[o].SourcePos - SourcePos + }; - mb.SourcePos = SourcePos; - mb.DestPos = DestPos; - mb.Length = ops[o].SourcePos - SourcePos; SourcePos = ops[o].SourcePos; DestPos = ops[o].DestPos; @@ -456,9 +455,6 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op } while (i != 0 && ops[o].EditType == type && SourcePos == ops[o].SourcePos && DestPos == ops[o].DestPos); break; - - default: - break; } } @@ -466,20 +462,24 @@ private static MatchingBlock[] GetMatchingBlocks(int len1, int len2, EditOp[] op { Debug.Assert(len1 -SourcePos == len2 - DestPos); - MatchingBlock mb = new MatchingBlock(); - mb.SourcePos = SourcePos; - mb.DestPos = DestPos; - mb.Length = len1 - SourcePos; + var mb = new MatchingBlock + { + SourcePos = SourcePos, + DestPos = DestPos, + Length = len1 - SourcePos + }; matchingBlocks[mbIndex++] = mb; } Debug.Assert(numberOfMatchingBlocks == mbIndex); - MatchingBlock finalBlock = new MatchingBlock(); - finalBlock.SourcePos = len1; - finalBlock.DestPos = len2; - finalBlock.Length = 0; + var finalBlock = new MatchingBlock + { + SourcePos = len1, + DestPos = len2, + Length = 0 + }; matchingBlocks[mbIndex] = finalBlock; @@ -497,7 +497,9 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) noOfBlocks = 0; SourcePos = DestPos = 0; - for (i = n; i 
!= 0;) + i = n; + + while (i > 0) { while (ops[o].EditType == EditType.KEEP && --i != 0) @@ -553,9 +555,6 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) } while (i != 0 && ops[o].EditType == type && SourcePos == ops[o].SourcePos && DestPos == ops[o].DestPos); break; - - default: - break; } } @@ -568,7 +567,9 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) SourcePos = DestPos = 0; int oIndex = 0; - for (i = n; i != 0;) + i = n; + + while (i > 0) { while (ops[o].EditType == EditType.KEEP && --i != 0) @@ -646,16 +647,15 @@ private static OpCode[] EditOpsToOpCodes(EditOp[] ops, int len1, int len2) { Debug.Assert(len1 - SourcePos == len2 - DestPos); - if (opCodes[oIndex] == null) - opCodes[oIndex] = new OpCode(); - opCodes[oIndex].EditType = EditType.KEEP; - opCodes[oIndex].SourceBegin = SourcePos; - opCodes[oIndex].DestBegin = DestPos; - opCodes[oIndex].SourceEnd = len1; - opCodes[oIndex].DestEnd = len2; - oIndex++; + var opcode = opCodes[oIndex] ?? (opCodes[oIndex] = new OpCode()); + opcode.EditType = EditType.KEEP; + opcode.SourceBegin = SourcePos; + opcode.DestBegin = DestPos; + opcode.SourceEnd = len1; + opcode.DestEnd = len2; + oIndex++; } Debug.Assert(oIndex == noOfBlocks); @@ -765,7 +765,7 @@ public static int EditDistance(ReadOnlySpan c1, ReadOnlySpan c2, int xc if (ch1.Equals(c2[c2p++])) { - x = --D; + x = D-1; } else { @@ -831,7 +831,7 @@ public static int EditDistance(ReadOnlySpan c1, ReadOnlySpan c2, int xc /* main */ while (p <= end) { - int c3 = --D + (!ch1.Equals(c2[c2p++]) ? 1 : 0); + int c3 = D-1 + (!ch1.Equals(c2[c2p++]) ? 1 : 0); x++; if (x > c3) { @@ -848,7 +848,7 @@ public static int EditDistance(ReadOnlySpan c1, ReadOnlySpan c2, int xc /* lower triangle sentinel */ if (i <= half) { - int c3 = --D + (!ch1.Equals(c2[c2p]) ? 1 : 0); + int c3 = D - 1 + (!ch1.Equals(c2[c2p]) ? 
1 : 0); x++; if (x > c3) { diff --git a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs index 0cc5647..eb67157 100644 --- a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs +++ b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs @@ -1,18 +1,25 @@ using System; -using System.Text.RegularExpressions; namespace FuzzySharp.PreProcess { - internal class StringPreprocessorFactory + internal static class StringPreprocessorFactory { - private static string pattern = "[^ a-zA-Z0-9]"; - private static string Default(string input) { - input = Regex.Replace(input, pattern, " "); - input = input.ToLower(); + if (string.IsNullOrWhiteSpace(input)) + { + return string.Empty; + } + + var result = new char[input.Length].AsSpan(); + + for (var i = 0; i < input.Length; i++) + { + var c = input[i]; + result[i] = char.IsLetterOrDigit(c) ? char.ToLower(c) : ' '; + } - return input.Trim(); + return result.ToString().Trim(); } public static Func GetPreprocessor(PreprocessMode mode) diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs index 98c95ce..8b4739a 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs @@ -1,6 +1,6 @@ using System.Collections.Generic; using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; using FuzzySharp.Utils; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive @@ -23,25 +23,25 @@ public override int Score(string input1, string input2) longer = input1; } - double lenRatio = ((double)longer.Length) / shorter.Length; + double lenRatio = (double)longer.Length / shorter.Length; // if longer isn't at least 1.5 times longer than the other, then 
its probably not an abbreviation if (lenRatio < 1.5) return 0; // numbers can't be abbreviations for other numbers, though that would be hilarious. "Yes, 4 - as in 4,238" - var tokensLonger = Regex.Matches(longer, @"[a-zA-Z]+").Cast().Select(m => m.Value).ToArray(); - var tokensShorter = Regex.Matches(shorter, @"[a-zA-Z]+").Cast().Select(m => m.Value).ToArray(); + var tokensLonger = longer.ExtractLetterOnlyWords(); + var tokensShorter = shorter.ExtractLetterOnlyWords(); // more than 4 tokens and it's probably not an abbreviation (and could get costly) - if (tokensShorter.Length > 4) + if (tokensShorter.Count > 4) { return 0; } - string[] moreTokens; - string[] fewerTokens; + List moreTokens; + List fewerTokens; - if (tokensLonger.Length > tokensShorter.Length) + if (tokensLonger.Count > tokensShorter.Count) { moreTokens = tokensLonger; fewerTokens = tokensShorter; @@ -52,13 +52,13 @@ public override int Score(string input1, string input2) fewerTokens = tokensLonger; } - var allPermutations = moreTokens.PermutationsOfSize(fewerTokens.Length); + var allPermutations = moreTokens.PermutationsOfSize(fewerTokens.Count); List allScores = new List(); foreach (var permutation in allPermutations) { double sum = 0; - for (int i = 0; i < fewerTokens.Length; i++) + for (int i = 0; i < fewerTokens.Count; i++) { var i1 = permutation[i]; var i2 = fewerTokens[i]; @@ -68,7 +68,7 @@ public override int Score(string input1, string input2) sum += score; } } - allScores.Add((int) (sum / fewerTokens.Length)); + allScores.Add((int) (sum / fewerTokens.Count)); } return allScores.Count==0?0:allScores.Max(); diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs index 11036af..0e02491 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs +++ 
b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs @@ -1,5 +1,4 @@ -using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; using FuzzySharp.PreProcess; using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive.Generic; @@ -14,8 +13,8 @@ public override int Score(string[] input1, string[] input2) public int Score(string input1, string input2) { - var tokens1 = Regex.Split(input1, @"\s+").Where(s => s.Any()).OrderBy(s => s).ToArray(); - var tokens2 = Regex.Split(input2, @"\s+").Where(s => s.Any()).OrderBy(s => s).ToArray(); + var tokens1 = input1.GetSortedWords(); + var tokens2 = input2.GetSortedWords(); return Score(tokens1, tokens2); } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs index 10aa1af..bbf6bb9 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs @@ -1,5 +1,5 @@ using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { @@ -26,7 +26,7 @@ public override int Score(string input1, string input2) // if longer isn't at least 3 times longer than the other, then it's probably not an initialism if (lenRatio < 3) return 0; - var initials = Regex.Split(longer, @"\s+").Where(s => s.Any()).Select(s => s[0]); + var initials = longer.SplitByAnySpace().Select(s => s[0]); return Scorer(string.Join("", initials), shorter); } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 785de55..63c29ae 100644 --- 
a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -1,7 +1,6 @@ -using System; -using System.Collections.Generic; +using System.Collections.Generic; using System.Linq; -using System.Text.RegularExpressions; +using FuzzySharp.Extensions; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { @@ -9,12 +8,12 @@ public abstract class TokenSetScorerBase : StrategySensitiveScorerBase { public override int Score(string input1, string input2) { - var tokens1 = new HashSet(Regex.Split(input1, @"\s+").Where(s => s.Any())); - var tokens2 = new HashSet(Regex.Split(input2, @"\s+").Where(s => s.Any())); + var tokens1 = new HashSet(input1.SplitByAnySpace()); + var tokens2 = new HashSet(input2.SplitByAnySpace()); - var sortedIntersection = String.Join(" ", tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); - var sortedDiff1To2 = (sortedIntersection + " " + String.Join(" ", tokens1.Except(tokens2).OrderBy(s => s))).Trim(); - var sortedDiff2To1 = (sortedIntersection + " " + String.Join(" ", tokens2.Except(tokens1).OrderBy(s => s))).Trim(); + var sortedIntersection = string.Join(" ", tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); + var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.Except(tokens2).OrderBy(s => s))).Trim(); + var sortedDiff2To1 = (sortedIntersection + " " + string.Join(" ", tokens2.Except(tokens1).OrderBy(s => s))).Trim(); return new[] { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs index dbfa10a..032b779 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs @@ -1,6 +1,4 @@ -using System; -using System.Linq; -using 
System.Text.RegularExpressions; +using FuzzySharp.Extensions; namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { @@ -8,8 +6,8 @@ public abstract class TokenSortScorerBase : StrategySensitiveScorerBase { public override int Score(string input1, string input2) { - var sorted1 = String.Join(" ", Regex.Split(input1, @"\s+").Where(s => s.Any()).OrderBy(s => s)).Trim(); - var sorted2 = String.Join(" ", Regex.Split(input2, @"\s+").Where(s => s.Any()).OrderBy(s => s)).Trim(); + var sorted1 = input1.NormalizeSpacesAndSort(); + var sorted2 = input2.NormalizeSpacesAndSort(); return Scorer(sorted1, sorted2); } diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index 8983b4d..937518c 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; using System.Linq; -using FuzzySharp.Edits; namespace FuzzySharp.SimilarityRatio.Strategy.Generic { @@ -28,22 +27,22 @@ public static int Calculate(T[] input1, T[] input2) longer = input1; } - MatchingBlock[] matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); + var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); - List scores = new List(); + var scores = new List(); foreach (var matchingBlock in matchingBlocks) { - int dist = matchingBlock.DestPos - matchingBlock.SourcePos; + var dist = matchingBlock.DestPos - matchingBlock.SourcePos; - int longStart = dist > 0 ? dist : 0; - int longEnd = longStart + shorter.Length; + var longStart = dist > 0 ? 
dist : 0; + var longEnd = longStart + shorter.Length; if (longEnd > longer.Length) longEnd = longer.Length; - var longSubstr = longer.AsSpan().Slice(longStart, longEnd - longStart); + var longSubstr = longer.AsSpan()[longStart..longEnd]; - double ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); + var ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); if (ratio > .995) { diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index 1d25991..442ac3f 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -1,7 +1,6 @@ using System; using System.Collections.Generic; using System.Linq; -using FuzzySharp.Edits; namespace FuzzySharp.SimilarityRatio.Strategy { @@ -28,22 +27,22 @@ public static int Calculate(string input1, string input2) longer = input1; } - MatchingBlock[] matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); + var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); - List scores = new List(); + var scores = new List(); foreach (var matchingBlock in matchingBlocks) { - int dist = matchingBlock.DestPos - matchingBlock.SourcePos; + var dist = matchingBlock.DestPos - matchingBlock.SourcePos; - int longStart = dist > 0 ? dist : 0; - int longEnd = longStart + shorter.Length; + var longStart = dist > 0 ? 
dist : 0; + var longEnd = longStart + shorter.Length; if (longEnd > longer.Length) longEnd = longer.Length; - string longSubstr = longer.Substring(longStart, longEnd - longStart); + var longSubstr = longer.AsSpan()[longStart..longEnd]; - double ratio = Levenshtein.GetRatio(shorter, longSubstr); + var ratio = Levenshtein.GetRatio(shorter, longSubstr); if (ratio > .995) { diff --git a/FuzzySharp/Utils/Heap.cs b/FuzzySharp/Utils/Heap.cs index b890982..7b1d1eb 100644 --- a/FuzzySharp/Utils/Heap.cs +++ b/FuzzySharp/Utils/Heap.cs @@ -11,13 +11,11 @@ public abstract class Heap : IEnumerable private const int GrowFactor = 2; private const int MinGrow = 1; - private int _capacity = InitialCapacity; private T[] _heap = new T[InitialCapacity]; - private int _tail = 0; - public int Count => _tail; + public int Count { get; private set; } - public int Capacity => _capacity; + public int Capacity { get; private set; } = InitialCapacity; protected Comparer Comparer { get; } protected abstract bool Dominates(T x, T y); @@ -46,10 +44,10 @@ protected Heap(IEnumerable collection, Comparer comparer) if (Count == Capacity) Grow(); - _heap[_tail++] = item; + _heap[Count++] = item; } - for (int i = Parent(_tail - 1); i >= 0; i--) + for (var i = Parent(Count - 1); i >= 0; i--) BubbleDown(i); } @@ -58,8 +56,8 @@ public void Add(T item) if (Count == Capacity) Grow(); - _heap[_tail++] = item; - BubbleUp(_tail - 1); + _heap[Count++] = item; + BubbleUp(Count - 1); } private void BubbleUp(int i) @@ -82,9 +80,9 @@ public T GetMin() public T ExtractDominating() { if (Count == 0) throw new InvalidOperationException("Heap is empty"); - T ret = _heap[0]; - _tail--; - Swap(_tail, 0); + var ret = _heap[0]; + Count--; + Swap(Count, 0); BubbleDown(0); return ret; } @@ -93,7 +91,7 @@ private void BubbleDown(int i) { while (true) { - int dominatingNode = Dominating(i); + var dominatingNode = Dominating(i); if (dominatingNode == i) return; Swap(i, dominatingNode); i = dominatingNode; @@ -102,7 +100,7 @@ 
private void BubbleDown(int i) private int Dominating(int i) { - int dominatingNode = i; + var dominatingNode = i; dominatingNode = GetDominating(YoungChild(i), dominatingNode); dominatingNode = GetDominating(OldChild(i), dominatingNode); @@ -111,17 +109,15 @@ private int Dominating(int i) private int GetDominating(int newNode, int dominatingNode) { - if (newNode < _tail && !Dominates(_heap[dominatingNode], _heap[newNode])) + if (newNode < Count && !Dominates(_heap[dominatingNode], _heap[newNode])) return newNode; - else - return dominatingNode; + + return dominatingNode; } private void Swap(int i, int j) { - T tmp = _heap[i]; - _heap[i] = _heap[j]; - _heap[j] = tmp; + (_heap[i], _heap[j]) = (_heap[j], _heap[i]); } private static int Parent(int i) @@ -141,11 +137,11 @@ private static int OldChild(int i) private void Grow() { - int newCapacity = _capacity * GrowFactor + MinGrow; + var newCapacity = Capacity * GrowFactor + MinGrow; var newHeap = new T[newCapacity]; - Array.Copy(_heap, newHeap, _capacity); + Array.Copy(_heap, newHeap, Capacity); _heap = newHeap; - _capacity = newCapacity; + Capacity = newCapacity; } public IEnumerator GetEnumerator() diff --git a/FuzzySharp/Utils/Permutation.cs b/FuzzySharp/Utils/Permutation.cs index e6c0976..d09e8e6 100644 --- a/FuzzySharp/Utils/Permutation.cs +++ b/FuzzySharp/Utils/Permutation.cs @@ -122,9 +122,7 @@ private static IEnumerable> Permute(List set, int start, int end) private static void Swap(List set, int a, int b) { - var temp = set[a]; - set[a] = set[b]; - set[b] = temp; + (set[a], set[b]) = (set[b], set[a]); } public static IEnumerable> Cycles(IEnumerable seed) From 33daf545b30af5bdf1953ce0c78d5b8a14ea6e12 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Wed, 31 Jul 2024 17:28:01 +0200 Subject: [PATCH 03/30] code cleanup --- FuzzySharp/Extensions/StringExtensions.cs | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/FuzzySharp/Extensions/StringExtensions.cs 
b/FuzzySharp/Extensions/StringExtensions.cs index 1ce26ad..4cf4463 100644 --- a/FuzzySharp/Extensions/StringExtensions.cs +++ b/FuzzySharp/Extensions/StringExtensions.cs @@ -14,22 +14,21 @@ public static List ExtractLetterOnlyWords(this string input) var span = input.AsSpan(); - int start = 0; + var start = 0; for (var i = 0; i < span.Length; i++) { - if (!char.IsLetter(span[i])) - { - if (i - start > 0) - { - result.Add(span.Slice(start, i - start).ToString()); - } + if (char.IsLetter(span[i])) continue; - start = i+1; + if (i - start > 0) + { + result.Add(span[start..i].ToString()); } + + start = i+1; } if (span.Length - start > 0) - result.Add(span.Slice(start, span.Length - start).ToString()); + result.Add(span[start..].ToString()); return result; } From f5073bd6335d50f00406127a53b88d0eec3aa477 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Fri, 2 Aug 2024 13:20:35 +0200 Subject: [PATCH 04/30] further optimizations, code cleanup --- FuzzySharp.Test/FuzzySharp.Test.csproj | 2 +- FuzzySharp/Edits/MatchingBlock.cs | 7 ++----- FuzzySharp/Extensions/StringExtensions.cs | 2 +- .../TokenAbbreviation/TokenAbbreviationScorerBase.cs | 9 +++++---- .../TokenInitialism/TokenInitialismScorerBase.cs | 4 ++-- .../StrategySensitive/TokenSet/TokenSetScorerBase.cs | 6 +++--- 6 files changed, 14 insertions(+), 16 deletions(-) diff --git a/FuzzySharp.Test/FuzzySharp.Test.csproj b/FuzzySharp.Test/FuzzySharp.Test.csproj index 876916e..5f90e92 100644 --- a/FuzzySharp.Test/FuzzySharp.Test.csproj +++ b/FuzzySharp.Test/FuzzySharp.Test.csproj @@ -13,7 +13,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + diff --git a/FuzzySharp/Edits/MatchingBlock.cs b/FuzzySharp/Edits/MatchingBlock.cs index 585b6ad..9f0e399 100644 --- a/FuzzySharp/Edits/MatchingBlock.cs +++ b/FuzzySharp/Edits/MatchingBlock.cs @@ -1,14 +1,11 @@ namespace FuzzySharp.Edits { - public class MatchingBlock + public sealed class MatchingBlock { public int SourcePos { get; set; } public int 
DestPos { get; set; } public int Length { get; set; } - public override string ToString() - { - return $"({SourcePos},{DestPos},{Length})"; - } + public override string ToString() => $"({SourcePos},{DestPos},{Length})"; } } diff --git a/FuzzySharp/Extensions/StringExtensions.cs b/FuzzySharp/Extensions/StringExtensions.cs index 4cf4463..b3ef27d 100644 --- a/FuzzySharp/Extensions/StringExtensions.cs +++ b/FuzzySharp/Extensions/StringExtensions.cs @@ -5,7 +5,7 @@ namespace FuzzySharp.Extensions { internal static class StringExtensions { - public static List ExtractLetterOnlyWords(this string input) + public static List ExtractTokens(this string input) { var result = new List(); diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs index 8b4739a..848bb13 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; using FuzzySharp.Extensions; using FuzzySharp.Utils; @@ -29,8 +30,8 @@ public override int Score(string input1, string input2) if (lenRatio < 1.5) return 0; // numbers can't be abbreviations for other numbers, though that would be hilarious. 
"Yes, 4 - as in 4,238" - var tokensLonger = longer.ExtractLetterOnlyWords(); - var tokensShorter = shorter.ExtractLetterOnlyWords(); + var tokensLonger = longer.ExtractTokens(); + var tokensShorter = shorter.ExtractTokens(); // more than 4 tokens and it's probably not an abbreviation (and could get costly) if (tokensShorter.Count > 4) @@ -80,7 +81,7 @@ public override int Score(string input1, string input2) /// /// /// - private bool StringContainsInOrder(string s1, string s2) + private static bool StringContainsInOrder(ReadOnlySpan s1, ReadOnlySpan s2) { if (s1.Length < s2.Length) return false; int s2_idx = 0; diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs index bbf6bb9..4dd3a6b 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs @@ -26,9 +26,9 @@ public override int Score(string input1, string input2) // if longer isn't at least 3 times longer than the other, then it's probably not an initialism if (lenRatio < 3) return 0; - var initials = longer.SplitByAnySpace().Select(s => s[0]); + var initials = longer.SplitByAnySpace().Select(s => s[0]).ToArray(); - return Scorer(string.Join("", initials), shorter); + return Scorer(new string(initials), shorter); } } } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 63c29ae..091d3bd 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -11,9 +11,9 @@ public override int Score(string input1, string input2) var tokens1 = new 
HashSet(input1.SplitByAnySpace()); var tokens2 = new HashSet(input2.SplitByAnySpace()); - var sortedIntersection = string.Join(" ", tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); - var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.Except(tokens2).OrderBy(s => s))).Trim(); - var sortedDiff2To1 = (sortedIntersection + " " + string.Join(" ", tokens2.Except(tokens1).OrderBy(s => s))).Trim(); + var sortedIntersection = string.Join(' ', tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); + var sortedDiff1To2 = (sortedIntersection + " " + string.Join(' ', tokens1.Except(tokens2).OrderBy(s => s))).Trim(); + var sortedDiff2To1 = (sortedIntersection + " " + string.Join(' ', tokens2.Except(tokens1).OrderBy(s => s))).Trim(); return new[] { From 48b4b45a15c16b6fbf43070ac2404c5df1b9586f Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Tue, 6 Aug 2024 10:24:38 +0200 Subject: [PATCH 05/30] Refactor and optimize codebase; update dependencies Updated .gitignore to include .vshistory/. Upgraded NUnit3TestAdapter to 4.6.0. Refactored TestScoringEmptyString method in RegressionTests.cs. Simplified string formatting in ExtractedResult.cs. Updated target framework to NET8.0 in FuzzySharp.csproj. Simplified conditional logic in Levenshtein.cs. Refactored StringPreprocessorFactory.cs for better string trimming and switch expression. Used null-coalescing assignment in Process.cs methods. Made several scorer classes sealed. Removed unnecessary using directive in TokenAbbreviationScorerBase.cs. Optimized scoring logic in TokenAbbreviationScorerBase.cs. Changed several strategy classes to static. Optimized score calculation in PartialRatioStrategy.cs. Simplified Heap constructor and added null checks. Removed redundant ToList calls and optimized permutation logic in Permutation.cs. Simplified Cycles method in Permutation.cs. 
--- .gitignore | 1 + FuzzySharp.Test/FuzzySharp.Test.csproj | 2 +- FuzzySharp.Test/FuzzyTests/RegressionTests.cs | 21 ++++------ FuzzySharp/Extractor/ExtractedResult.cs | 2 +- FuzzySharp/FuzzySharp.csproj | 2 +- FuzzySharp/Levenshtein.cs | 6 +-- .../PreProcess/StringPreprocessorFactory.cs | 15 +++---- FuzzySharp/Process.cs | 24 +++++------ .../Scorer/Composite/WeightedRatioScorer.cs | 2 +- .../Simple/DefaultRatioScorer.cs | 2 +- .../Simple/PartialRatioScorer.cs | 2 +- .../TokenAbbreviationScorerBase.cs | 12 ++++-- .../PartialTokenDifferenceScorer.cs | 2 +- .../TokenDifference/TokenDifferenceScorer.cs | 2 +- .../Strategy/DefaultRatioStrategy.cs | 2 +- .../Strategy/Generic/DefaultRatioStrategyT.cs | 2 +- .../Strategy/Generic/PartialRatioStrategyT.cs | 2 +- .../Strategy/PartialRatioStrategy.cs | 14 +++---- FuzzySharp/Utils/Heap.cs | 7 ++-- FuzzySharp/Utils/Permutation.cs | 41 +++++++++---------- 20 files changed, 78 insertions(+), 85 deletions(-) diff --git a/.gitignore b/.gitignore index 940794e..861efa5 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ bld/ # Visual Studio 2015 cache/options directory .vs/ +.vshistory/ # Uncomment if you have tasks that create the project's static files in wwwroot #wwwroot/ diff --git a/FuzzySharp.Test/FuzzySharp.Test.csproj b/FuzzySharp.Test/FuzzySharp.Test.csproj index 5f90e92..48959cd 100644 --- a/FuzzySharp.Test/FuzzySharp.Test.csproj +++ b/FuzzySharp.Test/FuzzySharp.Test.csproj @@ -9,7 +9,7 @@ - + all runtime; build; native; contentfiles; analyzers; buildtransitive diff --git a/FuzzySharp.Test/FuzzyTests/RegressionTests.cs b/FuzzySharp.Test/FuzzyTests/RegressionTests.cs index 134d2ee..9fea3f8 100644 --- a/FuzzySharp.Test/FuzzyTests/RegressionTests.cs +++ b/FuzzySharp.Test/FuzzyTests/RegressionTests.cs @@ -12,43 +12,37 @@ namespace FuzzySharp.Test.FuzzyTests public class RegressionTests { - /// /// Test to ensure that all IRatioScorer implementations handle scoring empty strings & whitespace strings /// [Test] public 
void TestScoringEmptyString() { - var scorerType = typeof(IRatioScorer); var assemblies = AppDomain.CurrentDomain.GetAssemblies().ToList(); var types = assemblies.SelectMany(s => { - Type[] types = new Type[] { }; ; try { - types = s.GetTypes(); + return s.GetTypes(); } catch {} - return types; + return []; }).ToList(); var scorerTypes = types.Where(t => scorerType.IsAssignableFrom(t) && !t.IsAbstract && t.IsClass).ToList(); - //var scorerTypes = AppDomain.CurrentDomain.GetAssemblies().SelectMany(s => s.GetTypes()).Where(p => scorerType.IsAssignableFrom(p) && p.IsClass && !p.IsAbstract); - - - MethodInfo getScorerCacheMethodInfo = typeof(ScorerCache).GetMethod("Get"); - + string nullString = null; //Null doesnt seem to be handled by any scorer string emptyString = ""; string whitespaceString = " "; - string[] nullOrWhitespaceStrings = { emptyString, whitespaceString }; + string[] nullOrWhitespaceStrings = [emptyString, whitespaceString]; + MethodInfo getScorerCacheMethodInfo = typeof(ScorerCache).GetMethod("Get"); - foreach (Type t in scorerTypes) + foreach (var t in scorerTypes) { System.Diagnostics.Debug.WriteLine($"Testing {t.Name}"); MethodInfo m = getScorerCacheMethodInfo.MakeGenericMethod(t); - IRatioScorer scorer = m.Invoke(this, new object[] { }) as IRatioScorer; + IRatioScorer scorer = m.Invoke(this, []) as IRatioScorer; foreach(string s in nullOrWhitespaceStrings) { @@ -79,7 +73,6 @@ public void TestScoringEmptyString() } - } } diff --git a/FuzzySharp/Extractor/ExtractedResult.cs b/FuzzySharp/Extractor/ExtractedResult.cs index 43f41e2..30aad9c 100644 --- a/FuzzySharp/Extractor/ExtractedResult.cs +++ b/FuzzySharp/Extractor/ExtractedResult.cs @@ -34,7 +34,7 @@ public override string ToString() { return $"(string: {Value}, score: {Score}, index: {Index})"; } - return $"(value: {Value.ToString()}, score: {Score}, index: {Index})"; + return $"(value: {Value}, score: {Score}, index: {Index})"; } } } diff --git a/FuzzySharp/FuzzySharp.csproj 
b/FuzzySharp/FuzzySharp.csproj index 74ee9b4..975af4b 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,7 +1,7 @@  - netstandard2.1 + NET8.0 true Jacob Bayer Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek diff --git a/FuzzySharp/Levenshtein.cs b/FuzzySharp/Levenshtein.cs index 63d89a9..a815be1 100644 --- a/FuzzySharp/Levenshtein.cs +++ b/FuzzySharp/Levenshtein.cs @@ -725,10 +725,8 @@ public static int EditDistance(ReadOnlySpan c1, ReadOnlySpan c2, int xc { return len2 + 1 - 2 * Memchr(c2, str2, c1[str1], len2); } - else - { - return len2 - Memchr(c2, str2, c1[str1], len2); - } + + return len2 - Memchr(c2, str2, c1[str1], len2); } len1++; diff --git a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs index eb67157..a454e5d 100644 --- a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs +++ b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs @@ -19,20 +19,17 @@ private static string Default(string input) result[i] = char.IsLetterOrDigit(c) ? 
char.ToLower(c) : ' '; } - return result.ToString().Trim(); + return result.Trim().ToString(); } public static Func GetPreprocessor(PreprocessMode mode) { - switch (mode) + return mode switch { - case PreprocessMode.Full: - return Default; - case PreprocessMode.None: - return s => s; - default: - throw new InvalidOperationException($"Invalid string preprocessor mode: {mode}"); - } + PreprocessMode.Full => Default, + PreprocessMode.None => s => s, + _ => throw new InvalidOperationException($"Invalid string preprocessor mode: {mode}") + }; } } } diff --git a/FuzzySharp/Process.cs b/FuzzySharp/Process.cs index dbc5caf..87717fe 100644 --- a/FuzzySharp/Process.cs +++ b/FuzzySharp/Process.cs @@ -31,8 +31,8 @@ public static IEnumerable> ExtractAll( IRatioScorer scorer = null, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractWithoutOrder(query, choices, processor, scorer, cutoff); } @@ -53,7 +53,7 @@ public static IEnumerable> ExtractAll( IRatioScorer scorer = null, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractWithoutOrder(query, choices, processor, scorer, cutoff); } #endregion @@ -78,8 +78,8 @@ public static IEnumerable> ExtractTop( int limit = 5, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractTop(query, choices, processor, scorer, limit, cutoff); } @@ -103,7 +103,7 @@ public static IEnumerable> ExtractTop( int limit = 5, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractTop(query, choices, processor, scorer, limit, cutoff); } #endregion @@ -125,8 +125,8 @@ 
public static IEnumerable> ExtractSorted( IRatioScorer scorer = null, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractSorted(query, choices, processor, scorer, cutoff); } @@ -146,7 +146,7 @@ public static IEnumerable> ExtractSorted( IRatioScorer scorer = null, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractSorted(query, choices, processor, scorer, cutoff); } #endregion @@ -168,8 +168,8 @@ public static ExtractedResult ExtractOne( IRatioScorer scorer = null, int cutoff = 0) { - if (processor == null) processor = s_defaultStringProcessor; - if (scorer == null) scorer = s_defaultScorer; + processor ??= s_defaultStringProcessor; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractOne(query, choices, processor, scorer, cutoff); } @@ -189,7 +189,7 @@ public static ExtractedResult ExtractOne( IRatioScorer scorer = null, int cutoff = 0) { - if (scorer == null) scorer = s_defaultScorer; + scorer ??= s_defaultScorer; return ResultExtractor.ExtractOne(query, choices, processor, scorer, cutoff); } diff --git a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs index b3744ec..490decb 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.Composite { - public class WeightedRatioScorer : ScorerBase + public sealed class WeightedRatioScorer : ScorerBase { private static double UNBASE_SCALE = .95; private static double PARTIAL_SCALE = .90; diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs 
b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs index 12ef6d1..e33dee9 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public class DefaultRatioScorer : SimpleRatioScorerBase + public sealed class DefaultRatioScorer : SimpleRatioScorerBase { protected override Func Scorer => DefaultRatioStrategy.Calculate; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs index 049d8af..0065965 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public class PartialRatioScorer : SimpleRatioScorerBase + public sealed class PartialRatioScorer : SimpleRatioScorerBase { protected override Func Scorer => PartialRatioStrategy.Calculate; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs index 848bb13..501e4d4 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs @@ -1,6 +1,5 @@ using System; using System.Collections.Generic; -using System.Linq; using FuzzySharp.Extensions; using FuzzySharp.Utils; @@ -55,7 +54,8 @@ public override int Score(string input1, string input2) var allPermutations = moreTokens.PermutationsOfSize(fewerTokens.Count); - List allScores = new List(); + int 
maxScore = 0; + foreach (var permutation in allPermutations) { double sum = 0; @@ -69,10 +69,14 @@ public override int Score(string input1, string input2) sum += score; } } - allScores.Add((int) (sum / fewerTokens.Count)); + var avgScore = (int) (sum / fewerTokens.Count); + if(avgScore > maxScore) + { + maxScore = avgScore; + } } - return allScores.Count==0?0:allScores.Max(); + return maxScore; } /// diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs index a216197..22cbed8 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public class PartialTokenDifferenceScorer : TokenDifferenceScorerBase + public sealed class PartialTokenDifferenceScorer : TokenDifferenceScorerBase { protected override Func Scorer => PartialRatioStrategy.Calculate; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs index fc2bfb9..a9f59fd 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public class TokenDifferenceScorer : TokenDifferenceScorerBase + public sealed class TokenDifferenceScorer : TokenDifferenceScorerBase { protected override Func Scorer => DefaultRatioStrategy.Calculate; } diff --git a/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs 
b/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs index 8e8fac2..72d6cd3 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs @@ -2,7 +2,7 @@ namespace FuzzySharp.SimilarityRatio.Strategy { - internal class DefaultRatioStrategy + internal static class DefaultRatioStrategy { public static int Calculate(string input1, string input2) { diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs index 2fdfb08..f6efd79 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs @@ -2,7 +2,7 @@ namespace FuzzySharp.SimilarityRatio.Strategy.Generic { - internal class DefaultRatioStrategy where T : IEquatable + internal static class DefaultRatioStrategy where T : IEquatable { public static int Calculate(T[] input1, T[] input2) { diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index 937518c..24f0dd6 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -4,7 +4,7 @@ namespace FuzzySharp.SimilarityRatio.Strategy.Generic { - internal class PartialRatioStrategy where T : IEquatable + internal static class PartialRatioStrategy where T : IEquatable { public static int Calculate(T[] input1, T[] input2) { diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index 442ac3f..4c7152f 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -1,10 +1,8 @@ using System; -using System.Collections.Generic; -using System.Linq; namespace 
FuzzySharp.SimilarityRatio.Strategy { - internal class PartialRatioStrategy + internal static class PartialRatioStrategy { public static int Calculate(string input1, string input2) { @@ -29,7 +27,7 @@ public static int Calculate(string input1, string input2) var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); - var scores = new List(); + double maxScore = 0; foreach (var matchingBlock in matchingBlocks) { @@ -49,11 +47,13 @@ public static int Calculate(string input1, string input2) return 100; } - scores.Add(ratio); - + if (ratio > maxScore) + { + maxScore = ratio; + } } - return (int)Math.Round(100 * scores.Max()); + return (int)Math.Round(100 * maxScore); } } } diff --git a/FuzzySharp/Utils/Heap.cs b/FuzzySharp/Utils/Heap.cs index 7b1d1eb..88c4f06 100644 --- a/FuzzySharp/Utils/Heap.cs +++ b/FuzzySharp/Utils/Heap.cs @@ -24,7 +24,7 @@ protected Heap() : this(Comparer.Default) { } - protected Heap(Comparer comparer) : this(Enumerable.Empty(), comparer) + protected Heap(Comparer comparer) : this([], comparer) { } @@ -35,9 +35,10 @@ protected Heap(IEnumerable collection) protected Heap(IEnumerable collection, Comparer comparer) { - if (collection == null) throw new ArgumentNullException(nameof(collection)); + ArgumentNullException.ThrowIfNull(collection); + ArgumentNullException.ThrowIfNull(comparer); - Comparer = comparer ?? 
throw new ArgumentNullException(nameof(comparer)); + Comparer = comparer; foreach (var item in collection) { diff --git a/FuzzySharp/Utils/Permutation.cs b/FuzzySharp/Utils/Permutation.cs index d09e8e6..d28a748 100644 --- a/FuzzySharp/Utils/Permutation.cs +++ b/FuzzySharp/Utils/Permutation.cs @@ -15,7 +15,7 @@ public Permutor(IEnumerable set) public List PermutationAt(long i) { - var set = new List(_set.OrderBy(e => e).ToList()); + var set = new List(_set.OrderBy(e => e)); for (long j = 0; j < i - 1; j++) { NextPermutation(set); @@ -62,22 +62,22 @@ public bool NextPermutation(List set) public static class Permutation { - public static List> AllPermutations(this IEnumerable seed) + private static IEnumerable> AllPermutations(this IEnumerable seed) { var set = new List(seed); - return Permute(set, 0, set.Count - 1).ToList(); + return Permute(set, 0, set.Count - 1); } - public static List> PermutationsOfSize(this IEnumerable seed, int size) + public static IEnumerable> PermutationsOfSize(this List seed, int size) { - if (seed.Count() < size) - { - return new List>(); - } - return seed.PermutationsOfSize(new List(), size).ToList(); + var result = seed.Count < size + ? 
[] + : seed.PermutationsOfSize([], size); + + return result; } - private static IEnumerable> PermutationsOfSize(this IEnumerable seed, List set, int size) + private static IEnumerable> PermutationsOfSize(this List seed, List set, int size) { if (size == 0) { @@ -85,17 +85,16 @@ private static IEnumerable> PermutationsOfSize(this IEnumerable se { yield return permutation; } + + yield break; } - else + + for (int i = 0; i < seed.Count; i++) { - var seedAsList = seed.ToList(); - for (int i = 0; i < seedAsList.Count; i++) + var newSet = new List(set) { seed[i] }; + foreach (var permutation in seed.Skip(i + 1).ToList().PermutationsOfSize(newSet, size - 1)) { - var newSet = new List(set) { seedAsList[i] }; - foreach (var permutation in seedAsList.Skip(i + 1).PermutationsOfSize(newSet, size - 1)) - { - yield return permutation; - } + yield return permutation; } } } @@ -104,7 +103,7 @@ private static IEnumerable> Permute(List set, int start, int end) { if (start == end) { - yield return new List(set); + yield return [..set]; } else { @@ -130,8 +129,8 @@ public static IEnumerable> Cycles(IEnumerable seed) var set = new LinkedList(seed); for (int i = 0; i < set.Count; i++) { - yield return new List(set); - var top = set.First(); + yield return [..set]; + var top = set.First!; set.RemoveFirst(); set.AddLast(top); } From 1b39fb9e1175e280fbed4d86ae732a3676ef0836 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Tue, 6 Aug 2024 13:59:29 +0200 Subject: [PATCH 06/30] target lib to netstandard2.0-2.1 target tests to netcore3.1,net8.0,netframework4.7.2 --- FuzzySharp.Test/FuzzySharp.Test.csproj | 4 ++-- FuzzySharp/Extensions/StringExtensions.cs | 4 ++-- FuzzySharp/FuzzySharp.csproj | 5 ++++- FuzzySharp/PreProcess/StringPreprocessorFactory.cs | 2 +- .../TokenAbbreviation/TokenAbbreviationScorerBase.cs | 2 +- .../StrategySensitive/TokenSet/TokenSetScorerBase.cs | 6 +++--- .../SimilarityRatio/Strategy/PartialRatioStrategy.cs | 2 +- FuzzySharp/Utils/Heap.cs | 7 ++----- 8 files 
changed, 16 insertions(+), 16 deletions(-) diff --git a/FuzzySharp.Test/FuzzySharp.Test.csproj b/FuzzySharp.Test/FuzzySharp.Test.csproj index 48959cd..4d977ff 100644 --- a/FuzzySharp.Test/FuzzySharp.Test.csproj +++ b/FuzzySharp.Test/FuzzySharp.Test.csproj @@ -1,9 +1,9 @@ - NET8.0 - + NET8.0;netcoreapp3.1;netframework4.7.2 false + 12.0 diff --git a/FuzzySharp/Extensions/StringExtensions.cs b/FuzzySharp/Extensions/StringExtensions.cs index b3ef27d..19e0b5c 100644 --- a/FuzzySharp/Extensions/StringExtensions.cs +++ b/FuzzySharp/Extensions/StringExtensions.cs @@ -36,7 +36,7 @@ public static List ExtractTokens(this string input) public static string[] SplitByAnySpace(this string input) { if (string.IsNullOrWhiteSpace(input)) - return Array.Empty(); + return []; var words = input.Split(Array.Empty(), StringSplitOptions.RemoveEmptyEntries); @@ -56,7 +56,7 @@ public static string NormalizeSpacesAndSort(this string input) { var words = GetSortedWords(input); - return string.Join(' ', words); + return string.Join(" ", words); } } } diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index 975af4b..2265806 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,7 +1,7 @@  - NET8.0 + netstandard2.0;netstandard2.1 true Jacob Bayer Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek @@ -21,9 +21,12 @@ true true snupkg + 12.0 + + diff --git a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs index a454e5d..3560e0e 100644 --- a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs +++ b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs @@ -19,7 +19,7 @@ private static string Default(string input) result[i] = char.IsLetterOrDigit(c) ? 
char.ToLower(c) : ' '; } - return result.Trim().ToString(); + return ((ReadOnlySpan)result).Trim().ToString(); } public static Func GetPreprocessor(PreprocessMode mode) diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs index 501e4d4..8e5a9b1 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs @@ -63,7 +63,7 @@ public override int Score(string input1, string input2) { var i1 = permutation[i]; var i2 = fewerTokens[i]; - if (StringContainsInOrder(i1, i2)) // must be at least twice as long + if (StringContainsInOrder(i1.AsSpan(), i2.AsSpan())) // must be at least twice as long { var score = Scorer(i1, i2); sum += score; diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 091d3bd..63c29ae 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -11,9 +11,9 @@ public override int Score(string input1, string input2) var tokens1 = new HashSet(input1.SplitByAnySpace()); var tokens2 = new HashSet(input2.SplitByAnySpace()); - var sortedIntersection = string.Join(' ', tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); - var sortedDiff1To2 = (sortedIntersection + " " + string.Join(' ', tokens1.Except(tokens2).OrderBy(s => s))).Trim(); - var sortedDiff2To1 = (sortedIntersection + " " + string.Join(' ', tokens2.Except(tokens1).OrderBy(s => s))).Trim(); + var sortedIntersection = string.Join(" ", tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); + var sortedDiff1To2 = (sortedIntersection 
+ " " + string.Join(" ", tokens1.Except(tokens2).OrderBy(s => s))).Trim(); + var sortedDiff2To1 = (sortedIntersection + " " + string.Join(" ", tokens2.Except(tokens1).OrderBy(s => s))).Trim(); return new[] { diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index 4c7152f..ce3b02d 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -40,7 +40,7 @@ public static int Calculate(string input1, string input2) var longSubstr = longer.AsSpan()[longStart..longEnd]; - var ratio = Levenshtein.GetRatio(shorter, longSubstr); + var ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); if (ratio > .995) { diff --git a/FuzzySharp/Utils/Heap.cs b/FuzzySharp/Utils/Heap.cs index 88c4f06..bcb937a 100644 --- a/FuzzySharp/Utils/Heap.cs +++ b/FuzzySharp/Utils/Heap.cs @@ -35,12 +35,9 @@ protected Heap(IEnumerable collection) protected Heap(IEnumerable collection, Comparer comparer) { - ArgumentNullException.ThrowIfNull(collection); - ArgumentNullException.ThrowIfNull(comparer); + Comparer = comparer ?? throw new ArgumentNullException(nameof(comparer)); - Comparer = comparer; - - foreach (var item in collection) + foreach (var item in collection ?? 
throw new ArgumentNullException(nameof(collection))) { if (Count == Capacity) Grow(); From 0b4f383d464692da693100f53c66f7bb36557b61 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Wed, 7 Aug 2024 08:57:27 +0200 Subject: [PATCH 07/30] simplify test --- .../EvaluationTests/EvaluationTests.cs | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs b/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs index eb22945..1ff2b5f 100644 --- a/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs +++ b/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs @@ -36,20 +36,20 @@ public void Evaluate() - var h1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" }); - var h2 = string.Join(", ", Process.ExtractTop("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, limit: 3)); - var h3 = string.Join(", ", Process.ExtractAll("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" })); - var h4 = string.Join(", ", Process.ExtractAll("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, cutoff: 40)); - var h5 = string.Join(", ", Process.ExtractSorted("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" })); - - var i1 = Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" }, s => s, ScorerCache.Get()); - - var events = new[] - { - new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" }, - new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" }, - new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" }, - }; + var h1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York 
Giants", "Dallas Cowboys"]); + var h2 = string.Join(", ", Process.ExtractTop("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], limit: 3)); + var h3 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"])); + var h4 = string.Join(", ", Process.ExtractAll("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"], cutoff: 40)); + var h5 = string.Join(", ", Process.ExtractSorted("goolge", ["google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl"])); + + var i1 = Process.ExtractOne("cowboys", ["Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"], s => s, ScorerCache.Get()); + + string[][] events = + [ + ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"], + ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"], + ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"] + ]; var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" }; var best = Process.ExtractOne(query, events, strings => strings[0]); From bedb9b1f4c10136d970eb706addaaf64dd7c79d1 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Thu, 8 Aug 2024 17:16:51 +0200 Subject: [PATCH 08/30] more optimization + benchmark --- FuzzySharp.Benchmarks/BenchmarkAll.cs | 92 +++++++++++++++++++ .../FuzzySharp.Benchmarks.csproj | 18 ++++ FuzzySharp.Benchmarks/Program.cs | 6 ++ FuzzySharp.sln | 12 ++- .../Scorer/Composite/WeightedRatioScorer.cs | 13 +-- .../TokenSet/TokenSetScorerBase.cs | 25 +++-- 6 files changed, 144 insertions(+), 22 deletions(-) create mode 100644 FuzzySharp.Benchmarks/BenchmarkAll.cs create mode 100644 FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj create mode 100644 FuzzySharp.Benchmarks/Program.cs diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs 
b/FuzzySharp.Benchmarks/BenchmarkAll.cs new file mode 100644 index 0000000..45320fe --- /dev/null +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -0,0 +1,92 @@ +using BenchmarkDotNet.Attributes; +using FuzzySharp.PreProcess; + +namespace FuzzySharp.Benchmarks; + +[MemoryDiagnoser] +public class BenchmarkAll +{ + [Benchmark] + public int Ratio1() + { + return Fuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); + } + + [Benchmark] + public int Ratio2() + { + return Fuzz.Ratio("mysmilarstring", "mysimilarstring"); + } + + [Benchmark] + public int PartialRatio() + { + return Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + } + + [Benchmark] + public int TokenSortRatio() + { + return Fuzz.TokenSortRatio("order words out of", " words out of order"); + } + + [Benchmark] + public int PartialTokenSortRatio() + { + return Fuzz.PartialTokenSortRatio("order words out of", " words out of order"); + } + + [Benchmark] + public int TokenSetRatio() + { + return Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + } + + [Benchmark] + public int PartialTokenSetRatio() + { + return Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + } + + [Benchmark] + public int WeightedRatio() + { + return Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); + } + + [Benchmark] + public int TokenInitialismRatio1() + { + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); + } + + [Benchmark] + public int TokenInitialismRatio2() + { + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); + } + + [Benchmark] + public int TokenInitialismRatio3() + { + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + } + + [Benchmark] + public int PartialTokenInitialismRatio() + { + return Fuzz.PartialTokenInitialismRatio("NASA", 
"National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + } + + [Benchmark] + public int TokenAbbreviationRatio() + { + return Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + } + + [Benchmark] + public int PartialTokenAbbreviationRatio() + { + return Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + } +} \ No newline at end of file diff --git a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj new file mode 100644 index 0000000..763a3ce --- /dev/null +++ b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj @@ -0,0 +1,18 @@ + + + + Exe + net8.0 + enable + enable + + + + + + + + + + + diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs new file mode 100644 index 0000000..6f75f6a --- /dev/null +++ b/FuzzySharp.Benchmarks/Program.cs @@ -0,0 +1,6 @@ +using BenchmarkDotNet.Running; +using FuzzySharp; + +BenchmarkRunner.Run(typeof(Program).Assembly); + +//Console.WriteLine(Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog")); \ No newline at end of file diff --git a/FuzzySharp.sln b/FuzzySharp.sln index 78eed94..168d5a0 100644 --- a/FuzzySharp.sln +++ b/FuzzySharp.sln @@ -1,11 +1,13 @@  Microsoft Visual Studio Solution File, Format Version 12.00 -# Visual Studio Version 16 -VisualStudioVersion = 16.0.29806.167 +# Visual Studio Version 17 +VisualStudioVersion = 17.10.35122.118 MinimumVisualStudioVersion = 10.0.40219.1 Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp", "FuzzySharp\FuzzySharp.csproj", "{348B90DA-DA44-45AD-B857-D3A69D05AE46}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzySharp.Test", "FuzzySharp.Test\FuzzySharp.Test.csproj", "{48F4C7CB-E669-410C-A455-DE3330347807}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp.Test", 
"FuzzySharp.Test\FuzzySharp.Test.csproj", "{48F4C7CB-E669-410C-A455-DE3330347807}" +EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzySharp.Benchmarks", "FuzzySharp.Benchmarks\FuzzySharp.Benchmarks.csproj", "{480CAE39-ACA7-411A-BF6B-72E61ED6E129}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution @@ -21,6 +23,10 @@ Global {48F4C7CB-E669-410C-A455-DE3330347807}.Debug|Any CPU.Build.0 = Debug|Any CPU {48F4C7CB-E669-410C-A455-DE3330347807}.Release|Any CPU.ActiveCfg = Release|Any CPU {48F4C7CB-E669-410C-A455-DE3330347807}.Release|Any CPU.Build.0 = Release|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Debug|Any CPU.Build.0 = Debug|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Release|Any CPU.ActiveCfg = Release|Any CPU + {480CAE39-ACA7-411A-BF6B-72E61ED6E129}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs index 490decb..e423422 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs @@ -1,5 +1,4 @@ using System; -using System.Linq; namespace FuzzySharp.SimilarityRatio.Scorer.Composite { @@ -38,16 +37,12 @@ public override int Score(string input1, string input2) double partialSor = Fuzz.TokenSortRatio(input1, input2) * unbaseScale * partialScale; double partialSet = Fuzz.TokenSetRatio(input1, input2) * unbaseScale * partialScale; - return (int) Math.Round(new[] { baseRatio, partial, partialSor, partialSet }.Max()); + return (int) Math.Round(Math.Max(baseRatio, Math.Max(partial, Math.Max(partialSor, partialSet)))); } - else - { - double tokenSort = Fuzz.TokenSortRatio(input1, input2) * unbaseScale; - double tokenSet 
= Fuzz.TokenSetRatio(input1, input2) * unbaseScale; - return (int) Math.Round(new[] { baseRatio, tokenSort, tokenSet }.Max()); - } + double tokenSort = Fuzz.TokenSortRatio(input1, input2) * unbaseScale; + double tokenSet = Fuzz.TokenSetRatio(input1, input2) * unbaseScale; + return (int) Math.Round(Math.Max(baseRatio, Math.Max(tokenSort, tokenSet))); } - } } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 63c29ae..7be4f76 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; using FuzzySharp.Extensions; @@ -9,18 +10,22 @@ public abstract class TokenSetScorerBase : StrategySensitiveScorerBase public override int Score(string input1, string input2) { var tokens1 = new HashSet(input1.SplitByAnySpace()); + var tokens1Copy = new HashSet(tokens1); var tokens2 = new HashSet(input2.SplitByAnySpace()); - var sortedIntersection = string.Join(" ", tokens1.Intersect(tokens2).OrderBy(s => s)).Trim(); - var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.Except(tokens2).OrderBy(s => s))).Trim(); - var sortedDiff2To1 = (sortedIntersection + " " + string.Join(" ", tokens2.Except(tokens1).OrderBy(s => s))).Trim(); + tokens1Copy.IntersectWith(tokens2); + tokens1.ExceptWith(tokens1Copy); + tokens2.ExceptWith(tokens1Copy); - return new[] - { - Scorer(sortedIntersection, sortedDiff1To2), - Scorer(sortedIntersection, sortedDiff2To1), - Scorer(sortedDiff1To2, sortedDiff2To1) - }.Max(); + var sortedIntersection = string.Join(" ", tokens1Copy.OrderBy(s => s)); + var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.OrderBy(s => s))); + var sortedDiff2To1 = 
(sortedIntersection + " " + string.Join(" ", tokens2.OrderBy(s => s))); + + var score1 = Scorer(sortedIntersection, sortedDiff1To2); + var score2 = Scorer(sortedIntersection, sortedDiff2To1); + var score3 = Scorer(sortedDiff1To2, sortedDiff2To1); + + return Math.Max(score1, Math.Max(score2, score3)); } } } From 605d700b03026f3e1bfc2e61e9c139e45660bc81 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Fri, 9 Aug 2024 11:43:54 +0200 Subject: [PATCH 09/30] formatting --- .../TokenSet/TokenSetScorerBase.cs | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 7be4f76..8b659c3 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -10,16 +10,17 @@ public abstract class TokenSetScorerBase : StrategySensitiveScorerBase public override int Score(string input1, string input2) { var tokens1 = new HashSet(input1.SplitByAnySpace()); - var tokens1Copy = new HashSet(tokens1); var tokens2 = new HashSet(input2.SplitByAnySpace()); - tokens1Copy.IntersectWith(tokens2); - tokens1.ExceptWith(tokens1Copy); - tokens2.ExceptWith(tokens1Copy); + var intersection = new HashSet(tokens1); + intersection.IntersectWith(tokens2); - var sortedIntersection = string.Join(" ", tokens1Copy.OrderBy(s => s)); - var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.OrderBy(s => s))); - var sortedDiff2To1 = (sortedIntersection + " " + string.Join(" ", tokens2.OrderBy(s => s))); + tokens1.ExceptWith(intersection); + tokens2.ExceptWith(intersection); + + var sortedIntersection = string.Join(" ", intersection.OrderBy(s => s)); + var sortedDiff1To2 = sortedIntersection + " " + string.Join(" ", tokens1.OrderBy(s => s)); + var sortedDiff2To1 = 
sortedIntersection + " " + string.Join(" ", tokens2.OrderBy(s => s)); var score1 = Scorer(sortedIntersection, sortedDiff1To2); var score2 = Scorer(sortedIntersection, sortedDiff2To1); From accca0ac4ecd7af08198e2c07c90c8c25da4911a Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 18:18:12 +0200 Subject: [PATCH 10/30] revert some frameworks, fix unit tests --- FuzzySharp/Extensions/StringExtensions.cs | 7 ++++- FuzzySharp/FuzzySharp.csproj | 27 ++++++++++++------- .../TokenSet/TokenSetScorerBase.cs | 4 +-- 3 files changed, 25 insertions(+), 13 deletions(-) diff --git a/FuzzySharp/Extensions/StringExtensions.cs b/FuzzySharp/Extensions/StringExtensions.cs index 19e0b5c..f05209a 100644 --- a/FuzzySharp/Extensions/StringExtensions.cs +++ b/FuzzySharp/Extensions/StringExtensions.cs @@ -38,7 +38,7 @@ public static string[] SplitByAnySpace(this string input) if (string.IsNullOrWhiteSpace(input)) return []; - var words = input.Split(Array.Empty(), StringSplitOptions.RemoveEmptyEntries); + var words = input.Split(EmptyArray(), StringSplitOptions.RemoveEmptyEntries); return words; } @@ -58,5 +58,10 @@ public static string NormalizeSpacesAndSort(this string input) return string.Join(" ", words); } + + private static T[] EmptyArray() + { + return []; + } } } diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index 2265806..d4378df 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,7 +1,7 @@  - netstandard2.0;netstandard2.1 + netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;net6;net8 true Jacob Bayer Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek @@ -11,12 +11,12 @@ MIT git - 2.0.2 + 2.0.3 Include source link true https://github.com/JakeBayer/FuzzySharp - 1.0.4.0 - 1.0.4.0 + 1.0.5.0 + 1.0.5.0 true true @@ -24,15 +24,22 @@ 12.0 + + + + - - - - - - + + + diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs 
b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 8b659c3..744bcc8 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -19,8 +19,8 @@ public override int Score(string input1, string input2) tokens2.ExceptWith(intersection); var sortedIntersection = string.Join(" ", intersection.OrderBy(s => s)); - var sortedDiff1To2 = sortedIntersection + " " + string.Join(" ", tokens1.OrderBy(s => s)); - var sortedDiff2To1 = sortedIntersection + " " + string.Join(" ", tokens2.OrderBy(s => s)); + var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.OrderBy(s => s))).Trim(); + var sortedDiff2To1 = (sortedIntersection + " " + string.Join(" ", tokens2.OrderBy(s => s))).Trim(); var score1 = Scorer(sortedIntersection, sortedDiff1To2); var score2 = Scorer(sortedIntersection, sortedDiff2To1); From e299031bfad528249c34dd00d511afb2e413a72a Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 21:29:18 +0200 Subject: [PATCH 11/30] little speedup --- FuzzySharp/FuzzySharp.csproj | 2 +- .../TokenSet/TokenSetScorerBase.cs | 22 ++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index d4378df..b4fd89a 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,7 +1,7 @@  - netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;net6;net8 + netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;NET60;NET80 true Jacob Bayer Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index 744bcc8..af61e86 100644 --- 
a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -12,11 +12,7 @@ public override int Score(string input1, string input2) var tokens1 = new HashSet(input1.SplitByAnySpace()); var tokens2 = new HashSet(input2.SplitByAnySpace()); - var intersection = new HashSet(tokens1); - intersection.IntersectWith(tokens2); - - tokens1.ExceptWith(intersection); - tokens2.ExceptWith(intersection); + var intersection = GetIntersectionAndExcept(tokens1, tokens2); var sortedIntersection = string.Join(" ", intersection.OrderBy(s => s)); var sortedDiff1To2 = (sortedIntersection + " " + string.Join(" ", tokens1.OrderBy(s => s))).Trim(); @@ -28,5 +24,21 @@ public override int Score(string input1, string input2) return Math.Max(score1, Math.Max(score2, score3)); } + + private static List GetIntersectionAndExcept(HashSet first, HashSet second) + { + List intersection = []; + + foreach (var item in first.ToArray()) + { + if (second.Remove(item)) + { + first.Remove(item); + intersection.Add(item); + } + } + + return intersection; + } } } From 4f0722812a27e65714748809eadedaef611c53b2 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 22:01:00 +0200 Subject: [PATCH 12/30] remove linq.max --- .../Strategy/Generic/PartialRatioStrategyT.cs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index 24f0dd6..09c05e8 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -1,6 +1,4 @@ using System; -using System.Collections.Generic; -using System.Linq; namespace FuzzySharp.SimilarityRatio.Strategy.Generic { @@ -29,7 +27,7 @@ public static int Calculate(T[] input1, T[] input2) var 
matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); - var scores = new List(); + double maxScore = 0; foreach (var matchingBlock in matchingBlocks) { @@ -49,11 +47,13 @@ public static int Calculate(T[] input1, T[] input2) return 100; } - scores.Add(ratio); - + if (ratio > maxScore) + { + maxScore = ratio; + } } - return (int)Math.Round(100 * scores.Max()); + return (int)Math.Round(100 * maxScore); } } } From 51e27ce830b373b33b749ff5ee54a8c881441753 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 22:06:54 +0200 Subject: [PATCH 13/30] revert sealed classes back --- FuzzySharp/Edits/MatchingBlock.cs | 2 +- .../SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs | 2 +- .../Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs | 2 +- .../Scorer/StrategySensitive/Simple/PartialRatioScorer.cs | 2 +- .../TokenDifference/PartialTokenDifferenceScorer.cs | 2 +- .../StrategySensitive/TokenDifference/TokenDifferenceScorer.cs | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/FuzzySharp/Edits/MatchingBlock.cs b/FuzzySharp/Edits/MatchingBlock.cs index 9f0e399..16ea018 100644 --- a/FuzzySharp/Edits/MatchingBlock.cs +++ b/FuzzySharp/Edits/MatchingBlock.cs @@ -1,6 +1,6 @@ namespace FuzzySharp.Edits { - public sealed class MatchingBlock + public class MatchingBlock { public int SourcePos { get; set; } public int DestPos { get; set; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs index e423422..2700f15 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs @@ -2,7 +2,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.Composite { - public sealed class WeightedRatioScorer : ScorerBase + public class WeightedRatioScorer : ScorerBase { private static double UNBASE_SCALE = .95; private static double PARTIAL_SCALE = .90; diff --git 
a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs index e33dee9..12ef6d1 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public sealed class DefaultRatioScorer : SimpleRatioScorerBase + public class DefaultRatioScorer : SimpleRatioScorerBase { protected override Func Scorer => DefaultRatioStrategy.Calculate; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs index 0065965..049d8af 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public sealed class PartialRatioScorer : SimpleRatioScorerBase + public class PartialRatioScorer : SimpleRatioScorerBase { protected override Func Scorer => PartialRatioStrategy.Calculate; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs index 22cbed8..a216197 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public sealed class PartialTokenDifferenceScorer : TokenDifferenceScorerBase + public class PartialTokenDifferenceScorer : TokenDifferenceScorerBase { protected 
override Func Scorer => PartialRatioStrategy.Calculate; } diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs index a9f59fd..fc2bfb9 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs @@ -3,7 +3,7 @@ namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { - public sealed class TokenDifferenceScorer : TokenDifferenceScorerBase + public class TokenDifferenceScorer : TokenDifferenceScorerBase { protected override Func Scorer => DefaultRatioStrategy.Calculate; } From b813c58c5e302eb11c315ca1353405dd44945a58 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 22:20:27 +0200 Subject: [PATCH 14/30] remove unnecessary changes --- .../Strategy/Generic/PartialRatioStrategyT.cs | 11 ++++++----- .../SimilarityRatio/Strategy/PartialRatioStrategy.cs | 11 ++++++----- FuzzySharp/Utils/Heap.cs | 11 ++++++----- 3 files changed, 18 insertions(+), 15 deletions(-) diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index 09c05e8..2f35fce 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -1,4 +1,5 @@ using System; +using FuzzySharp.Edits; namespace FuzzySharp.SimilarityRatio.Strategy.Generic { @@ -25,22 +26,22 @@ public static int Calculate(T[] input1, T[] input2) longer = input1; } - var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); + MatchingBlock[] matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); double maxScore = 0; foreach (var matchingBlock in matchingBlocks) { - var dist = matchingBlock.DestPos - 
matchingBlock.SourcePos; + int dist = matchingBlock.DestPos - matchingBlock.SourcePos; - var longStart = dist > 0 ? dist : 0; - var longEnd = longStart + shorter.Length; + int longStart = dist > 0 ? dist : 0; + int longEnd = longStart + shorter.Length; if (longEnd > longer.Length) longEnd = longer.Length; var longSubstr = longer.AsSpan()[longStart..longEnd]; - var ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); + double ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); if (ratio > .995) { diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index ce3b02d..fd95800 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -1,4 +1,5 @@ using System; +using FuzzySharp.Edits; namespace FuzzySharp.SimilarityRatio.Strategy { @@ -25,22 +26,22 @@ public static int Calculate(string input1, string input2) longer = input1; } - var matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); + MatchingBlock[] matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); double maxScore = 0; foreach (var matchingBlock in matchingBlocks) { - var dist = matchingBlock.DestPos - matchingBlock.SourcePos; + int dist = matchingBlock.DestPos - matchingBlock.SourcePos; - var longStart = dist > 0 ? dist : 0; - var longEnd = longStart + shorter.Length; + int longStart = dist > 0 ? 
dist : 0; + int longEnd = longStart + shorter.Length; if (longEnd > longer.Length) longEnd = longer.Length; var longSubstr = longer.AsSpan()[longStart..longEnd]; - var ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); + double ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); if (ratio > .995) { diff --git a/FuzzySharp/Utils/Heap.cs b/FuzzySharp/Utils/Heap.cs index bcb937a..a732edb 100644 --- a/FuzzySharp/Utils/Heap.cs +++ b/FuzzySharp/Utils/Heap.cs @@ -36,8 +36,9 @@ protected Heap(IEnumerable collection) protected Heap(IEnumerable collection, Comparer comparer) { Comparer = comparer ?? throw new ArgumentNullException(nameof(comparer)); + _ = collection ?? throw new ArgumentNullException(nameof(collection)); - foreach (var item in collection ?? throw new ArgumentNullException(nameof(collection))) + foreach (var item in collection) { if (Count == Capacity) Grow(); @@ -45,7 +46,7 @@ protected Heap(IEnumerable collection, Comparer comparer) _heap[Count++] = item; } - for (var i = Parent(Count - 1); i >= 0; i--) + for (int i = Parent(Count - 1); i >= 0; i--) BubbleDown(i); } @@ -78,7 +79,7 @@ public T GetMin() public T ExtractDominating() { if (Count == 0) throw new InvalidOperationException("Heap is empty"); - var ret = _heap[0]; + T ret = _heap[0]; Count--; Swap(Count, 0); BubbleDown(0); @@ -98,7 +99,7 @@ private void BubbleDown(int i) private int Dominating(int i) { - var dominatingNode = i; + int dominatingNode = i; dominatingNode = GetDominating(YoungChild(i), dominatingNode); dominatingNode = GetDominating(OldChild(i), dominatingNode); @@ -135,7 +136,7 @@ private static int OldChild(int i) private void Grow() { - var newCapacity = Capacity * GrowFactor + MinGrow; + int newCapacity = Capacity * GrowFactor + MinGrow; var newHeap = new T[newCapacity]; Array.Copy(_heap, newHeap, Capacity); _heap = newHeap; From a83f39abce3fba1b195b7ba7c79ebc7ec266d6d0 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 22:43:38 +0200 Subject: 
[PATCH 15/30] remove unnecessary vshistory from gitignore --- .gitignore | 1 - 1 file changed, 1 deletion(-) diff --git a/.gitignore b/.gitignore index 861efa5..940794e 100644 --- a/.gitignore +++ b/.gitignore @@ -26,7 +26,6 @@ bld/ # Visual Studio 2015 cache/options directory .vs/ -.vshistory/ # Uncomment if you have tasks that create the project's static files in wwwroot #wwwroot/ From 4fb074a0e27b423f8b14b6874f03bcc454e23b53 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 23:26:14 +0200 Subject: [PATCH 16/30] remove unnecessary conversion to span --- FuzzySharp/Levenshtein.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FuzzySharp/Levenshtein.cs b/FuzzySharp/Levenshtein.cs index a815be1..9315b6f 100644 --- a/FuzzySharp/Levenshtein.cs +++ b/FuzzySharp/Levenshtein.cs @@ -49,7 +49,7 @@ private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, Re len1++; len2++; - Span matrix = new int[len2 * len1]; + int[] matrix = new int[len2 * len1]; for (i = 0; i < len2; i++) matrix[i] = i; @@ -100,7 +100,7 @@ private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, Re private static EditOp[] EditOpsFromCostMatrix(int len1, ReadOnlySpan c1, int p1, int o1, int len2, ReadOnlySpan c2, int p2, int o2, - Span matrix) + int[] matrix) where T: IEquatable { From f43f4ee7f369b20be0c5ec4620baed6e91a20d97 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 10 Aug 2024 23:52:24 +0200 Subject: [PATCH 17/30] replace concurrentdictionary with generic instance creation replace instance lambda with static copied from https://github.com/JakeBayer/FuzzySharp/pull/42 --- FuzzySharp/PreProcess/StringPreprocessorFactory.cs | 2 +- FuzzySharp/SimilarityRatio/ScorerCache.cs | 14 ++++++++------ 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs index 3560e0e..e0aaa59 100644 --- 
a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs +++ b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs @@ -27,7 +27,7 @@ public static Func GetPreprocessor(PreprocessMode mode) return mode switch { PreprocessMode.Full => Default, - PreprocessMode.None => s => s, + PreprocessMode.None => static s => s, _ => throw new InvalidOperationException($"Invalid string preprocessor mode: {mode}") }; } diff --git a/FuzzySharp/SimilarityRatio/ScorerCache.cs b/FuzzySharp/SimilarityRatio/ScorerCache.cs index 34b405e..15229bb 100644 --- a/FuzzySharp/SimilarityRatio/ScorerCache.cs +++ b/FuzzySharp/SimilarityRatio/ScorerCache.cs @@ -1,15 +1,17 @@ -using System; -using System.Collections.Concurrent; +using System.Runtime.CompilerServices; using FuzzySharp.SimilarityRatio.Scorer; namespace FuzzySharp.SimilarityRatio { public static class ScorerCache { - private static readonly ConcurrentDictionary s_scorerCache = new ConcurrentDictionary(); - public static IRatioScorer Get() where T : IRatioScorer, new() + [MethodImpl(MethodImplOptions.AggressiveInlining)] + public static IRatioScorer Get() where T : IRatioScorer, new() => GenericCache.Instance; + + private static class GenericCache + where T : IRatioScorer, new() { - return s_scorerCache.GetOrAdd(typeof(T), new T()); + public static readonly T Instance = new T(); } } -} +} \ No newline at end of file From 81bf39f2b9081fecbbbaf36d65e80c2b2975ebc0 Mon Sep 17 00:00:00 2001 From: "yevhen.cherkes" Date: Mon, 12 Aug 2024 09:35:47 +0200 Subject: [PATCH 18/30] remove duplicate, format --- FuzzySharp/FuzzySharp.csproj | 42 +++++++++++++++++------------------- 1 file changed, 20 insertions(+), 22 deletions(-) diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index b4fd89a..529ab21 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,27 +1,25 @@  - - netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;NET60;NET80 - true - Jacob Bayer - Fuzzy string matcher based 
on FuzzyWuzzy algorithm from SeatGeek - Fuzzy String Matching Comparison FuzzyWuzzy FuzzySharp - false - https://github.com/JakeBayer/FuzzySharp - MIT - git - - 2.0.3 - Include source link - true - https://github.com/JakeBayer/FuzzySharp - 1.0.5.0 - 1.0.5.0 - - true - true - snupkg - 12.0 + + 1.0.5.0 + Jacob Bayer + + Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek + 1.0.5.0 + true + true + 12.0 + MIT + https://github.com/JakeBayer/FuzzySharp + Include source link + false + Fuzzy String Matching Comparison FuzzyWuzzy FuzzySharp + true + git + https://github.com/JakeBayer/FuzzySharp + snupkg + netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;NET60;NET80 + 2.0.3 Date: Mon, 12 Aug 2024 12:38:37 +0200 Subject: [PATCH 19/30] remove unnecessary calls asspan in the cycle --- FuzzySharp/Levenshtein.cs | 6 +++--- .../Strategy/PartialRatioStrategy.cs | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/FuzzySharp/Levenshtein.cs b/FuzzySharp/Levenshtein.cs index 9315b6f..1660705 100644 --- a/FuzzySharp/Levenshtein.cs +++ b/FuzzySharp/Levenshtein.cs @@ -12,9 +12,9 @@ private static EditOp[] GetEditOps(T[] arr1, T[] arr2) where T : IEquatable s1, ReadOnlySpan s2) { - return GetEditOps(s1.Length, s1.AsSpan(), s2.Length, s2.AsSpan()); + return GetEditOps(s1.Length, s1, s2.Length, s2); } private static EditOp[] GetEditOps(int len1, ReadOnlySpan c1, int len2, ReadOnlySpan c2) where T : IEquatable @@ -227,7 +227,7 @@ public static MatchingBlock[] GetMatchingBlocks(T[] s1, T[] s2) where T : IEq } // Special Case - public static MatchingBlock[] GetMatchingBlocks(string s1, string s2) + public static MatchingBlock[] GetMatchingBlocks(ReadOnlySpan s1, ReadOnlySpan s2) { return GetMatchingBlocks(s1.Length, s2.Length, GetEditOps(s1, s2)); diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index fd95800..20d9b26 100644 --- 
a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -7,23 +7,23 @@ internal static class PartialRatioStrategy { public static int Calculate(string input1, string input2) { - string shorter; - string longer; - if (input1.Length == 0 || input2.Length == 0) { return 0; } + ReadOnlySpan shorter; + ReadOnlySpan longer; + if (input1.Length < input2.Length) { - shorter = input1; - longer = input2; + shorter = input1.AsSpan(); + longer = input2.AsSpan(); } else { - shorter = input2; - longer = input1; + shorter = input2.AsSpan(); + longer = input1.AsSpan(); } MatchingBlock[] matchingBlocks = Levenshtein.GetMatchingBlocks(shorter, longer); @@ -39,9 +39,9 @@ public static int Calculate(string input1, string input2) if (longEnd > longer.Length) longEnd = longer.Length; - var longSubstr = longer.AsSpan()[longStart..longEnd]; + var longSubstr = longer[longStart..longEnd]; - double ratio = Levenshtein.GetRatio(shorter.AsSpan(), longSubstr); + double ratio = Levenshtein.GetRatio(shorter, longSubstr); if (ratio > .995) { From dbd6231e256150c7be6b0c827fce9662d1eab299 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 17 Aug 2024 12:52:32 +0200 Subject: [PATCH 20/30] Add prefix Raffinert to namespaces to be able to use and compare both original and refined versions of library. 
--- FuzzySharp.Benchmarks/BenchmarkAll.cs | 4 ++-- FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj | 2 ++ FuzzySharp.Benchmarks/Program.cs | 2 +- FuzzySharp.Test/EvaluationTests/EvaluationTests.cs | 12 ++++++------ FuzzySharp.Test/FuzzySharp.Test.csproj | 2 ++ FuzzySharp.Test/FuzzyTests/ProcessTests.cs | 6 +++--- FuzzySharp.Test/FuzzyTests/RatioTests.cs | 4 ++-- FuzzySharp.Test/FuzzyTests/RegressionTests.cs | 13 ++++++------- .../ScorerTests/TokenSetScorerBaseTest.cs | 10 +++++----- FuzzySharp.sln | 2 +- FuzzySharp/Edits/EditOp.cs | 2 +- FuzzySharp/Edits/MatchingBlock.cs | 2 +- FuzzySharp/Edits/OpCode.cs | 2 +- FuzzySharp/Extensions/EnumerableExtensions.cs | 4 ++-- FuzzySharp/Extensions/StringExtensions.cs | 2 +- FuzzySharp/Extractor/ExtractedResult.cs | 2 +- FuzzySharp/Extractor/ResultExtractor.cs | 6 +++--- FuzzySharp/Fuzz.cs | 10 +++++----- FuzzySharp/FuzzySharp.csproj | 12 ++++-------- FuzzySharp/Levenshtein.cs | 4 ++-- FuzzySharp/PreProcess/PreprocessMode.cs | 2 +- FuzzySharp/PreProcess/StringPreprocessorFactory.cs | 2 +- FuzzySharp/Process.cs | 12 ++++++------ .../Scorer/Composite/WeightedRatioScorer.cs | 2 +- .../SimilarityRatio/Scorer/Generic/IRatioScorer.cs | 2 +- .../SimilarityRatio/Scorer/Generic/ScorerBase.cs | 2 +- FuzzySharp/SimilarityRatio/Scorer/IRatioScorer.cs | 4 ++-- FuzzySharp/SimilarityRatio/Scorer/ScorerBase.cs | 4 ++-- .../Generic/StrategySensitiveScorerBase.cs | 4 ++-- .../StrategySensitive/Simple/DefaultRatioScorer.cs | 4 ++-- .../StrategySensitive/Simple/PartialRatioScorer.cs | 4 ++-- .../Simple/SimpleRatioScorerBase.cs | 2 +- .../StrategySensitiveScorerBase.cs | 2 +- .../PartialTokenAbbreviationScorer.cs | 4 ++-- .../TokenAbbreviation/TokenAbbreviationScorer.cs | 4 ++-- .../TokenAbbreviationScorerBase.cs | 6 +++--- .../TokenDifference/PartialTokenDifferenceScorer.cs | 4 ++-- .../TokenDifference/TokenDifferenceScorer.cs | 4 ++-- .../TokenDifference/TokenDifferenceScorerBase.cs | 8 ++++---- 
.../TokenInitialism/PartialTokenInitialismScorer.cs | 4 ++-- .../TokenInitialism/TokenInitialismScorer.cs | 4 ++-- .../TokenInitialism/TokenInitialismScorerBase.cs | 4 ++-- .../TokenSet/PartialTokenSetScorer.cs | 4 ++-- .../StrategySensitive/TokenSet/TokenSetScorer.cs | 4 ++-- .../TokenSet/TokenSetScorerBase.cs | 4 ++-- .../TokenSort/PartialTokenSortScorer.cs | 4 ++-- .../TokenSort/TokenSortAlgorithm.cs | 4 ++-- .../StrategySensitive/TokenSort/TokenSortScorer.cs | 4 ++-- FuzzySharp/SimilarityRatio/ScorerCache.cs | 4 ++-- .../Strategy/DefaultRatioStrategy.cs | 2 +- .../Strategy/Generic/DefaultRatioStrategyT.cs | 2 +- .../Strategy/Generic/PartialRatioStrategyT.cs | 4 ++-- .../Strategy/PartialRatioStrategy.cs | 4 ++-- FuzzySharp/Utils/Heap.cs | 2 +- FuzzySharp/Utils/Permutation.cs | 2 +- 55 files changed, 117 insertions(+), 118 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs b/FuzzySharp.Benchmarks/BenchmarkAll.cs index 45320fe..e0f2f27 100644 --- a/FuzzySharp.Benchmarks/BenchmarkAll.cs +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -1,7 +1,7 @@ using BenchmarkDotNet.Attributes; -using FuzzySharp.PreProcess; +using Raffinert.FuzzySharp.PreProcess; -namespace FuzzySharp.Benchmarks; +namespace Raffinert.FuzzySharp.Benchmarks; [MemoryDiagnoser] public class BenchmarkAll diff --git a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj index 763a3ce..5f3c618 100644 --- a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj +++ b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj @@ -5,6 +5,8 @@ net8.0 enable enable + Raffinert.$(MSBuildProjectName) + Raffinert.$(MSBuildProjectName.Replace(" ", "_")) diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs index 6f75f6a..9f90b4d 100644 --- a/FuzzySharp.Benchmarks/Program.cs +++ b/FuzzySharp.Benchmarks/Program.cs @@ -1,5 +1,5 @@ using BenchmarkDotNet.Running; -using FuzzySharp; +using Raffinert.FuzzySharp; 
BenchmarkRunner.Run(typeof(Program).Assembly); diff --git a/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs b/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs index 1ff2b5f..726cb7c 100644 --- a/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs +++ b/FuzzySharp.Test/EvaluationTests/EvaluationTests.cs @@ -1,10 +1,10 @@ -using FuzzySharp.PreProcess; -using FuzzySharp.SimilarityRatio; -using FuzzySharp.SimilarityRatio.Scorer.Composite; -using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; -using NUnit.Framework; +using NUnit.Framework; +using Raffinert.FuzzySharp.PreProcess; +using Raffinert.FuzzySharp.SimilarityRatio; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; -namespace FuzzySharp.Test.EvaluationTests +namespace Raffinert.FuzzySharp.Test.EvaluationTests { [TestFixture] public class EvaluationTests diff --git a/FuzzySharp.Test/FuzzySharp.Test.csproj b/FuzzySharp.Test/FuzzySharp.Test.csproj index 4d977ff..6f8fa0a 100644 --- a/FuzzySharp.Test/FuzzySharp.Test.csproj +++ b/FuzzySharp.Test/FuzzySharp.Test.csproj @@ -4,6 +4,8 @@ NET8.0;netcoreapp3.1;netframework4.7.2 false 12.0 + Raffinert.$(MSBuildProjectName) + Raffinert.$(MSBuildProjectName.Replace(" ", "_")) diff --git a/FuzzySharp.Test/FuzzyTests/ProcessTests.cs b/FuzzySharp.Test/FuzzyTests/ProcessTests.cs index d5cd503..0821252 100644 --- a/FuzzySharp.Test/FuzzyTests/ProcessTests.cs +++ b/FuzzySharp.Test/FuzzyTests/ProcessTests.cs @@ -1,10 +1,10 @@ using System.Collections.Generic; using System.Linq; -using FuzzySharp.SimilarityRatio; -using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; using NUnit.Framework; +using Raffinert.FuzzySharp.SimilarityRatio; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; -namespace FuzzySharp.Test.FuzzyTests +namespace Raffinert.FuzzySharp.Test.FuzzyTests { [TestFixture] public class ProcessTests diff --git a/FuzzySharp.Test/FuzzyTests/RatioTests.cs 
b/FuzzySharp.Test/FuzzyTests/RatioTests.cs index ec499b0..63375db 100644 --- a/FuzzySharp.Test/FuzzyTests/RatioTests.cs +++ b/FuzzySharp.Test/FuzzyTests/RatioTests.cs @@ -1,7 +1,7 @@ -using FuzzySharp.PreProcess; using NUnit.Framework; +using Raffinert.FuzzySharp.PreProcess; -namespace FuzzySharp.Test.FuzzyTests +namespace Raffinert.FuzzySharp.Test.FuzzyTests { [TestFixture] public class RatioTests diff --git a/FuzzySharp.Test/FuzzyTests/RegressionTests.cs b/FuzzySharp.Test/FuzzyTests/RegressionTests.cs index 9fea3f8..01979d1 100644 --- a/FuzzySharp.Test/FuzzyTests/RegressionTests.cs +++ b/FuzzySharp.Test/FuzzyTests/RegressionTests.cs @@ -1,12 +1,11 @@ - -using FuzzySharp.SimilarityRatio; -using FuzzySharp.SimilarityRatio.Scorer; -using NUnit.Framework; -using System; +using System; using System.Linq; using System.Reflection; +using NUnit.Framework; +using Raffinert.FuzzySharp.SimilarityRatio; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer; -namespace FuzzySharp.Test.FuzzyTests +namespace Raffinert.FuzzySharp.Test.FuzzyTests { [TestFixture] public class RegressionTests @@ -31,7 +30,7 @@ public void TestScoringEmptyString() }).ToList(); var scorerTypes = types.Where(t => scorerType.IsAssignableFrom(t) && !t.IsAbstract && t.IsClass).ToList(); - string nullString = null; //Null doesnt seem to be handled by any scorer + string nullString = null; //Null doesn't seem to be handled by any scorer string emptyString = ""; string whitespaceString = " "; diff --git a/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs b/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs index 481b1f2..12f822f 100644 --- a/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs +++ b/FuzzySharp.Test/FuzzyTests/ScorerTests/TokenSetScorerBaseTest.cs @@ -1,9 +1,9 @@ -using FuzzySharp.SimilarityRatio; -using FuzzySharp.SimilarityRatio.Scorer; -using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; -using NUnit.Framework; +using NUnit.Framework; +using 
Raffinert.FuzzySharp.SimilarityRatio; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; -namespace FuzzySharp.Test.FuzzyTests.ScorerTests +namespace Raffinert.FuzzySharp.Test.FuzzyTests.ScorerTests { [TestFixture] public class TokenSetScorerBaseTest diff --git a/FuzzySharp.sln b/FuzzySharp.sln index 168d5a0..ac51363 100644 --- a/FuzzySharp.sln +++ b/FuzzySharp.sln @@ -7,7 +7,7 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp", "FuzzySharp\Fu EndProject Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp.Test", "FuzzySharp.Test\FuzzySharp.Test.csproj", "{48F4C7CB-E669-410C-A455-DE3330347807}" EndProject -Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "FuzzySharp.Benchmarks", "FuzzySharp.Benchmarks\FuzzySharp.Benchmarks.csproj", "{480CAE39-ACA7-411A-BF6B-72E61ED6E129}" +Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "FuzzySharp.Benchmarks", "FuzzySharp.Benchmarks\FuzzySharp.Benchmarks.csproj", "{480CAE39-ACA7-411A-BF6B-72E61ED6E129}" EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution diff --git a/FuzzySharp/Edits/EditOp.cs b/FuzzySharp/Edits/EditOp.cs index 674bc13..eeaa1b2 100644 --- a/FuzzySharp/Edits/EditOp.cs +++ b/FuzzySharp/Edits/EditOp.cs @@ -1,4 +1,4 @@ -namespace FuzzySharp.Edits +namespace Raffinert.FuzzySharp.Edits { public enum EditType { diff --git a/FuzzySharp/Edits/MatchingBlock.cs b/FuzzySharp/Edits/MatchingBlock.cs index 16ea018..66f9629 100644 --- a/FuzzySharp/Edits/MatchingBlock.cs +++ b/FuzzySharp/Edits/MatchingBlock.cs @@ -1,4 +1,4 @@ -namespace FuzzySharp.Edits +namespace Raffinert.FuzzySharp.Edits { public class MatchingBlock { diff --git a/FuzzySharp/Edits/OpCode.cs b/FuzzySharp/Edits/OpCode.cs index f6fa1e3..f43a09e 100644 --- a/FuzzySharp/Edits/OpCode.cs +++ b/FuzzySharp/Edits/OpCode.cs @@ -1,4 +1,4 @@ -namespace FuzzySharp.Edits +namespace Raffinert.FuzzySharp.Edits { public class OpCode { diff --git 
a/FuzzySharp/Extensions/EnumerableExtensions.cs b/FuzzySharp/Extensions/EnumerableExtensions.cs index ad8af69..ecead6d 100644 --- a/FuzzySharp/Extensions/EnumerableExtensions.cs +++ b/FuzzySharp/Extensions/EnumerableExtensions.cs @@ -1,8 +1,8 @@ using System; using System.Collections.Generic; -using FuzzySharp.Utils; +using Raffinert.FuzzySharp.Utils; -namespace FuzzySharp.Extensions +namespace Raffinert.FuzzySharp.Extensions { public static class EnumerableExtensions { diff --git a/FuzzySharp/Extensions/StringExtensions.cs b/FuzzySharp/Extensions/StringExtensions.cs index f05209a..aa6afdf 100644 --- a/FuzzySharp/Extensions/StringExtensions.cs +++ b/FuzzySharp/Extensions/StringExtensions.cs @@ -1,7 +1,7 @@ using System; using System.Collections.Generic; -namespace FuzzySharp.Extensions +namespace Raffinert.FuzzySharp.Extensions { internal static class StringExtensions { diff --git a/FuzzySharp/Extractor/ExtractedResult.cs b/FuzzySharp/Extractor/ExtractedResult.cs index 30aad9c..920f727 100644 --- a/FuzzySharp/Extractor/ExtractedResult.cs +++ b/FuzzySharp/Extractor/ExtractedResult.cs @@ -1,7 +1,7 @@ using System; using System.Collections.Generic; -namespace FuzzySharp.Extractor +namespace Raffinert.FuzzySharp.Extractor { public class ExtractedResult : IComparable> { diff --git a/FuzzySharp/Extractor/ResultExtractor.cs b/FuzzySharp/Extractor/ResultExtractor.cs index 66b7168..b0bb657 100644 --- a/FuzzySharp/Extractor/ResultExtractor.cs +++ b/FuzzySharp/Extractor/ResultExtractor.cs @@ -1,10 +1,10 @@ using System; using System.Collections.Generic; using System.Linq; -using FuzzySharp.Extensions; -using FuzzySharp.SimilarityRatio.Scorer; +using Raffinert.FuzzySharp.Extensions; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer; -namespace FuzzySharp.Extractor +namespace Raffinert.FuzzySharp.Extractor { public static class ResultExtractor { diff --git a/FuzzySharp/Fuzz.cs b/FuzzySharp/Fuzz.cs index 77a4176..3e66f91 100644 --- a/FuzzySharp/Fuzz.cs +++ b/FuzzySharp/Fuzz.cs 
@@ -1,9 +1,9 @@ -using FuzzySharp.PreProcess; -using FuzzySharp.SimilarityRatio; -using FuzzySharp.SimilarityRatio.Scorer.Composite; -using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; +using Raffinert.FuzzySharp.PreProcess; +using Raffinert.FuzzySharp.SimilarityRatio; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive; -namespace FuzzySharp +namespace Raffinert.FuzzySharp { public static class Fuzz { diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index 529ab21..856b450 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -2,7 +2,7 @@ 1.0.5.0 - Jacob Bayer + Jacob Bayer;Yevhen Cherkes Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek 1.0.5.0 @@ -20,15 +20,11 @@ snupkg netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;NET60;NET80 2.0.3 + Raffinert.$(MSBuildProjectName) + Raffinert.$(MSBuildProjectName.Replace(" ", "_")) - + diff --git a/FuzzySharp/Levenshtein.cs b/FuzzySharp/Levenshtein.cs index 1660705..36867c7 100644 --- a/FuzzySharp/Levenshtein.cs +++ b/FuzzySharp/Levenshtein.cs @@ -1,8 +1,8 @@ using System; using System.Diagnostics; -using FuzzySharp.Edits; +using Raffinert.FuzzySharp.Edits; -namespace FuzzySharp +namespace Raffinert.FuzzySharp { public static class Levenshtein { diff --git a/FuzzySharp/PreProcess/PreprocessMode.cs b/FuzzySharp/PreProcess/PreprocessMode.cs index be801e7..b8d1dde 100644 --- a/FuzzySharp/PreProcess/PreprocessMode.cs +++ b/FuzzySharp/PreProcess/PreprocessMode.cs @@ -1,4 +1,4 @@ -namespace FuzzySharp.PreProcess +namespace Raffinert.FuzzySharp.PreProcess { public enum PreprocessMode { diff --git a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs index e0aaa59..fff0d3c 100644 --- a/FuzzySharp/PreProcess/StringPreprocessorFactory.cs +++ b/FuzzySharp/PreProcess/StringPreprocessorFactory.cs @@ -1,6 +1,6 
@@ using System; -namespace FuzzySharp.PreProcess +namespace Raffinert.FuzzySharp.PreProcess { internal static class StringPreprocessorFactory { diff --git a/FuzzySharp/Process.cs b/FuzzySharp/Process.cs index 87717fe..f96bbd7 100644 --- a/FuzzySharp/Process.cs +++ b/FuzzySharp/Process.cs @@ -1,12 +1,12 @@ using System; using System.Collections.Generic; -using FuzzySharp.Extractor; -using FuzzySharp.PreProcess; -using FuzzySharp.SimilarityRatio; -using FuzzySharp.SimilarityRatio.Scorer; -using FuzzySharp.SimilarityRatio.Scorer.Composite; +using Raffinert.FuzzySharp.Extractor; +using Raffinert.FuzzySharp.PreProcess; +using Raffinert.FuzzySharp.SimilarityRatio; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite; -namespace FuzzySharp +namespace Raffinert.FuzzySharp { public static class Process { diff --git a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs index 2700f15..e8f7b03 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/Composite/WeightedRatioScorer.cs @@ -1,6 +1,6 @@ using System; -namespace FuzzySharp.SimilarityRatio.Scorer.Composite +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite { public class WeightedRatioScorer : ScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/Generic/IRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/Generic/IRatioScorer.cs index 4584037..900bf56 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/Generic/IRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/Generic/IRatioScorer.cs @@ -1,6 +1,6 @@ using System; -namespace FuzzySharp.SimilarityRatio.Scorer.Generic +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.Generic { public interface IRatioScorer where T : IEquatable { diff --git a/FuzzySharp/SimilarityRatio/Scorer/Generic/ScorerBase.cs 
b/FuzzySharp/SimilarityRatio/Scorer/Generic/ScorerBase.cs index 7898e76..90605a7 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/Generic/ScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/Generic/ScorerBase.cs @@ -1,6 +1,6 @@ using System; -namespace FuzzySharp.SimilarityRatio.Scorer.Generic +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.Generic { public abstract class ScorerBase : IRatioScorer where T : IEquatable { diff --git a/FuzzySharp/SimilarityRatio/Scorer/IRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/IRatioScorer.cs index 1a69926..526e70b 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/IRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/IRatioScorer.cs @@ -1,6 +1,6 @@ -using FuzzySharp.PreProcess; +using Raffinert.FuzzySharp.PreProcess; -namespace FuzzySharp.SimilarityRatio.Scorer +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer { public interface IRatioScorer { diff --git a/FuzzySharp/SimilarityRatio/Scorer/ScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/ScorerBase.cs index f898d90..a44c819 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/ScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/ScorerBase.cs @@ -1,6 +1,6 @@ -using FuzzySharp.PreProcess; +using Raffinert.FuzzySharp.PreProcess; -namespace FuzzySharp.SimilarityRatio.Scorer +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer { public abstract class ScorerBase : IRatioScorer { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Generic/StrategySensitiveScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Generic/StrategySensitiveScorerBase.cs index cec7017..20471d5 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Generic/StrategySensitiveScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Generic/StrategySensitiveScorerBase.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Scorer.Generic; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Generic; -namespace 
FuzzySharp.SimilarityRatio.Scorer.StrategySensitive.Generic +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive.Generic { public abstract class StrategySensitiveScorerBase : ScorerBase where T : IEquatable { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs index 12ef6d1..393f972 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/DefaultRatioScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class DefaultRatioScorer : SimpleRatioScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs index 049d8af..3127672 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/PartialRatioScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class PartialRatioScorer : SimpleRatioScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/SimpleRatioScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/SimpleRatioScorerBase.cs index e2a15d4..8b24c56 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/SimpleRatioScorerBase.cs +++ 
b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/Simple/SimpleRatioScorerBase.cs @@ -1,4 +1,4 @@ -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public abstract class SimpleRatioScorerBase : StrategySensitiveScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/StrategySensitiveScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/StrategySensitiveScorerBase.cs index 6f01e30..3414bb3 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/StrategySensitiveScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/StrategySensitiveScorerBase.cs @@ -1,6 +1,6 @@ using System; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public abstract class StrategySensitiveScorerBase : ScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/PartialTokenAbbreviationScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/PartialTokenAbbreviationScorer.cs index 4812645..6e30b05 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/PartialTokenAbbreviationScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/PartialTokenAbbreviationScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class PartialTokenAbbreviationScorer : TokenAbbreviationScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorer.cs index 847fec8..9650ce2 100644 --- 
a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class TokenAbbreviationScorer : TokenAbbreviationScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs index 8e5a9b1..2dca081 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenAbbreviation/TokenAbbreviationScorerBase.cs @@ -1,9 +1,9 @@ using System; using System.Collections.Generic; -using FuzzySharp.Extensions; -using FuzzySharp.Utils; +using Raffinert.FuzzySharp.Extensions; +using Raffinert.FuzzySharp.Utils; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public abstract class TokenAbbreviationScorerBase : StrategySensitiveScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs index a216197..6c76275 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/PartialTokenDifferenceScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy.Generic; +using 
Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class PartialTokenDifferenceScorer : TokenDifferenceScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs index fc2bfb9..21455a1 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy.Generic; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class TokenDifferenceScorer : TokenDifferenceScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs index 0e02491..09734c1 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenDifference/TokenDifferenceScorerBase.cs @@ -1,8 +1,8 @@ -using FuzzySharp.Extensions; -using FuzzySharp.PreProcess; -using FuzzySharp.SimilarityRatio.Scorer.StrategySensitive.Generic; +using Raffinert.FuzzySharp.Extensions; +using Raffinert.FuzzySharp.PreProcess; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive.Generic; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public abstract class TokenDifferenceScorerBase : 
StrategySensitiveScorerBase, IRatioScorer { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/PartialTokenInitialismScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/PartialTokenInitialismScorer.cs index 1eda5b8..2ebcc55 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/PartialTokenInitialismScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/PartialTokenInitialismScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class PartialTokenInitialismScorer : TokenInitialismScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorer.cs index 3ea2293..e6dfa82 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class TokenInitialismScorer : TokenInitialismScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs index 4dd3a6b..8a8e293 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs +++ 
b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenInitialism/TokenInitialismScorerBase.cs @@ -1,7 +1,7 @@ using System.Linq; -using FuzzySharp.Extensions; +using Raffinert.FuzzySharp.Extensions; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public abstract class TokenInitialismScorerBase : StrategySensitiveScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/PartialTokenSetScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/PartialTokenSetScorer.cs index 1011eed..54f4195 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/PartialTokenSetScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/PartialTokenSetScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class PartialTokenSetScorer : TokenSetScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorer.cs index fd88724..7f254be 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class TokenSetScorer : TokenSetScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs 
b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs index af61e86..1d3cc31 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSet/TokenSetScorerBase.cs @@ -1,9 +1,9 @@ using System; using System.Collections.Generic; using System.Linq; -using FuzzySharp.Extensions; +using Raffinert.FuzzySharp.Extensions; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public abstract class TokenSetScorerBase : StrategySensitiveScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/PartialTokenSortScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/PartialTokenSortScorer.cs index 5faa0e1..5aec4ae 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/PartialTokenSortScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/PartialTokenSortScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class PartialTokenSortScorer : TokenSortScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs index 032b779..e11e6d7 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortAlgorithm.cs @@ -1,6 +1,6 @@ -using FuzzySharp.Extensions; +using Raffinert.FuzzySharp.Extensions; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace 
Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public abstract class TokenSortScorerBase : StrategySensitiveScorerBase { diff --git a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortScorer.cs b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortScorer.cs index 8cd6821..4a3e8f5 100644 --- a/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortScorer.cs +++ b/FuzzySharp/SimilarityRatio/Scorer/StrategySensitive/TokenSort/TokenSortScorer.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.SimilarityRatio.Strategy; +using Raffinert.FuzzySharp.SimilarityRatio.Strategy; -namespace FuzzySharp.SimilarityRatio.Scorer.StrategySensitive +namespace Raffinert.FuzzySharp.SimilarityRatio.Scorer.StrategySensitive { public class TokenSortScorer : TokenSortScorerBase { diff --git a/FuzzySharp/SimilarityRatio/ScorerCache.cs b/FuzzySharp/SimilarityRatio/ScorerCache.cs index 15229bb..a1f2ca9 100644 --- a/FuzzySharp/SimilarityRatio/ScorerCache.cs +++ b/FuzzySharp/SimilarityRatio/ScorerCache.cs @@ -1,7 +1,7 @@ using System.Runtime.CompilerServices; -using FuzzySharp.SimilarityRatio.Scorer; +using Raffinert.FuzzySharp.SimilarityRatio.Scorer; -namespace FuzzySharp.SimilarityRatio +namespace Raffinert.FuzzySharp.SimilarityRatio { public static class ScorerCache { diff --git a/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs index 72d6cd3..d7676a0 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/DefaultRatioStrategy.cs @@ -1,6 +1,6 @@ using System; -namespace FuzzySharp.SimilarityRatio.Strategy +namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy { internal static class DefaultRatioStrategy { diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs index f6efd79..857673f 100644 --- 
a/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/DefaultRatioStrategyT.cs @@ -1,6 +1,6 @@ using System; -namespace FuzzySharp.SimilarityRatio.Strategy.Generic +namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic { internal static class DefaultRatioStrategy where T : IEquatable { diff --git a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs index 2f35fce..badd858 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/Generic/PartialRatioStrategyT.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.Edits; +using Raffinert.FuzzySharp.Edits; -namespace FuzzySharp.SimilarityRatio.Strategy.Generic +namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy.Generic { internal static class PartialRatioStrategy where T : IEquatable { diff --git a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs index 20d9b26..45bbf28 100644 --- a/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs +++ b/FuzzySharp/SimilarityRatio/Strategy/PartialRatioStrategy.cs @@ -1,7 +1,7 @@ using System; -using FuzzySharp.Edits; +using Raffinert.FuzzySharp.Edits; -namespace FuzzySharp.SimilarityRatio.Strategy +namespace Raffinert.FuzzySharp.SimilarityRatio.Strategy { internal static class PartialRatioStrategy { diff --git a/FuzzySharp/Utils/Heap.cs b/FuzzySharp/Utils/Heap.cs index a732edb..9c3e611 100644 --- a/FuzzySharp/Utils/Heap.cs +++ b/FuzzySharp/Utils/Heap.cs @@ -3,7 +3,7 @@ using System.Collections.Generic; using System.Linq; -namespace FuzzySharp.Utils +namespace Raffinert.FuzzySharp.Utils { public abstract class Heap : IEnumerable { diff --git a/FuzzySharp/Utils/Permutation.cs b/FuzzySharp/Utils/Permutation.cs index d28a748..aa9fad1 100644 --- a/FuzzySharp/Utils/Permutation.cs 
+++ b/FuzzySharp/Utils/Permutation.cs @@ -2,7 +2,7 @@ using System.Collections.Generic; using System.Linq; -namespace FuzzySharp.Utils +namespace Raffinert.FuzzySharp.Utils { public class Permutor where T : IComparable { From 6c5220019cef24abf7b2823d45878f11c7ceb86a Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 17 Aug 2024 18:30:49 +0200 Subject: [PATCH 21/30] reference classic fuzzysharp in benchmarks --- FuzzySharp.Benchmarks/BenchmarkAll.cs | 115 +++++++++++++++--- .../FuzzySharp.Benchmarks.csproj | 3 +- 2 files changed, 103 insertions(+), 15 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs b/FuzzySharp.Benchmarks/BenchmarkAll.cs index e0f2f27..f3d73d0 100644 --- a/FuzzySharp.Benchmarks/BenchmarkAll.cs +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -1,5 +1,8 @@ using BenchmarkDotNet.Attributes; using Raffinert.FuzzySharp.PreProcess; +using RaffinertFuzz = Raffinert.FuzzySharp.Fuzz; +using ClassicFuzz = FuzzySharp.Fuzz; +using ClassicPreprocess = FuzzySharp.PreProcess; namespace Raffinert.FuzzySharp.Benchmarks; @@ -9,84 +12,168 @@ public class BenchmarkAll [Benchmark] public int Ratio1() { - return Fuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); + return RaffinertFuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); } [Benchmark] public int Ratio2() { - return Fuzz.Ratio("mysmilarstring", "mysimilarstring"); + return RaffinertFuzz.Ratio("mysmilarstring", "mysimilarstring"); } [Benchmark] public int PartialRatio() { - return Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + return RaffinertFuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); } [Benchmark] public int TokenSortRatio() { - return Fuzz.TokenSortRatio("order words out of", " words out of order"); + return RaffinertFuzz.TokenSortRatio("order words out of", " words out of order"); } [Benchmark] public int PartialTokenSortRatio() { - return Fuzz.PartialTokenSortRatio("order words out of", " words out of order"); + return 
RaffinertFuzz.PartialTokenSortRatio("order words out of", " words out of order"); } [Benchmark] public int TokenSetRatio() { - return Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + return RaffinertFuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); } [Benchmark] public int PartialTokenSetRatio() { - return Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + return RaffinertFuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); } [Benchmark] public int WeightedRatio() { - return Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); + return RaffinertFuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); } [Benchmark] public int TokenInitialismRatio1() { - return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); + return RaffinertFuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); } [Benchmark] public int TokenInitialismRatio2() { - return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); + return RaffinertFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); } [Benchmark] public int TokenInitialismRatio3() { - return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + return RaffinertFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); } [Benchmark] public int PartialTokenInitialismRatio() { - return Fuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + return RaffinertFuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape 
Canaveral, Florida 32899"); } [Benchmark] public int TokenAbbreviationRatio() { - return Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + return RaffinertFuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); } [Benchmark] public int PartialTokenAbbreviationRatio() { - return Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + return RaffinertFuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + } + + [Benchmark] + public int Ratio1Classic() + { + return ClassicFuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); + } + + [Benchmark] + public int Ratio2Classic() + { + return ClassicFuzz.Ratio("mysmilarstring", "mysimilarstring"); + } + + [Benchmark] + public int PartialRatioClassic() + { + return ClassicFuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + } + + [Benchmark] + public int TokenSortRatioClassic() + { + return ClassicFuzz.TokenSortRatio("order words out of", " words out of order"); + } + + [Benchmark] + public int PartialTokenSortRatioClassic() + { + return ClassicFuzz.PartialTokenSortRatio("order words out of", " words out of order"); + } + + [Benchmark] + public int TokenSetRatioClassic() + { + return ClassicFuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + } + + [Benchmark] + public int PartialTokenSetRatioClassic() + { + return ClassicFuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + } + + [Benchmark] + public int WeightedRatioClassic() + { + return ClassicFuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); + } + + [Benchmark] + public int TokenInitialismRatio1Classic() + { + return ClassicFuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); + } + + [Benchmark] + public int TokenInitialismRatio2Classic() + { + return 
ClassicFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); + } + + [Benchmark] + public int TokenInitialismRatio3Classic() + { + return ClassicFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + } + + [Benchmark] + public int PartialTokenInitialismRatioClassic() + { + return ClassicFuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + } + + [Benchmark] + public int TokenAbbreviationRatioClassic() + { + return ClassicFuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", ClassicPreprocess.PreprocessMode.Full); + } + + [Benchmark] + public int PartialTokenAbbreviationRatioClassic() + { + return ClassicFuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", ClassicPreprocess.PreprocessMode.Full); } } \ No newline at end of file diff --git a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj index 5f3c618..8c75efc 100644 --- a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj +++ b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj @@ -5,12 +5,13 @@ net8.0 enable enable - Raffinert.$(MSBuildProjectName) + $(MSBuildProjectName) Raffinert.$(MSBuildProjectName.Replace(" ", "_")) + From a0aade06a1a9931f7814e86f868ef8bf63f8215f Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sat, 17 Aug 2024 19:47:13 +0200 Subject: [PATCH 22/30] + ExtractOne benchmark --- FuzzySharp.Benchmarks/BenchmarkAll.cs | 82 +++++++++++++++++---------- 1 file changed, 51 insertions(+), 31 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs b/FuzzySharp.Benchmarks/BenchmarkAll.cs index f3d73d0..5835134 100644 --- a/FuzzySharp.Benchmarks/BenchmarkAll.cs +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -1,8 +1,7 @@ using BenchmarkDotNet.Attributes; +using Raffinert.FuzzySharp.Extractor; using 
Raffinert.FuzzySharp.PreProcess; -using RaffinertFuzz = Raffinert.FuzzySharp.Fuzz; -using ClassicFuzz = FuzzySharp.Fuzz; -using ClassicPreprocess = FuzzySharp.PreProcess; +using Classic = FuzzySharp; namespace Raffinert.FuzzySharp.Benchmarks; @@ -12,168 +11,189 @@ public class BenchmarkAll [Benchmark] public int Ratio1() { - return RaffinertFuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); + return Fuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); } [Benchmark] public int Ratio2() { - return RaffinertFuzz.Ratio("mysmilarstring", "mysimilarstring"); + return Fuzz.Ratio("mysmilarstring", "mysimilarstring"); } [Benchmark] public int PartialRatio() { - return RaffinertFuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + return Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); } [Benchmark] public int TokenSortRatio() { - return RaffinertFuzz.TokenSortRatio("order words out of", " words out of order"); + return Fuzz.TokenSortRatio("order words out of", " words out of order"); } [Benchmark] public int PartialTokenSortRatio() { - return RaffinertFuzz.PartialTokenSortRatio("order words out of", " words out of order"); + return Fuzz.PartialTokenSortRatio("order words out of", " words out of order"); } [Benchmark] public int TokenSetRatio() { - return RaffinertFuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + return Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); } [Benchmark] public int PartialTokenSetRatio() { - return RaffinertFuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + return Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); } [Benchmark] public int WeightedRatio() { - return RaffinertFuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); + return Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); } 
[Benchmark] public int TokenInitialismRatio1() { - return RaffinertFuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); } [Benchmark] public int TokenInitialismRatio2() { - return RaffinertFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); } [Benchmark] public int TokenInitialismRatio3() { - return RaffinertFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + return Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); } [Benchmark] public int PartialTokenInitialismRatio() { - return RaffinertFuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + return Fuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); } [Benchmark] public int TokenAbbreviationRatio() { - return RaffinertFuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + return Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); } [Benchmark] public int PartialTokenAbbreviationRatio() { - return RaffinertFuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); + return Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); } [Benchmark] public int Ratio1Classic() { - return ClassicFuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); + return Classic.Fuzz.Ratio("mysmilarstring", "myawfullysimilarstirng"); } [Benchmark] public int Ratio2Classic() { - return ClassicFuzz.Ratio("mysmilarstring", 
"mysimilarstring"); + return Classic.Fuzz.Ratio("mysmilarstring", "mysimilarstring"); } [Benchmark] public int PartialRatioClassic() { - return ClassicFuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); + return Classic.Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring"); } [Benchmark] public int TokenSortRatioClassic() { - return ClassicFuzz.TokenSortRatio("order words out of", " words out of order"); + return Classic.Fuzz.TokenSortRatio("order words out of", " words out of order"); } [Benchmark] public int PartialTokenSortRatioClassic() { - return ClassicFuzz.PartialTokenSortRatio("order words out of", " words out of order"); + return Classic.Fuzz.PartialTokenSortRatio("order words out of", " words out of order"); } [Benchmark] public int TokenSetRatioClassic() { - return ClassicFuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + return Classic.Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); } [Benchmark] public int PartialTokenSetRatioClassic() { - return ClassicFuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); + return Classic.Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear"); } [Benchmark] public int WeightedRatioClassic() { - return ClassicFuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); + return Classic.Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog"); } [Benchmark] public int TokenInitialismRatio1Classic() { - return ClassicFuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); + return Classic.Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); } [Benchmark] public int TokenInitialismRatio2Classic() { - return ClassicFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); + return 
Classic.Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); } [Benchmark] public int TokenInitialismRatio3Classic() { - return ClassicFuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + return Classic.Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); } [Benchmark] public int PartialTokenInitialismRatioClassic() { - return ClassicFuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); + return Classic.Fuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); } [Benchmark] public int TokenAbbreviationRatioClassic() { - return ClassicFuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", ClassicPreprocess.PreprocessMode.Full); + return Classic.Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", Classic.PreProcess.PreprocessMode.Full); } [Benchmark] public int PartialTokenAbbreviationRatioClassic() { - return ClassicFuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", ClassicPreprocess.PreprocessMode.Full); + return Classic.Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", Classic.PreProcess.PreprocessMode.Full); + } + + private static readonly string[][] Events = + [ + ["chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm"], + ["new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm"], + ["atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm"] + ]; + + private static readonly string[] Query = ["new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm"]; + + [Benchmark] + public ExtractedResult ExtractOne() + { + return Process.ExtractOne(Query, Events, static strings => strings[0]); + } + + [Benchmark] + public 
Classic.Extractor.ExtractedResult ExtractOneClassic() + { + return Classic.Process.ExtractOne(Query, Events, static strings => strings[0]); } } \ No newline at end of file From d0a80e57c9fdef7160aed4ed84203647b392e66f Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 11:01:53 +0200 Subject: [PATCH 23/30] feature: Extract method with (string query, IEnumerable choices) signature #46 add Fastenshtein to perf comparison --- FuzzySharp.Benchmarks/BenchmarkAll.cs | 12 +++++++ .../FuzzySharp.Benchmarks.csproj | 1 + FuzzySharp.Benchmarks/Program.cs | 34 ++++++++++++++++++- FuzzySharp/Extractor/ResultExtractor.cs | 11 ++++-- FuzzySharp/Process.cs | 21 ++++++++++++ 5 files changed, 75 insertions(+), 4 deletions(-) diff --git a/FuzzySharp.Benchmarks/BenchmarkAll.cs b/FuzzySharp.Benchmarks/BenchmarkAll.cs index 5835134..e247b33 100644 --- a/FuzzySharp.Benchmarks/BenchmarkAll.cs +++ b/FuzzySharp.Benchmarks/BenchmarkAll.cs @@ -196,4 +196,16 @@ public Classic.Extractor.ExtractedResult ExtractOneClassic() { return Classic.Process.ExtractOne(Query, Events, static strings => strings[0]); } + + [Benchmark] + public int LevenshteinDistance() + { + return Levenshtein.EditDistance("chicago cubs vs new york mets".AsSpan(), "new york mets vs chicago cubs".AsSpan()); + } + + [Benchmark] + public int FastenshteinDistance() + { + return Fastenshtein.Levenshtein.Distance("chicago cubs vs new york mets", "new york mets vs chicago cubs"); + } } \ No newline at end of file diff --git a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj index 8c75efc..44f0796 100644 --- a/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj +++ b/FuzzySharp.Benchmarks/FuzzySharp.Benchmarks.csproj @@ -11,6 +11,7 @@ + diff --git a/FuzzySharp.Benchmarks/Program.cs b/FuzzySharp.Benchmarks/Program.cs index 9f90b4d..7c50760 100644 --- a/FuzzySharp.Benchmarks/Program.cs +++ b/FuzzySharp.Benchmarks/Program.cs @@ -1,6 +1,38 @@ using 
BenchmarkDotNet.Running; -using Raffinert.FuzzySharp; +using Raffinert.FuzzySharp.Benchmarks; +//using Raffinert.FuzzySharp; +//using Raffinert.FuzzySharp.SimilarityRatio; +//using Raffinert.FuzzySharp.SimilarityRatio.Scorer.Composite; +//using Classic = FuzzySharp; BenchmarkRunner.Run(typeof(Program).Assembly); +//var input1 = "+30.0% Damage to Close Enemies [30.01%"; +//var input2Collection = new[] +//{ +// "+#% Damage", +// "+#% Damage to Crowd Controlled Enemies", +// "+#% Damage to Close Enemies", +// "+#% Damage to Chilled Enemies", +// "+#% Damage to Poisoned Enemies", +// "#% Block Chance#% Blocked Damage Reduction", +// "#% Damage Reduction from Bleeding Enemies", +// "#% Damage Reduction", +// "+#% Cold Damage" +//}; + +//var classicScorer = Classic.SimilarityRatio.ScorerCache.Get(); + +//Func classicScorerFunc = input2 => classicScorer.Score(input1, input2); + +//var classicResult = input2Collection.Select(classicScorerFunc).ToList(); + +//var scorer = ScorerCache.Get(); + +//Func scorerFunc = input2 => scorer.Score(input1, input2); + +//var result = input2Collection.Select(scorerFunc).ToList(); + +//Console.WriteLine(); + //Console.WriteLine(Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog")); \ No newline at end of file diff --git a/FuzzySharp/Extractor/ResultExtractor.cs b/FuzzySharp/Extractor/ResultExtractor.cs index b0bb657..b173944 100644 --- a/FuzzySharp/Extractor/ResultExtractor.cs +++ b/FuzzySharp/Extractor/ResultExtractor.cs @@ -8,13 +8,12 @@ namespace Raffinert.FuzzySharp.Extractor { public static class ResultExtractor { - public static IEnumerable> ExtractWithoutOrder(T query, IEnumerable choices, Func processor, IRatioScorer scorer, int cutoff = 0) + public static IEnumerable> ExtractWithoutOrder(string query, IEnumerable choices, Func processor, IRatioScorer scorer, int cutoff = 0) { int index = 0; - var processedQuery = processor(query); foreach (var choice in 
choices) { - int score = scorer.Score(processedQuery, processor(choice)); + int score = scorer.Score(query, processor(choice)); if (score >= cutoff) { yield return new ExtractedResult(choice, score, index); @@ -23,6 +22,12 @@ public static IEnumerable> ExtractWithoutOrder(T query, IE } } + public static IEnumerable> ExtractWithoutOrder(T query, IEnumerable choices, Func processor, IRatioScorer scorer, int cutoff = 0) + { + var processedQuery = processor(query); + return ExtractWithoutOrder(processedQuery, choices, processor, scorer, cutoff); + } + public static ExtractedResult ExtractOne(T query, IEnumerable choices, Func processor, IRatioScorer calculator, int cutoff = 0) { return ExtractWithoutOrder(query, choices, processor, calculator, cutoff).Max(); diff --git a/FuzzySharp/Process.cs b/FuzzySharp/Process.cs index f96bbd7..b3f5109 100644 --- a/FuzzySharp/Process.cs +++ b/FuzzySharp/Process.cs @@ -56,6 +56,27 @@ public static IEnumerable> ExtractAll( scorer ??= s_defaultScorer; return ResultExtractor.ExtractWithoutOrder(query, choices, processor, scorer, cutoff); } + + /// + /// Creates a list of ExtractedResult which contain all the choices with + /// their corresponding score where higher is more similar + /// + /// + /// + /// + /// + /// + /// + public static IEnumerable> ExtractAll( + string query, + IEnumerable choices, + Func processor, + IRatioScorer scorer = null, + int cutoff = 0) + { + scorer ??= s_defaultScorer; + return ResultExtractor.ExtractWithoutOrder(query, choices, processor, scorer, cutoff); + } #endregion #region ExtractTop From e336f0d88e1b66688532223031ee93f05f7cc633 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 12:14:17 +0200 Subject: [PATCH 24/30] readme --- README.md | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 77fdf38..29f73ee 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,18 @@ -# FuzzySharp +# Raffinert.FuzzySharp + C# .NET fuzzy string 
matching implementation of Seat Geek's well known python FuzzyWuzzy algorithm. +A refined version of the original [FuzzySharp](https://github.com/JakeBayer/FuzzySharp). The original one looks abandoned. + # Release Notes: +v.2.0.3 + +Focus on performance and allocations. +Fixed some problems with local languages caused by Regex("a-zA-Z"). All regexps were replaced with string manipulations (it also fixes https://github.com/JakeBayer/FuzzySharp/pull/7) +Removed unnecessary ConcurrentDictionary, reusing the approach by [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42) +Removed support for outdated/vulnerable platforms netcoreapp2.0;netcoreapp2.1;netstandard1.6 +Closed [Issue!46 - Extract method with (string query, IEnumerable choices) signature](https://github.com/JakeBayer/FuzzySharp/issues/46) + v.2.0.0 As of 2.0.0, all empty strings will return a score of 0. Prior, the partial scoring system would return a score of 100, regardless if the other input had correct value or not. This was a result of the partial scoring system returning an empty set for the matching blocks As a result, this led to incorrrect values in the composite scores; several of them (token set, token sort), relied on the prior value of empty strings. 
@@ -11,7 +22,7 @@ As a result, many 1.X.X unit test may be broken with the 2.X.X upgrade, but it i ## Usage -Install-Package FuzzySharp +Install-Package Raffinert.FuzzySharp #### Simple Ratio ```csharp From 0d20b55f18344e6aeca00a1fd70bae37b8bc7fbb Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 12:17:48 +0200 Subject: [PATCH 25/30] add benchmark --- ...p.Benchmarks.BenchmarkAll-report-github.md | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) create mode 100644 FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md diff --git a/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md new file mode 100644 index 0000000..eab6cc3 --- /dev/null +++ b/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md @@ -0,0 +1,44 @@ +``` + +BenchmarkDotNet v0.14.0, Windows 11 (10.0.22631.4037/23H2/2023Update/SunValley3) +12th Gen Intel Core i7-1255U, 1 CPU, 12 logical and 10 physical cores +.NET SDK 8.0.400 + [Host] : .NET 8.0.8 (8.0.824.36612), X64 RyuJIT AVX2 + DefaultJob : .NET 8.0.8 (8.0.824.36612), X64 RyuJIT AVX2 + + +``` +| Method | Mean | Error | StdDev | Median | Gen0 | Gen1 | Allocated | +|------------------------------------- |-------------:|-------------:|-------------:|-------------:|-------:|-------:|----------:| +| Ratio1 | 206.81 ns | 2.409 ns | 2.136 ns | 207.29 ns | 0.0165 | - | 104 B | +| Ratio2 | 13.76 ns | 0.319 ns | 0.899 ns | 13.29 ns | - | - | - | +| PartialRatio | 723.24 ns | 22.148 ns | 59.498 ns | 692.31 ns | 0.3786 | 0.0010 | 2376 B | +| TokenSortRatio | 801.65 ns | 54.000 ns | 159.219 ns | 882.82 ns | 0.0896 | - | 568 B | +| PartialTokenSortRatio | 899.87 ns | 30.597 ns | 89.254 ns | 921.35 ns | 0.1154 | - | 728 B | +| 
TokenSetRatio | 1,093.68 ns | 28.071 ns | 80.993 ns | 1,096.79 ns | 0.3500 | - | 2200 B | +| PartialTokenSetRatio | 1,380.95 ns | 52.967 ns | 154.507 ns | 1,392.58 ns | 0.5112 | - | 3208 B | +| WeightedRatio | 12,561.44 ns | 767.193 ns | 2,225.766 ns | 13,232.62 ns | 0.7935 | - | 5072 B | +| TokenInitialismRatio1 | 294.56 ns | 6.757 ns | 18.946 ns | 297.41 ns | 0.0625 | - | 392 B | +| TokenInitialismRatio2 | 275.14 ns | 5.562 ns | 15.503 ns | 272.03 ns | 0.0548 | - | 344 B | +| TokenInitialismRatio3 | 542.62 ns | 10.893 ns | 29.635 ns | 541.23 ns | 0.1106 | - | 696 B | +| PartialTokenInitialismRatio | 749.64 ns | 15.039 ns | 32.373 ns | 744.13 ns | 0.1845 | - | 1160 B | +| TokenAbbreviationRatio | 1,270.08 ns | 24.756 ns | 41.361 ns | 1,255.59 ns | 0.2508 | - | 1576 B | +| PartialTokenAbbreviationRatio | 1,536.55 ns | 45.771 ns | 129.097 ns | 1,561.22 ns | 0.3357 | - | 2112 B | +| Ratio1Classic | 677.17 ns | 13.437 ns | 29.212 ns | 681.43 ns | 0.0505 | - | 320 B | +| Ratio2Classic | 104.42 ns | 2.102 ns | 3.626 ns | 105.17 ns | 0.0318 | - | 200 B | +| PartialRatioClassic | 2,249.40 ns | 44.588 ns | 118.242 ns | 2,274.26 ns | 0.5360 | 0.0019 | 3368 B | +| TokenSortRatioClassic | 3,071.78 ns | 92.892 ns | 266.524 ns | 3,143.59 ns | 0.3510 | - | 2216 B | +| PartialTokenSortRatioClassic | 3,317.62 ns | 64.881 ns | 82.054 ns | 3,327.15 ns | 0.4005 | - | 2536 B | +| TokenSetRatioClassic | 4,309.09 ns | 85.081 ns | 184.959 ns | 4,337.85 ns | 0.6905 | - | 4352 B | +| PartialTokenSetRatioClassic | 4,771.35 ns | 92.361 ns | 230.012 ns | 4,849.64 ns | 0.9308 | - | 5840 B | +| WeightedRatioClassic | 24,181.32 ns | 721.231 ns | 2,046.011 ns | 24,472.06 ns | 2.1362 | - | 13482 B | +| TokenInitialismRatio1Classic | 1,041.92 ns | 20.745 ns | 39.470 ns | 1,044.25 ns | 0.1440 | - | 904 B | +| TokenInitialismRatio2Classic | 824.97 ns | 26.765 ns | 75.051 ns | 844.97 ns | 0.1173 | - | 736 B | +| TokenInitialismRatio3Classic | 1,971.98 ns | 39.316 ns | 91.901 ns | 1,989.39 ns | 0.2460 
| - | 1552 B | +| PartialTokenInitialismRatioClassic | 2,249.70 ns | 44.057 ns | 65.943 ns | 2,259.86 ns | 0.3414 | - | 2144 B | +| TokenAbbreviationRatioClassic | 2,727.98 ns | 84.791 ns | 241.914 ns | 2,779.33 ns | 0.4730 | - | 2984 B | +| PartialTokenAbbreviationRatioClassic | 3,162.92 ns | 88.249 ns | 247.460 ns | 3,193.32 ns | 0.6180 | - | 3896 B | +| ExtractOne | 33,770.23 ns | 1,260.134 ns | 3,595.234 ns | 34,371.46 ns | 1.8616 | - | 11728 B | +| ExtractOneClassic | 54,594.63 ns | 1,971.629 ns | 5,625.169 ns | 55,347.68 ns | 4.5776 | - | 29011 B | +| LevenshteinDistance | 2,096.37 ns | 58.508 ns | 167.872 ns | 2,141.95 ns | 0.0229 | - | 144 B | +| FastenshteinDistance | 1,533.82 ns | 38.323 ns | 108.715 ns | 1,564.52 ns | 0.0229 | - | 144 B | From 48ec28bd28d96e6354c78554a4ea673bfcdb5402 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 12:23:02 +0200 Subject: [PATCH 26/30] fix names, versions, formatting --- FuzzySharp/FuzzySharp.csproj | 46 ++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index 856b450..c766176 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -1,32 +1,32 @@  - 1.0.5.0 - Jacob Bayer;Yevhen Cherkes - - Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek - 1.0.5.0 - true - true - 12.0 - MIT - https://github.com/JakeBayer/FuzzySharp - Include source link - false - Fuzzy String Matching Comparison FuzzyWuzzy FuzzySharp - true - git - https://github.com/JakeBayer/FuzzySharp - snupkg - netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;NET60;NET80 - 2.0.3 - Raffinert.$(MSBuildProjectName) - Raffinert.$(MSBuildProjectName.Replace(" ", "_")) + 2.0.3.0 + Jacob Bayer;Yevhen Cherkes + + Fuzzy string matcher based on FuzzyWuzzy algorithm from SeatGeek + 2.0.3.0 + true + true + 12.0 + MIT + https://github.com/Raffinert/FuzzySharp + Performance, allocations + 
false + Fuzzy String Matching Comparison FuzzyWuzzy FuzzySharp + true + git + https://github.com/Raffinert/FuzzySharp + snupkg + netstandard2.0;netstandard2.1;netcoreapp3.1;net45;net46;net461;net472;net48;NET60;NET80 + 2.0.3 + Raffinert.$(MSBuildProjectName) + Raffinert.$(MSBuildProjectName.Replace(" ", "_")) - - + + From dc2b858dc4cc56d8cdf26411904e255a019b0549 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 12:27:53 +0200 Subject: [PATCH 27/30] readme --- FuzzySharp/README.md | 26 +++++++++++++++++++++----- README.md | 12 ------------ 2 files changed, 21 insertions(+), 17 deletions(-) diff --git a/FuzzySharp/README.md b/FuzzySharp/README.md index 14835b5..d89e11e 100644 --- a/FuzzySharp/README.md +++ b/FuzzySharp/README.md @@ -1,12 +1,28 @@ -# FuzzySharp +# Raffinert.FuzzySharp + C# .NET fuzzy string matching implementation of Seat Geek's well known python FuzzyWuzzy algorithm. -## Usage +A refined version of original [FuzzySharp](https://github.com/JakeBayer/FuzzySharp). The original one looks abandoned. + +# Release Notes: +v.2.0.3 + +Accent to performantce and allocations. +Fixed some problems with local languages caused by Regex("a-zA-Z"). All regexps were replaced with string manipulations (it also fixes https://github.com/JakeBayer/FuzzySharp/pull/7) +Remove unnecessary ConcurrentDictionary: reused approach [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42) +Remove support of outdated/vulnerable platforms netcoreapp2.0;netcoreapp2.1;netstandard1.6 +Closed [Issue!46 - Extract method with (string query, IEnumerable choices) signature]!(https://github.com/JakeBayer/FuzzySharp/issues/46) + +v.2.0.0 -Install-Package FuzzySharp -Version 2.0.1 +As of 2.0.0, all empty strings will return a score of 0. Prior, the partial scoring system would return a score of 100, regardless if the other input had correct value or not. 
This was a result of the partial scoring system returning an empty set for the matching blocks As a result, this led to incorrrect values in the composite scores; several of them (token set, token sort), relied on the prior value of empty strings. + +As a result, many 1.X.X unit test may be broken with the 2.X.X upgrade, but it is within the expertise fo all the 1.X.X developers to recommednd the upgrade to the 2.X.X series regardless, should their version accommodate it or not, as it is closer to the ideal behavior of the library. + + +## Usage -## NOTES -As of version 2.0.0, if either test string is an empty string, the scorers will return a score of 0. Previously this was returning 100 for all partial ratios, which was causing severe issues for some fo the compound scorers. +Install-Package Raffinert.FuzzySharp #### Simple Ratio ```csharp diff --git a/README.md b/README.md index 29f73ee..d89e11e 100644 --- a/README.md +++ b/README.md @@ -124,18 +124,6 @@ var best = Process.ExtractOne(query, events, strings => strings[0]); best: (value: { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" }, score: 95, index: 0) ``` -### FuzzySharp in Different Languages -FuzzySharp was written with English in mind, and as such the Default string preprocessor only looks at English alphanumeric characters in the input strings, and will strip all others out. However, the `Extract` methods in the `Process` class do provide the option to specify your own string preprocessor. If this parameter is omitted, the Default will be used. However if you provide your own, the provided one will be used, so you are free to provide your own criteria for whatever character set you want to admit. For instance, using the parameter `(s) => s` will prevent the string from being altered at all before being run through the similarity algorithms. 
- -E.g., - -```csharp -var query = "strng"; -var choices = new [] { "stríng", "stráng", "stréng" }; -var results = Process.ExtractAll(query, choices, (s) => s); -``` -The above will run the similarity algorithm on all the choices without stripping out the accented characters. - ### Using Different Scorers Scoring strategies are stateless, and as such should be static. However, in order to get them to share all the code they have in common via inheritance, making them static was not possible. Currently one way around having to new up an instance everytime you want to use one is to use the cache. This will ensure only one instance of each scorer ever exists. From c2d5ddeb6721d74d64d19b14e88611cb543c76c7 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 12:48:12 +0200 Subject: [PATCH 28/30] remove readme duplicate --- FuzzySharp/FuzzySharp.csproj | 7 +- FuzzySharp/README.md | 149 ----------------------------------- README.md | 4 +- 3 files changed, 6 insertions(+), 154 deletions(-) delete mode 100644 FuzzySharp/README.md diff --git a/FuzzySharp/FuzzySharp.csproj b/FuzzySharp/FuzzySharp.csproj index c766176..3217e89 100644 --- a/FuzzySharp/FuzzySharp.csproj +++ b/FuzzySharp/FuzzySharp.csproj @@ -22,6 +22,7 @@ 2.0.3 Raffinert.$(MSBuildProjectName) Raffinert.$(MSBuildProjectName.Replace(" ", "_")) + README.md @@ -32,8 +33,8 @@ - - - + + + diff --git a/FuzzySharp/README.md b/FuzzySharp/README.md deleted file mode 100644 index d89e11e..0000000 --- a/FuzzySharp/README.md +++ /dev/null @@ -1,149 +0,0 @@ -# Raffinert.FuzzySharp - -C# .NET fuzzy string matching implementation of Seat Geek's well known python FuzzyWuzzy algorithm. - -A refined version of original [FuzzySharp](https://github.com/JakeBayer/FuzzySharp). The original one looks abandoned. - -# Release Notes: -v.2.0.3 - -Accent to performantce and allocations. -Fixed some problems with local languages caused by Regex("a-zA-Z"). 
All regexps were replaced with string manipulations (it also fixes https://github.com/JakeBayer/FuzzySharp/pull/7) -Remove unnecessary ConcurrentDictionary: reused approach [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42) -Remove support of outdated/vulnerable platforms netcoreapp2.0;netcoreapp2.1;netstandard1.6 -Closed [Issue!46 - Extract method with (string query, IEnumerable choices) signature]!(https://github.com/JakeBayer/FuzzySharp/issues/46) - -v.2.0.0 - -As of 2.0.0, all empty strings will return a score of 0. Prior, the partial scoring system would return a score of 100, regardless if the other input had correct value or not. This was a result of the partial scoring system returning an empty set for the matching blocks As a result, this led to incorrrect values in the composite scores; several of them (token set, token sort), relied on the prior value of empty strings. - -As a result, many 1.X.X unit test may be broken with the 2.X.X upgrade, but it is within the expertise fo all the 1.X.X developers to recommednd the upgrade to the 2.X.X series regardless, should their version accommodate it or not, as it is closer to the ideal behavior of the library. 
- - -## Usage - -Install-Package Raffinert.FuzzySharp - -#### Simple Ratio -```csharp -Fuzz.Ratio("mysmilarstring","myawfullysimilarstirng") -72 -Fuzz.Ratio("mysmilarstring","mysimilarstring") -97 -``` - -#### Partial Ratio -```csharp -Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring") -71 -``` - -#### Token Sort Ratio -```csharp -Fuzz.TokenSortRatio("order words out of"," words out of order") -100 -Fuzz.PartialTokenSortRatio("order words out of"," words out of order") -100 -``` - -#### Token Set Ratio -```csharp -Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear") -100 -Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear") -100 -``` - -#### Token Initialism Ratio -```csharp -Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration"); -89 -Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration"); -100 - -Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); -53 -Fuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899"); -100 -``` - -#### Token Abbreviation Ratio -```csharp -Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); -40 -Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full); -50 -``` - - -#### Weighted Ratio -```csharp -Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog") -95 -``` - -#### Process -```csharp -Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys"}) -(string: Dallas Cowboys, score: 90, index: 3) -``` -```csharp -Process.ExtractTop("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, limit: 3); -[(string: google, score: 83, index: 0), (string: 
googleplus, score: 75, index: 5), (string: plexoogl, score: 43, index: 7)] -``` -```csharp -Process.ExtractAll("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }) -[(string: google, score: 83, index: 0), (string: bing, score: 22, index: 1), (string: facebook, score: 29, index: 2), (string: linkedin, score: 29, index: 3), (string: twitter, score: 15, index: 4), (string: googleplus, score: 75, index: 5), (string: bingnews, score: 29, index: 6), (string: plexoogl, score: 43, index: 7)] -// score cutoff -Process.ExtractAll("goolge", new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }, cutoff: 40) -[(string: google, score: 83, index: 0), (string: googleplus, score: 75, index: 5), (string: plexoogl, score: 43, index: 7)] -``` -```csharp -Process.ExtractSorted("goolge", new [] {"google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" }) -[(string: google, score: 83, index: 0), (string: googleplus, score: 75, index: 5), (string: plexoogl, score: 43, index: 7), (string: facebook, score: 29, index: 2), (string: linkedin, score: 29, index: 3), (string: bingnews, score: 29, index: 6), (string: bing, score: 22, index: 1), (string: twitter, score: 15, index: 4)] -``` - -Extraction will use `WeightedRatio` and `full process` by default. Override these in the method parameters to use different scorers and processing. -Here we use the Fuzz.Ratio scorer and keep the strings as is, instead of Full Process (which will .ToLowercase() before comparing) -```csharp -Process.ExtractOne("cowboys", new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" }, s => s, ScorerCache.Get()); -(string: Dallas Cowboys, score: 57, index: 3) -``` - -Extraction can operate on objects of similar type. Use the "process" parameter to reduce the object to the string which it should be compared on. 
In the following example, the object is an array that contains the matchup, the arena, the date, and the time. We are matching on the first (0 index) parameter, the matchup. -```csharp -var events = new[] -{ - new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" }, - new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" }, - new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" }, -}; -var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" }; -var best = Process.ExtractOne(query, events, strings => strings[0]); - -best: (value: { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" }, score: 95, index: 0) -``` - -### Using Different Scorers -Scoring strategies are stateless, and as such should be static. However, in order to get them to share all the code they have in common via inheritance, making them static was not possible. -Currently one way around having to new up an instance everytime you want to use one is to use the cache. This will ensure only one instance of each scorer ever exists. -```csharp -var ratio = ScorerCache.Get(); -var partialRatio = ScorerCache.Get(); -var tokenSet = ScorerCache.Get(); -var partialTokenSet = ScorerCache.Get(); -var tokenSort = ScorerCache.Get(); -var partialTokenSort = ScorerCache.Get(); -var tokenAbbreviation = ScorerCache.Get(); -var partialTokenAbbreviation = ScorerCache.Get(); -var weighted = ScorerCache.Get(); -``` - -## Credits - -- SeatGeek -- Adam Cohen -- David Necas (python-Levenshtein) -- Mikko Ohtamaa (python-Levenshtein) -- Antti Haapala (python-Levenshtein) -- Panayiotis (Java implementation I heavily borrowed from) diff --git a/README.md b/README.md index d89e11e..5bfe01d 100644 --- a/README.md +++ b/README.md @@ -7,9 +7,9 @@ A refined version of original [FuzzySharp](https://github.com/JakeBayer/FuzzySha # Release Notes: v.2.0.3 -Accent to performantce and allocations. 
+Accent to performantce and allocations. See [Benchmark](https://github.com/Raffinert/FuzzySharp/blob/dc2b858dc4cc56d8cdf26411904e255a019b0549/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md) Fixed some problems with local languages caused by Regex("a-zA-Z"). All regexps were replaced with string manipulations (it also fixes https://github.com/JakeBayer/FuzzySharp/pull/7) -Remove unnecessary ConcurrentDictionary: reused approach [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42) +Extra performance improvement, reused approach [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42) Remove support of outdated/vulnerable platforms netcoreapp2.0;netcoreapp2.1;netstandard1.6 Closed [Issue!46 - Extract method with (string query, IEnumerable choices) signature]!(https://github.com/JakeBayer/FuzzySharp/issues/46) From b27faf3e10e2acff26ed376e9dd710917bc188fb Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 12:58:13 +0200 Subject: [PATCH 29/30] readme --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5bfe01d..aab2c7c 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,11 @@ A refined version of original [FuzzySharp](https://github.com/JakeBayer/FuzzySha # Release Notes: v.2.0.3 -Accent to performantce and allocations. See [Benchmark](https://github.com/Raffinert/FuzzySharp/blob/dc2b858dc4cc56d8cdf26411904e255a019b0549/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md) -Fixed some problems with local languages caused by Regex("a-zA-Z"). 
All regexps were replaced with string manipulations (it also fixes https://github.com/JakeBayer/FuzzySharp/pull/7) -Extra performance improvement, reused approach [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42) -Remove support of outdated/vulnerable platforms netcoreapp2.0;netcoreapp2.1;netstandard1.6 -Closed [Issue!46 - Extract method with (string query, IEnumerable choices) signature]!(https://github.com/JakeBayer/FuzzySharp/issues/46) +Accent to performantce and allocations. See [Benchmark](https://github.com/Raffinert/FuzzySharp/blob/dc2b858dc4cc56d8cdf26411904e255a019b0549/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md). +Support local languages more naturally (removed regexps "a-zA-Z"). All regexps were replaced with string manipulations (fixes [PR!7](https://github.com/JakeBayer/FuzzySharp/pull/7)). +Extra performance improvement, reused approach [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42). +Implemented new Process.ExtractAll method, see [Issue!46](https://github.com/JakeBayer/FuzzySharp/issues/46). +Remove support of outdated/vulnerable platforms netcoreapp2.0;netcoreapp2.1;netstandard1.6. v.2.0.0 From 9646f2bad78a83ff79d6cb45e7249363c367a0d8 Mon Sep 17 00:00:00 2001 From: Yevhen Cherkes Date: Sun, 18 Aug 2024 13:02:57 +0200 Subject: [PATCH 30/30] fix a typo --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index aab2c7c..0cd0a0c 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A refined version of original [FuzzySharp](https://github.com/JakeBayer/FuzzySha # Release Notes: v.2.0.3 -Accent to performantce and allocations. 
See [Benchmark](https://github.com/Raffinert/FuzzySharp/blob/dc2b858dc4cc56d8cdf26411904e255a019b0549/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md). +Accent to performance and allocations. See [Benchmark](https://github.com/Raffinert/FuzzySharp/blob/dc2b858dc4cc56d8cdf26411904e255a019b0549/FuzzySharp.Benchmarks/BenchmarkDotNet.Artifacts/results/Raffinert.FuzzySharp.Benchmarks.BenchmarkAll-report-github.md). Support local languages more naturally (removed regexps "a-zA-Z"). All regexps were replaced with string manipulations (fixes [PR!7](https://github.com/JakeBayer/FuzzySharp/pull/7)). Extra performance improvement, reused approach [Dmitry Sushchevsky](https://github.com/blowin) - see [PR!42](https://github.com/JakeBayer/FuzzySharp/pull/42). Implemented new Process.ExtractAll method, see [Issue!46](https://github.com/JakeBayer/FuzzySharp/issues/46).