From 10c4044216fa1aaf7a276328701fd95ae5782b18 Mon Sep 17 00:00:00 2001 From: Laurence Billingham Date: Fri, 18 Feb 2022 18:21:48 +0000 Subject: [PATCH 1/7] test: :test_tube: :recycle: move tests of float functionality --- NotesLog.md | 8 ++++++++ tests/test_floats_in_pairs_and_triplets.py | 18 ++++++++++++++++++ tests/test_pairs_that_sum.py | 8 -------- tests/test_triplets_that_sum.py | 8 -------- 4 files changed, 26 insertions(+), 16 deletions(-) create mode 100644 tests/test_floats_in_pairs_and_triplets.py diff --git a/NotesLog.md b/NotesLog.md index c33f50c..f7819e2 100644 --- a/NotesLog.md +++ b/NotesLog.md @@ -57,3 +57,11 @@ test_input1_pairs 5.4665 (1.0) 6.2297 (1.0) 5.6687 (1.0) test_input1_triplets 384.6154 (70.36) 386.5000 (62.04) 385.4776 (68.00) 0.8287 (8.14) 385.4333 (68.13) 1.5047 (11.67) 2;0 2.5942 (0.01) 5 1 --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- ``` + +## if you want to go faster + +We're going to have to break apart the itertools into the underlying nested loops +before we can mess with the aglo. + +but 1st lets split out the float-y bits of the tests, because i think we might want to +make it easier to stop supporting floats and lose those tests. 
diff --git a/tests/test_floats_in_pairs_and_triplets.py b/tests/test_floats_in_pairs_and_triplets.py new file mode 100644 index 0000000..c36048a --- /dev/null +++ b/tests/test_floats_in_pairs_and_triplets.py @@ -0,0 +1,18 @@ +from math import e, pi, sqrt + +import pytest +from tuplesumfilter import pairs_that_sum_to, triplets_that_sum_to + + +def test_triplets_works_approx_with_floats(): + nums = [pi, sqrt(2), 17.45, 1e-10, e] + target = pi + e + sqrt(2) + expected = [(pytest.approx(pi), pytest.approx(sqrt(2)), pytest.approx(e))] + assert triplets_that_sum_to(nums, target) == expected + + +def test_pairs_works_approx_with_floats(): + nums = [pi, sqrt(2), 17.45, 1e-10, e] + target = pi + e + expected = [(pytest.approx(pi), pytest.approx(e))] + assert pairs_that_sum_to(nums, target) == expected diff --git a/tests/test_pairs_that_sum.py b/tests/test_pairs_that_sum.py index 671518f..7050da2 100644 --- a/tests/test_pairs_that_sum.py +++ b/tests/test_pairs_that_sum.py @@ -1,4 +1,3 @@ -from math import e, pi, sqrt import pytest from tuplesumfilter import pairs_that_sum_to @@ -28,13 +27,6 @@ def test_pairs_with_finite_input_but_no_match(worked_example_nums): assert pairs_that_sum_to(worked_example_nums, 0) == [] -def test_pairs_works_approx_with_floats(): - nums = [pi, sqrt(2), 17.45, 1e-10, e] - target = pi + e - expected = [(pytest.approx(pi), pytest.approx(e))] - assert pairs_that_sum_to(nums, target) == expected - - def test_pairs_when_multiple_matches(): example_input = [1, 979, 6, 299, 2, 1456, 5] assert pairs_that_sum_to(example_input, 7) == [(1, 6), (2, 5)] diff --git a/tests/test_triplets_that_sum.py b/tests/test_triplets_that_sum.py index 9b3a100..d4fe1c0 100644 --- a/tests/test_triplets_that_sum.py +++ b/tests/test_triplets_that_sum.py @@ -1,4 +1,3 @@ -from math import e, pi, sqrt import pytest from tuplesumfilter import triplets_that_sum_to @@ -32,13 +31,6 @@ def test_triplets_with_finite_input_but_no_match(worked_example_nums): assert 
triplets_that_sum_to(worked_example_nums, 0) == [] -def test_triplets_works_approx_with_floats(): - nums = [pi, sqrt(2), 17.45, 1e-10, e] - target = pi + e + sqrt(2) - expected = [(pytest.approx(pi), pytest.approx(sqrt(2)), pytest.approx(e))] - assert triplets_that_sum_to(nums, target) == expected - - def test_triplets_when_multiple_matches(): input = [10, 979, 5, 299, 2, 1456, 6, 8, 3] assert triplets_that_sum_to(input, 17) == [(10, 5, 2), (6, 8, 3)] From d53e1599b67dc3fa093da300eb0b1cafa874e2ff Mon Sep 17 00:00:00 2001 From: Laurence Billingham Date: Fri, 18 Feb 2022 18:50:53 +0000 Subject: [PATCH 2/7] perf: :zap: stop itertoolsing for pairwise, start nested looping --- NotesLog.md | 44 +++++++++++++++++++++- src/tuplesumfilter/sum_to_target.py | 17 +++++---- tests/test_floats_in_pairs_and_triplets.py | 14 ++++--- 3 files changed, 60 insertions(+), 15 deletions(-) diff --git a/NotesLog.md b/NotesLog.md index f7819e2..fafb871 100644 --- a/NotesLog.md +++ b/NotesLog.md @@ -49,7 +49,6 @@ eugh that it pretty bad ~0.4 for the triplets version $ make benchmark tests/performance_check.py .. [100%] - ------------------------------------------------------------------------------------- benchmark: 2 tests ------------------------------------------------------------------------------------ Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- @@ -65,3 +64,46 @@ before we can mess with the aglo. but 1st lets split out the float-y bits of the tests, because i think we might want to make it easier to stop supporting floats and lose those tests. + +wow, even just replacing the `math.isclose` in favor of simple `==` +gets us a ~30% speed up on the (`int` only) benchmarks. + +```sh +$ make benchmark +tests/performance_check.py .. 
[100%] + +------------------------------------------------------------------------------------- benchmark: 2 tests ------------------------------------------------------------------------------------ +Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_input1_pairs 2.8727 (1.0) 4.2386 (1.0) 3.1265 (1.0) 0.1638 (1.0) 3.1067 (1.0) 0.1888 (1.0) 78;9 319.8414 (1.0) 326 1 +test_input1_triplets 211.6325 (73.67) 213.3950 (50.35) 212.4042 (67.94) 0.6555 (4.00) 212.2717 (68.33) 0.8081 (4.28) 2;0 4.7080 (0.01) 5 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Legend: + Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile. + OPS: Operations Per Second, computed as 1 / Mean +=========================================================== 2 passed in 3.59s ============================================================ +``` + +but this does make some of our floaty tests fail. + +## simple nested loops for pairwise + +looks like we're seeing an perf improvement over itertools just by breaking out into loops for the pairs +roughly the same speedup we got by dropping float support, but we're back using `math.isclose` + +```sh +tests/performance_check.py .. 
[100%] + +------------------------------------------------------------------------------------- benchmark: 2 tests ------------------------------------------------------------------------------------ +Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_input1_pairs 3.0859 (1.0) 6.3263 (1.0) 3.4614 (1.0) 0.2696 (1.0) 3.4232 (1.0) 0.2591 (1.0) 44;6 288.8998 (1.0) 298 1 +test_input1_triplets 392.5019 (127.19) 394.4756 (62.36) 393.5971 (113.71) 0.8023 (2.98) 393.7625 (115.03) 1.2974 (5.01) 2;0 2.5407 (0.01) 5 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Legend: + Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile. 
+ OPS: Operations Per Second, computed as 1 / Mean +=================================================================================== 2 passed in 4.87s =================================================================================== +``` \ No newline at end of file diff --git a/src/tuplesumfilter/sum_to_target.py b/src/tuplesumfilter/sum_to_target.py index 2e740d2..ea62b62 100644 --- a/src/tuplesumfilter/sum_to_target.py +++ b/src/tuplesumfilter/sum_to_target.py @@ -11,15 +11,15 @@ def pairs_that_sum_to(numbers: t.Sequence[t.Num], sum_target: t.Num) -> t.PairsOfNums: logger.bind( - sum_kind="pairs", - sum_target=sum_target, - len_input=len(numbers), - algo="itertools", + sum_kind="pairs", sum_target=sum_target, len_input=len(numbers), algo="nested" ) - pairs = list(ntuples_that_sum_to(numbers, sum_target, 2)) - logger.debug(f"found {len(pairs)} pair sequences that sum to {sum_target}") - # i promise mypy that we _are_ narrowing the types here - return t.cast(t.PairsOfNums, pairs) + summed = [] + for ileft, left in enumerate(numbers): + for right in numbers[ileft:]: + if math.isclose(sum_target, left + right, rel_tol=1e-09): + summed.append((left, right)) + logger.debug(f"found {len(summed)} pair sequences that sum to {sum_target}") + return summed def triplets_that_sum_to( @@ -58,5 +58,6 @@ def ntuples_that_sum_to( n_tuple for n_tuple in n_tuples if math.isclose(sum(n_tuple), sum_target, rel_tol=FLOAT_COMPARISON_REL_TOL) + # if sum(n_tuple) == sum_target ) return filtered diff --git a/tests/test_floats_in_pairs_and_triplets.py b/tests/test_floats_in_pairs_and_triplets.py index c36048a..8c4987b 100644 --- a/tests/test_floats_in_pairs_and_triplets.py +++ b/tests/test_floats_in_pairs_and_triplets.py @@ -5,14 +5,16 @@ def test_triplets_works_approx_with_floats(): - nums = [pi, sqrt(2), 17.45, 1e-10, e] - target = pi + e + sqrt(2) - expected = [(pytest.approx(pi), pytest.approx(sqrt(2)), pytest.approx(e))] + root2 = sqrt(2) + nums = [pi, root2, 17.45, 1e-10, e] + 
target = pi + e + root2 + expected = [(pytest.approx(pi), pytest.approx(root2), pytest.approx(e))] assert triplets_that_sum_to(nums, target) == expected def test_pairs_works_approx_with_floats(): - nums = [pi, sqrt(2), 17.45, 1e-10, e] - target = pi + e - expected = [(pytest.approx(pi), pytest.approx(e))] + root2 = sqrt(2) + nums = [pi, root2, 17.45, 1e-10, e] + target = pi + root2 + expected = [(pytest.approx(pi), pytest.approx(root2))] assert pairs_that_sum_to(nums, target) == expected From 92548168abbe05d9e48032ff2ef676491b2612bd Mon Sep 17 00:00:00 2001 From: Laurence Billingham Date: Fri, 18 Feb 2022 18:53:40 +0000 Subject: [PATCH 3/7] fix: :bug: :facepalm: stop loop nest incorrectly returning duplicate that sum --- src/tuplesumfilter/sum_to_target.py | 2 +- tests/test_pairs_that_sum.py | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/src/tuplesumfilter/sum_to_target.py b/src/tuplesumfilter/sum_to_target.py index ea62b62..7d7211c 100644 --- a/src/tuplesumfilter/sum_to_target.py +++ b/src/tuplesumfilter/sum_to_target.py @@ -15,7 +15,7 @@ def pairs_that_sum_to(numbers: t.Sequence[t.Num], sum_target: t.Num) -> t.PairsO ) summed = [] for ileft, left in enumerate(numbers): - for right in numbers[ileft:]: + for right in numbers[ileft + 1 :]: if math.isclose(sum_target, left + right, rel_tol=1e-09): summed.append((left, right)) logger.debug(f"found {len(summed)} pair sequences that sum to {sum_target}") diff --git a/tests/test_pairs_that_sum.py b/tests/test_pairs_that_sum.py index 7050da2..5afb43d 100644 --- a/tests/test_pairs_that_sum.py +++ b/tests/test_pairs_that_sum.py @@ -30,3 +30,8 @@ def test_pairs_with_finite_input_but_no_match(worked_example_nums): def test_pairs_when_multiple_matches(): example_input = [1, 979, 6, 299, 2, 1456, 5] assert pairs_that_sum_to(example_input, 7) == [(1, 6), (2, 5)] + + +def test_regression_pairs_excludes_repeats(): + got = pairs_that_sum_to([1, 2], 4) + assert got == [] From 
eb7a52789665cf8c294967e6dc8a3c7d5d6fbe6a Mon Sep 17 00:00:00 2001 From: Laurence Billingham Date: Fri, 18 Feb 2022 19:07:25 +0000 Subject: [PATCH 4/7] perf: :zap: stop itertoolsing for triplets, start nested looping --- NotesLog.md | 19 +++++++++++++++++++ src/tuplesumfilter/sum_to_target.py | 21 +++++++++++++++------ src/tuplesumfilter/types.py | 1 + tests/test_triplets_that_sum.py | 5 +++++ 4 files changed, 40 insertions(+), 6 deletions(-) diff --git a/NotesLog.md b/NotesLog.md index fafb871..c48f732 100644 --- a/NotesLog.md +++ b/NotesLog.md @@ -106,4 +106,23 @@ Legend: Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile. OPS: Operations Per Second, computed as 1 / Mean =================================================================================== 2 passed in 4.87s =================================================================================== +``` + +and now we've moved the triplets to nested loops too + +```sh +tests/performance_check.py .. 
[100%] + + +------------------------------------------------------------------------------------- benchmark: 2 tests ------------------------------------------------------------------------------------ +Name (time in ms) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- +test_input1_pairs 4.6867 (1.0) 9.1275 (1.0) 5.5921 (1.0) 0.8257 (1.0) 5.3198 (1.0) 0.8444 (1.0) 23;12 178.8246 (1.0) 193 1 +test_input1_triplets 371.6804 (79.31) 376.8461 (41.29) 374.2332 (66.92) 2.3729 (2.87) 373.5280 (70.21) 4.3788 (5.19) 3;0 2.6721 (0.01) 5 1 +--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- + +Legend: + Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile. 
+ OPS: Operations Per Second, computed as 1 / Mean +======================================================================================================= 2 passed in 4.81s ======================================================================================================== ``` \ No newline at end of file diff --git a/src/tuplesumfilter/sum_to_target.py b/src/tuplesumfilter/sum_to_target.py index 7d7211c..4c2bf07 100644 --- a/src/tuplesumfilter/sum_to_target.py +++ b/src/tuplesumfilter/sum_to_target.py @@ -9,6 +9,11 @@ logger = get_logger() +def _comparitor(number: t.Num, sum_target: t.Num) -> t.Boolean: + """really just a single place to switch between == and math.isclose""" + return math.isclose(number, sum_target, rel_tol=1e-09) + + def pairs_that_sum_to(numbers: t.Sequence[t.Num], sum_target: t.Num) -> t.PairsOfNums: logger.bind( sum_kind="pairs", sum_target=sum_target, len_input=len(numbers), algo="nested" @@ -16,7 +21,7 @@ def pairs_that_sum_to(numbers: t.Sequence[t.Num], sum_target: t.Num) -> t.PairsO summed = [] for ileft, left in enumerate(numbers): for right in numbers[ileft + 1 :]: - if math.isclose(sum_target, left + right, rel_tol=1e-09): + if _comparitor(left + right, sum_target): summed.append((left, right)) logger.debug(f"found {len(summed)} pair sequences that sum to {sum_target}") return summed @@ -29,12 +34,16 @@ def triplets_that_sum_to( sum_kind="triplets", sum_target=sum_target, len_input=len(numbers), - algo="itertools", + algo="nested", ) - triplets = list(ntuples_that_sum_to(numbers, sum_target, 3)) - logger.debug(f"found {len(triplets)} triplet sequences that sum to {sum_target}") - # i promise mypy, again, that we _are_ narrowing the types here - return t.cast(t.TripletsOfNums, triplets) + summed = [] + for ileft, left in enumerate(numbers): + for jcentre, center in enumerate(numbers[ileft + 1 :]): + for right in numbers[(ileft + jcentre + 2) :]: + if _comparitor(left + center + right, sum_target): + summed.append((left, center, 
right)) + logger.debug(f"found {len(summed)} triplet sequences that sum to {sum_target}") + return summed def ntuples_that_sum_to( diff --git a/src/tuplesumfilter/types.py b/src/tuplesumfilter/types.py index 5b06c6d..34564fa 100644 --- a/src/tuplesumfilter/types.py +++ b/src/tuplesumfilter/types.py @@ -20,6 +20,7 @@ def read_file(fname: t.Path) cast = typ.cast +Boolean = bool Generator = typ.Generator Int = int List = typ.List diff --git a/tests/test_triplets_that_sum.py b/tests/test_triplets_that_sum.py index d4fe1c0..84222cc 100644 --- a/tests/test_triplets_that_sum.py +++ b/tests/test_triplets_that_sum.py @@ -34,3 +34,8 @@ def test_triplets_with_finite_input_but_no_match(worked_example_nums): def test_triplets_when_multiple_matches(): input = [10, 979, 5, 299, 2, 1456, 6, 8, 3] assert triplets_that_sum_to(input, 17) == [(10, 5, 2), (6, 8, 3)] + + +def test_regression_triplets_excludes_repeats(): + got = triplets_that_sum_to([1, 2, 7, 8, 9], 4) + assert got == [] From 2b62cc5a5776c79430984e5d6c926fec53d80fea Mon Sep 17 00:00:00 2001 From: Laurence Billingham Date: Sun, 20 Feb 2022 20:20:49 +0000 Subject: [PATCH 5/7] perf: BREAKING :zap: :fire: speed up pairs using storage but BREAK support for float inputs --- Makefile | 7 +++- NotesLog.md | 30 ++++++++++++++- pyproject.toml | 6 +++ src/tuplesumfilter/sum_to_target.py | 43 ++++++---------------- tests/performance_check.py | 3 ++ tests/test_floats_in_pairs_and_triplets.py | 2 + 6 files changed, 56 insertions(+), 35 deletions(-) diff --git a/Makefile b/Makefile index 1f73645..55d56a9 100644 --- a/Makefile +++ b/Makefile @@ -13,10 +13,13 @@ all: @echo "release - Build distribution and release to PyPI." 
test: - python -m pytest + python -m pytest -m 'not benchmark' -m 'not floats' + +test_with_floats: + python -m pytest -m 'not benchmark' benchmark: - python -m pytest tests/performance* + python -m pytest -m benchmark tests/performance* coverage: python -m pytest --cov=tuplesumfilter tests --cov-fail-under 90 diff --git a/NotesLog.md b/NotesLog.md index c48f732..f50cef8 100644 --- a/NotesLog.md +++ b/NotesLog.md @@ -125,4 +125,32 @@ Legend: Outliers: 1 Standard Deviation from Mean; 1.5 IQR (InterQuartile Range) from 1st Quartile and 3rd Quartile. OPS: Operations Per Second, computed as 1 / Mean ======================================================================================================= 2 passed in 4.81s ======================================================================================================== -``` \ No newline at end of file +``` + +### trading space for speed + +its relatively common to speed algos up by trading off some memory +for reduced CPU tome complexity. + +I reckon we can do that here using something to keep track of values we've already seen +it'll need to be a fast, O(1), lookup thing: so a `set` or `dict`. + +However, we'll need to ditch `float` support because they don't hash. + +Okaaaay + +That gives us a big speedup in the pair version: note that we are now measuring _micro_ not _milli_ seconds + +```sh +tests/performance_check.py .. 
[100%] + + +-------------------------------------------------------------------------------------------------- benchmark: 2 tests -------------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +test_input1_pairs 22.1710 (1.0) 152.1860 (1.0) 23.5753 (1.0) 5.5831 (1.0) 22.8580 (1.0) 0.4057 (1.0) 114;461 42,417.2354 (1.0) 7671 1 +test_input1_triplets 176,173.4430 (>1000.0) 188,509.7390 (>1000.0) 184,815.9523 (>1000.0) 4,741.3658 (849.24) 186,896.7185 (>1000.0) 5,253.1090 (>1000.0) 1;0 5.4108 (0.00) 6 1 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +``` + +that is > a 200x (nearly 250x) speedup for the pairs. 
diff --git a/pyproject.toml b/pyproject.toml index d4cf7d4..bac908c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,3 +9,9 @@ source = ["src", ".tox/*/site-packages"] [tool.coverage.report] show_missing = true + +[tool.pytest.ini_options] +markers = [ + "benchmark: marks tests as for performance benchmarking: they are slow (deselect with '-m \"not benchmark\"')", + "floats: marks tests as for testing floating point support: some algorithms don't work for floats (deselect with '-m \"not floats\"')", +] diff --git a/src/tuplesumfilter/sum_to_target.py b/src/tuplesumfilter/sum_to_target.py index 4c2bf07..1b96d0e 100644 --- a/src/tuplesumfilter/sum_to_target.py +++ b/src/tuplesumfilter/sum_to_target.py @@ -11,18 +11,23 @@ def _comparitor(number: t.Num, sum_target: t.Num) -> t.Boolean: """really just a single place to switch between == and math.isclose""" - return math.isclose(number, sum_target, rel_tol=1e-09) + return number == sum_target + # return math.isclose(number, sum_target, rel_tol=1e-09) def pairs_that_sum_to(numbers: t.Sequence[t.Num], sum_target: t.Num) -> t.PairsOfNums: logger.bind( - sum_kind="pairs", sum_target=sum_target, len_input=len(numbers), algo="nested" + sum_kind="pairs", + sum_target=sum_target, + len_input=len(numbers), + algo="w-storage-int-only", ) + already_seen = set() summed = [] - for ileft, left in enumerate(numbers): - for right in numbers[ileft + 1 :]: - if _comparitor(left + right, sum_target): - summed.append((left, right)) + for comparitor in numbers: + if (sum_target - comparitor) in already_seen: + summed.append((sum_target - comparitor, comparitor)) + already_seen.add(comparitor) logger.debug(f"found {len(summed)} pair sequences that sum to {sum_target}") return summed @@ -44,29 +49,3 @@ def triplets_that_sum_to( summed.append((left, center, right)) logger.debug(f"found {len(summed)} triplet sequences that sum to {sum_target}") return summed - - -def ntuples_that_sum_to( - numbers: t.Sequence[t.Num], sum_target: t.Num, 
dimensions: t.Int -) -> t.Generator[t.NTupelOfNums, None, None]: - """ - Filters a the input `numbers` by whether their n-tuple combinations - sum to match the `sum_target`, the n in n-tuple is controlled by `dimensions`. - e.g. pairs of numbers for dimenions==2 - - Returns - ------- - A generator from which we can pull the matching combinations. - - >>> input = [1, 2, 3, 4] - >>> list(ntuples_that_sum_to(input, 7, 3)) - >>> [(1, 2, 4)] - """ - n_tuples = itertools.combinations(numbers, dimensions) - filtered = ( - n_tuple - for n_tuple in n_tuples - if math.isclose(sum(n_tuple), sum_target, rel_tol=FLOAT_COMPARISON_REL_TOL) - # if sum(n_tuple) == sum_target - ) - return filtered diff --git a/tests/performance_check.py b/tests/performance_check.py index de545d2..bd26572 100644 --- a/tests/performance_check.py +++ b/tests/performance_check.py @@ -1,7 +1,10 @@ from pathlib import Path +import pytest from tuplesumfilter import numbers_in_file, pairs_that_sum_to, triplets_that_sum_to +pytestmark = pytest.mark.benchmark + INPUT1_FILE = Path("./tests/__test_data__") / "input1.txt" diff --git a/tests/test_floats_in_pairs_and_triplets.py b/tests/test_floats_in_pairs_and_triplets.py index 8c4987b..406c758 100644 --- a/tests/test_floats_in_pairs_and_triplets.py +++ b/tests/test_floats_in_pairs_and_triplets.py @@ -3,6 +3,8 @@ import pytest from tuplesumfilter import pairs_that_sum_to, triplets_that_sum_to +pytestmark = pytest.mark.floats + def test_triplets_works_approx_with_floats(): root2 = sqrt(2) From 3897d490e1106ca822b94dbac3f375509deb0989 Mon Sep 17 00:00:00 2001 From: Laurence Billingham Date: Sun, 20 Feb 2022 20:29:23 +0000 Subject: [PATCH 6/7] perf: :zap: add storage for triplets > 150x speedup --- NotesLog.md | 24 +++++++++++++++++++++++- src/tuplesumfilter/sum_to_target.py | 13 ++++++------- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/NotesLog.md b/NotesLog.md index f50cef8..ae40c7e 100644 --- a/NotesLog.md +++ b/NotesLog.md @@ -133,7 +133,7 @@ 
its relatively common to speed algos up by trading off some memory for reduced CPU tome complexity. I reckon we can do that here using something to keep track of values we've already seen -it'll need to be a fast, O(1), lookup thing: so a `set` or `dict`. +it'll need to be a fast, $O(1)$, lookup thing: so a `set` or `dict`. However, we'll need to ditch `float` support because they don't hash. @@ -154,3 +154,25 @@ test_input1_triplets 176,173.4430 (>1000.0) 188,509.7390 (>1000.0) 184,815 ``` that is > a 200x (nearly 250x) speedup for the pairs. + + +I'll split the float test out further using `pytest` markers and fix up the makefile. + +Now let's do stuff for the triplet version + + +```sh +$ make benchmark +tests/performance_check.py .. [100%] + +-------------------------------------------------------------------------------------------- benchmark: 2 tests -------------------------------------------------------------------------------------------- +Name (time in us) Min Max Mean StdDev Median IQR Outliers OPS Rounds Iterations +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +test_input1_pairs 22.1660 (1.0) 166.3790 (1.0) 23.6089 (1.0) 5.0170 (1.0) 23.0000 (1.0) 0.5123 (1.0) 87;521 42,356.8183 (1.0) 7677 1 +test_input1_triplets 1,994.8000 (89.99) 3,561.1120 (21.40) 2,152.1272 (91.16) 204.1428 (40.69) 2,033.1040 (88.40) 299.1878 (584.07) 41;4 464.6565 (0.01) 341 1 +------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ +``` + +again note the unit change for the triplets, +even though we're still (i think) $O(n^2)$, down from $O(n^3)$, +we are seeing an approx 175x speedup in the triplets diff --git a/src/tuplesumfilter/sum_to_target.py 
b/src/tuplesumfilter/sum_to_target.py index 1b96d0e..49b3cfc 100644 --- a/src/tuplesumfilter/sum_to_target.py +++ b/src/tuplesumfilter/sum_to_target.py @@ -1,6 +1,3 @@ -import itertools -import math - import tuplesumfilter.types as t from tuplesumfilter.app_logging import get_logger @@ -43,9 +40,11 @@ def triplets_that_sum_to( ) summed = [] for ileft, left in enumerate(numbers): - for jcentre, center in enumerate(numbers[ileft + 1 :]): - for right in numbers[(ileft + jcentre + 2) :]: - if _comparitor(left + center + right, sum_target): - summed.append((left, center, right)) + already_seen = set() + still_need = sum_target - left + for right in numbers[ileft + 1 :]: + if still_need - right in already_seen: + summed.append((left, (sum_target - left - right), right)) + already_seen.add(right) logger.debug(f"found {len(summed)} triplet sequences that sum to {sum_target}") return summed From 3802f47b154f9ac0e61b91b5a3a07ea0123cf64f Mon Sep 17 00:00:00 2001 From: Laurence Billingham Date: Mon, 21 Feb 2022 12:10:51 +0000 Subject: [PATCH 7/7] fix: :green_heart: remove float-y tests from CI coverage test --- Makefile | 31 +++++++++++++++++------------ src/tuplesumfilter/sum_to_target.py | 6 ------ 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/Makefile b/Makefile index 55d56a9..c0b05b1 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,18 @@ all: - @echo "dev_install - Install development dependencies" - @echo "test - Run tests" - @echo "typecheck - Run static type checking only python>=3.10" - @echo "lint - Run static analysis for common problems" - @echo "autoformat - Format all code to a consistent style" - @echo "coverage - Run tests and check the test coverage" - @echo "benchmark - Run a small suite to check performance" - @echo "ci_install - Install dependencies needed for CI" - @echo "fixed_install - Install as a non-editible package like production consumers" - @echo "clean - Delete generated files" - @echo "dist - Build distribution artifacts" - 
@echo "release - Build distribution and release to PyPI." + @echo "dev_install - Install development dependencies" + @echo "test - Run tests" + @echo "typecheck - Run static type checking only python>=3.10" + @echo "lint - Run static analysis for common problems" + @echo "autoformat - Format all code to a consistent style" + @echo "coverage - Run tests and check the test coverage" + @echo "benchmark - Run a small suite to check performance" + @echo "test_with_floats - Run tests: including the ones with floating point inputs" + @echo "coverage_with_floats - Check test coverage: including floating point input examples" + @echo "ci_install - Install dependencies needed for CI" + @echo "fixed_install - Install as a non-editible package like production consumers" + @echo "clean - Delete generated files" + @echo "dist - Build distribution artifacts" + @echo "release - Build distribution and release to PyPI." test: python -m pytest -m 'not benchmark' -m 'not floats' @@ -22,6 +24,9 @@ benchmark: python -m pytest -m benchmark tests/performance* coverage: + python -m pytest --cov=tuplesumfilter -m 'not benchmark' -m 'not floats' tests --cov-fail-under 90 + +coverage_with_floats: python -m pytest --cov=tuplesumfilter tests --cov-fail-under 90 typecheck: @@ -57,4 +62,4 @@ dist: clean release: dist twine upload dist/*.* -.PHONY: all autoformat benchmark ci_install clean coverage dev_install dist fixed_install test typecheck \ No newline at end of file +.PHONY: all autoformat benchmark ci_install clean coverage coverage_with_floats dev_install dist fixed_install test test_with_floats typecheck \ No newline at end of file diff --git a/src/tuplesumfilter/sum_to_target.py b/src/tuplesumfilter/sum_to_target.py index 49b3cfc..312a7a0 100644 --- a/src/tuplesumfilter/sum_to_target.py +++ b/src/tuplesumfilter/sum_to_target.py @@ -6,12 +6,6 @@ logger = get_logger() -def _comparitor(number: t.Num, sum_target: t.Num) -> t.Boolean: - """really just a single place to switch between == and 
math.isclose""" - return number == sum_target - # return math.isclose(number, sum_target, rel_tol=1e-09) - - def pairs_that_sum_to(numbers: t.Sequence[t.Num], sum_target: t.Num) -> t.PairsOfNums: logger.bind( sum_kind="pairs",