def build_list(size, fill, value, at_position):
    """Build a list of ``size`` items that are all ``fill`` except one.

    The slot whose index equals ``at_position`` holds ``value``; every other
    slot references the same ``fill`` object. If no index in ``range(size)``
    equals ``at_position``, the list contains only ``fill``.
    """
    items = []
    for index in range(size):
        if index == at_position:
            items.append(value)
        else:
            items.append(fill)
    return items
def sort_with_itemgetter(dict_to_sort: dict) -> list:
    """Return the dictionary's ``(key, value)`` pairs sorted by value.

    ``operator.itemgetter(1)`` picks the value out of each pair as the sort
    key. ``sorted`` is stable, so pairs with equal values keep the
    dictionary's iteration order.

    The sorted list is returned (rather than discarded) so this kernel
    produces the same result as ``sort_with_lambda`` and the two can be
    compared on equal terms.
    """
    return sorted(dict_to_sort.items(), key=operator.itemgetter(1))
def make_dir_with_files(dir_name, number_of_files):
    """Create a fresh directory holding ``number_of_files`` empty files.

    Anything already present at ``dir_name`` is removed with
    ``shutil.rmtree`` first, so the directory never contains leftovers from
    a previous call. Files are named ``0.txt``, ``1.txt``, and so on.

    Args:
        dir_name: Path (``str`` or path-like) of the directory to create.
        number_of_files: How many empty ``.txt`` files to put in it.

    Returns:
        A ``pathlib.Path`` pointing at the directory.
    """
    flat_dir = pathlib.Path(dir_name)

    if flat_dir.exists():
        shutil.rmtree(flat_dir)

    # parents=True lets callers pass a nested target whose parent
    # directories do not exist yet.
    flat_dir.mkdir(parents=True, exist_ok=True)

    for file_number in range(number_of_files):
        (flat_dir / f"{file_number}.txt").touch()
    return flat_dir
def kernel_two(setup_result):
    """Demo kernel: print the input, pause for 1.5 seconds, and return it.

    The value handed in is returned unchanged.
    """
    message = f"kernel two: {setup_result}"
    print(message)
    pause_seconds = 1.5
    time.sleep(pause_seconds)
    return setup_result
# Live benchmark of every sorting implementation on lists of random integers
# in the range 0..1000, with list lengths 2**0 through 2**14.
# Each plot label is kept next to the kernel it describes so the two lists
# passed to perfplot cannot drift out of order.
labelled_kernels = {
    "bubble sort": bubble_sort,
    "insertion sort": insertion_sort,
    "merge sort": merge_sort,
    "quick sort": quick_sort,
    "tim sort": tim_sort,
    "built in sort": python_built_in_sort,
}

perfplot.live(
    n_range=[2**exponent for exponent in range(15)],
    setup=lambda n: [randint(0, 1_000) for _ in range(n)],
    kernels=list(labelled_kernels.values()),
    logy=True,
    labels=list(labelled_kernels),
    xlabel="Number of elements in list",
)
def nearly_sorted(n):
    """Return an ``n``-element list that is ascending except for its head.

    The result is ``1`` followed by ``0, 1, ..., n - 2``, so only the first
    element is out of place.

    For ``n < 1`` an empty list is returned, keeping the length equal to the
    requested size instead of yielding the stray one-element list ``[1]``.
    """
    if n < 1:
        return []
    # range() unpacks directly; no intermediate list is needed.
    return [1, *range(n - 1)]
def bubble_sort(array):
    """Sort ``array`` in place using bubble sort and return the same list.

    Each pass swaps adjacent out-of-order neighbours within the still
    unsorted prefix. The function stops as soon as a pass makes no swap.
    """
    last_index = len(array) - 1
    # ``boundary`` is the length of the prefix that still needs comparing;
    # it shrinks by one after every pass.
    for boundary in range(last_index, -1, -1):
        swapped = False
        for left in range(boundary):
            right = left + 1
            if array[left] > array[right]:
                array[left], array[right] = array[right], array[left]
                swapped = True
        if not swapped:
            break
    return array
def merge(left, right):
    """Merge two sequences into a single list, smallest head element first.

    Intended for inputs that are each already in ascending order. When one
    input is empty the other is returned as-is (the same object, not a
    copy). On ties the element from ``left`` is taken before the one from
    ``right``.
    """
    left_size = len(left)
    right_size = len(right)
    if left_size == 0:
        return right
    if right_size == 0:
        return left

    merged = []
    left_pos = 0
    right_pos = 0
    # Repeatedly take the smaller head until one side is used up.
    while left_pos < left_size and right_pos < right_size:
        if left[left_pos] <= right[right_pos]:
            merged.append(left[left_pos])
            left_pos += 1
        else:
            merged.append(right[right_pos])
            right_pos += 1

    # Exactly one side still has items; the other slice is empty.
    merged += left[left_pos:]
    merged += right[right_pos:]
    return merged
python_built_in_sort(array): + return sorted(array)