From 9c15a73008abc5a675ce3d2087cf78c2281f5e90 Mon Sep 17 00:00:00 2001 From: Samuel Sloniker Date: Fri, 22 Jul 2022 18:12:59 -0700 Subject: [PATCH] Add code --- README.md | 10 +++++++- fast_stdev.py | 48 +++++++++++++++++++++++++++++++++++ test.py | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 1 deletion(-) create mode 100755 fast_stdev.py create mode 100644 test.py diff --git a/README.md b/README.md index b1ce594..ec82dd4 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,11 @@ # fast-stdev -Fast(er) standard deviation calculation (compared to the standard library) for Python [experimental] \ No newline at end of file +Fast(er) standard deviation calculation (compared to the standard library) for +Python [experimental] + + import fast_stdev + print(fast_stdev.stdev([1, 2, 4])) # Sample standard deviation + print(fast_stdev.pstdev([1, 2, 4])) # Population standard deviation + +Run `test.py` to compare results and run times with the `statistics.[p]stdev` +functions and NumPy's `array.std`. 
# ===========================================================================
# File: fast_stdev.py
# ===========================================================================
# SPDX-License-Identifier: LGPL-3.0-or-later
"""Fast(er) pure-Python sample and population standard deviation."""

import math
from typing import Sequence, Union, List


class InsufficientDataError(ValueError, ZeroDivisionError):
    """Raised when too few numbers are given to compute a deviation.

    It derives from ``ZeroDivisionError`` because that is what these
    functions used to leak for the same inputs, so existing ``except``
    clauses keep working. It also derives from ``ValueError`` so callers can
    treat it as an ordinary bad-argument error.
    """


def _divide_list(
    dividends: Sequence[Union[float, int]], divisor: Union[float, int]
) -> float:
    """Return the sum of ``dividends`` divided by ``divisor``."""
    return sum(dividends) / divisor


def _standard_deviation(
    numbers: Sequence[float], ddof: int, caller: str
) -> float:
    """Shared implementation of :func:`stdev` and :func:`pstdev`.

    Parameters
    ----------
    numbers : sequence of int or float
        The data; it must support ``len()`` and repeated iteration
    ddof : int
        Delta degrees of freedom: the divisor is ``len(numbers) - ddof``
        (1 for the sample deviation, 0 for the population deviation)
    caller : str
        Public function name, used only in the error message

    Returns
    -------
    float
        Square root of the summed squared deviations over the divisor

    Raises
    ------
    InsufficientDataError
        If ``len(numbers) <= ddof``, i.e. the divisor would not be positive

    """
    count = len(numbers)
    if count <= ddof:
        raise InsufficientDataError(
            f"{caller} requires at least {ddof + 1} data point(s), "
            f"got {count}"
        )
    mean = _divide_list(numbers, count)
    # A list (not a generator) is built on purpose: sum() over a list is
    # faster in CPython, and speed is the point of this module.
    squared_deviations = [(mean - value) ** 2 for value in numbers]
    return math.sqrt(_divide_list(squared_deviations, count - ddof))


def stdev(numbers: Sequence[float]) -> float:
    """Calculate the sample standard deviation of a group of numbers

    Parameters
    ----------
    numbers : list of int or float
        The numbers to calculate the standard deviation of

    Returns
    -------
    float
        The sample standard deviation of the numbers

    Raises
    ------
    InsufficientDataError
        If fewer than two numbers are given

    """
    return _standard_deviation(numbers, 1, "stdev")


def pstdev(numbers: Sequence[float]) -> float:
    """Calculate the population standard deviation of a group of numbers

    Parameters
    ----------
    numbers : list of int or float
        The numbers to calculate the standard deviation of

    Returns
    -------
    float
        The population standard deviation of the numbers

    Raises
    ------
    InsufficientDataError
        If no numbers are given

    """
    return _standard_deviation(numbers, 0, "pstdev")


# ===========================================================================
# File: test.py
# ===========================================================================
"""Compare results and run times of fast_stdev, statistics and NumPy."""

import timeit

# Number of times each benchmarked statement is executed.
iterations = 1000


def time(code, namespace=None):
    """Benchmark a statement and format its mean run time

    Parameters
    ----------
    code : str
        The statement handed to ``timeit.timeit``
    namespace : dict, optional
        Globals the statement is executed in; defaults to this module's
        globals

    Returns
    -------
    str
        ``"(time: NNNNμs)"`` where ``NNNN`` is the mean time per execution
        in whole microseconds, right-aligned to at least four characters

    """
    total_seconds = timeit.timeit(
        code,
        number=iterations,
        globals=globals() if namespace is None else namespace,
    )
    microseconds = round(1000000 * total_seconds / iterations)
    return f"(time: {microseconds:>4}μs)"


def _report(label, value, code, namespace):
    """Print one result line: label, computed value, and timing of ``code``."""
    print(
        " " + label + " : "
        + str(value).ljust(20, " ")
        + time(code, namespace)
    )


def main():
    """Run every implementation on the same random data and print results."""
    # Imported here so that merely importing this module neither requires
    # NumPy nor starts the benchmark.
    import random
    import statistics
    import sys

    import numpy

    import fast_stdev

    print("Python", sys.version, "\n")

    # 1000 random floats from random.random(); the list and the array hold
    # the same values so all three implementations see identical data.
    numbers = [random.random() for _ in range(1000)]
    np_array = numpy.array(numbers)

    # Names the benchmarked statements below refer to.
    namespace = {
        "statistics": statistics,
        "fast_stdev": fast_stdev,
        "numbers": numbers,
        "np_array": np_array,
    }

    print("Sample standard deviation:")
    _report(
        "stdlib",
        statistics.stdev(numbers),
        "statistics.stdev(numbers)",
        namespace,
    )
    _report(
        "fast_stdev",
        fast_stdev.stdev(numbers),
        "fast_stdev.stdev(numbers)",
        namespace,
    )
    _report(
        "numpy",
        np_array.std(ddof=1),
        "np_array.std(ddof=1)",
        namespace,
    )

    print("\nPopulation standard deviation:")
    _report(
        "stdlib",
        statistics.pstdev(numbers),
        "statistics.pstdev(numbers)",
        namespace,
    )
    _report(
        "fast_stdev",
        fast_stdev.pstdev(numbers),
        "fast_stdev.pstdev(numbers)",
        namespace,
    )
    _report(
        "numpy",
        np_array.std(),
        "np_array.std()",
        namespace,
    )


if __name__ == "__main__":
    main()