This commit is contained in:
Samuel Sloniker 2022-07-22 18:12:59 -07:00
parent 20dc95f087
commit 9c15a73008
3 changed files with 126 additions and 1 deletions

View File

@ -1,3 +1,11 @@
# fast-stdev
Fast(er) standard deviation calculation (compared to the standard library) for Python [experimental]
Fast(er) standard deviation calculation (compared to the standard library) for
Python [experimental]
```python
import fast_stdev
print(fast_stdev.stdev([1, 2, 4])) # Sample standard deviation
print(fast_stdev.pstdev([1, 2, 4])) # Population standard deviation
```
Run `test.py` to compare results and run times with the `statistics.[p]stdev`
functions and NumPy's `ndarray.std`.

48
fast_stdev.py Executable file
View File

@ -0,0 +1,48 @@
# SPDX-License-Identifier: LGPL-3.0-or-later
import math
from typing import Sequence, Union, List
def _divide_list(
    dividends: Sequence[Union[float, int]], divisor: Union[float, int]
) -> float:
    """Add up a group of numbers and divide the total by a divisor

    Parameters
    ----------
    dividends : sequence of int or float
        The numbers to add together
    divisor : int or float
        The number to divide the total by

    Returns
    -------
    float
        The sum of the dividends divided by the divisor
    """
    total = sum(dividends)
    quotient = total / divisor
    return quotient
def stdev(numbers: Sequence[float]) -> float:
    """Calculate the sample standard deviation of a group of numbers

    Parameters
    ----------
    numbers : sequence of int or float
        The numbers to calculate the standard deviation of; at least two
        values are required

    Returns
    -------
    float
        The sample standard deviation of the numbers

    Raises
    ------
    ValueError
        If fewer than two numbers are given
    """
    count = len(numbers)
    if count < 2:
        # The sample formula divides by (count - 1); fail with a clear
        # message instead of an opaque "division by zero".
        raise ValueError("stdev requires at least two data points")

    mean = sum(numbers) / count
    squared_deviations = [(mean - i) ** 2 for i in numbers]
    # Dividing by (count - 1) rather than count is what makes this the
    # sample (not population) standard deviation.
    return math.sqrt(sum(squared_deviations) / (count - 1))
def pstdev(numbers: Sequence[float]) -> float:
    """Calculate the population standard deviation of a group of numbers

    Parameters
    ----------
    numbers : sequence of int or float
        The numbers to calculate the standard deviation of; at least one
        value is required

    Returns
    -------
    float
        The population standard deviation of the numbers

    Raises
    ------
    ValueError
        If no numbers are given
    """
    count = len(numbers)
    if count < 1:
        # Both the mean and the variance divide by count; fail with a clear
        # message instead of an opaque "division by zero".
        raise ValueError("pstdev requires at least one data point")

    mean = sum(numbers) / count
    squared_deviations = [(mean - i) ** 2 for i in numbers]
    # Dividing by count (not count - 1) is what makes this the population
    # standard deviation.
    return math.sqrt(sum(squared_deviations) / count)

69
test.py Normal file
View File

@ -0,0 +1,69 @@
import timeit
import statistics
import fast_stdev
import random
import numpy
import sys
# Show which interpreter produced the results, followed by a blank line.
print("Python", sys.version, "\n")

# How many times each snippet is executed when it is timed.
iterations = 1000

# Candidate data sets. Every assignment replaces the one before it, so only
# the last list (random floats) is the data that actually gets measured.
numbers = [1, 2, 4]
numbers = [random.randint(1, 100) for _ in range(1000)]
numbers = [random.random() for _ in range(1000)]

# The same values as a NumPy array, used for the NumPy comparison rows.
np_array = numpy.array(numbers)
def time(code):
    """Time a code snippet and describe its average duration

    Parameters
    ----------
    code
        The snippet handed to ``timeit.timeit``; it is run ``iterations``
        times against this module's globals

    Returns
    -------
    str
        The average duration of one run, rounded to whole microseconds and
        right-justified to at least four characters, formatted as
        ``(time: ...μs)``
    """
    total_seconds = timeit.timeit(code, number=iterations, globals=globals())
    # Scale to microseconds before averaging, then round to a whole number.
    microseconds = round(1000000 * total_seconds / iterations)
    padded = str(microseconds).rjust(4, " ")
    return "(time: " + padded + "μs)"
def _print_row(label, value, snippet):
    """Print one result row: label, value padded to 20 columns, timing."""
    print(label + str(value).ljust(20, " ") + time(snippet))


# Sample standard deviation: each implementation's result and run time.
print("Sample standard deviation:")
_print_row(
    " stdlib : ",
    statistics.stdev(numbers),
    "statistics.stdev(numbers)",
)
_print_row(
    " fast_stdev : ",
    fast_stdev.stdev(numbers),
    "fast_stdev.stdev(numbers)",
)
_print_row(
    " numpy : ",
    np_array.std(ddof=1),
    "np_array.std(ddof=1)",
)

# Population standard deviation: the same three implementations.
print("\nPopulation standard deviation:")
_print_row(
    " stdlib : ",
    statistics.pstdev(numbers),
    "statistics.pstdev(numbers)",
)
_print_row(
    " fast_stdev : ",
    fast_stdev.pstdev(numbers),
    "fast_stdev.pstdev(numbers)",
)
_print_row(
    " numpy : ",
    np_array.std(),
    "np_array.std()",
)