Additional itertools recipes for running statistics (gh-148879)

This commit is contained in:
Raymond Hettinger
2026-04-22 11:52:41 -05:00
committed by GitHub
parent 04fd103713
commit b16886528e
+66 -9
View File
@@ -833,6 +833,7 @@ and :term:`generators <generator>` which incur interpreter overhead.
from collections import Counter, deque
from contextlib import suppress
from functools import reduce
from heapq import heappush, heappushpop, heappush_max, heappushpop_max
from math import comb, isqrt, prod, sumprod
from operator import getitem, is_not, itemgetter, mul, neg, truediv
@@ -848,11 +849,6 @@ and :term:`generators <generator>` which incur interpreter overhead.
# prepend(1, [2, 3, 4]) → 1 2 3 4
return chain([value], iterable)
def running_mean(iterable):
    "Yield the mean of the values consumed so far, one result per input."
    # running_mean([8.5, 9.5, 7.5, 6.5]) → 8.5 9.0 8.5 8.0
    totals = accumulate(iterable)   # cumulative sums of the inputs
    sizes = count(1)                # how many inputs each sum covers: 1, 2, 3, ...
    return map(truediv, totals, sizes)
def repeatfunc(function, times=None, *args):
"Repeat calls to a function with specified arguments."
if times is None:
@@ -1150,6 +1146,49 @@ and :term:`generators <generator>` which incur interpreter overhead.
return n
# ==== Running statistics ====
def running_mean(iterable):
    "Mean of the values consumed so far."
    # running_mean([37, 33, 38, 28]) → 37 35 36 34
    cumulative_sums = accumulate(iterable)
    counts = count(1)   # 1, 2, 3, ... matches the number of values in each sum
    # True division, so integer inputs produce float averages.
    return map(truediv, cumulative_sums, counts)
def running_min(iterable):
    "Minimum of the values consumed so far."
    # running_min([37, 33, 38, 28]) → 37 33 33 28
    # accumulate() folds min() over the inputs, emitting each partial result.
    smallest_so_far = accumulate(iterable, min)
    return smallest_so_far
def running_max(iterable):
    "Maximum of the values consumed so far."
    # running_max([37, 33, 38, 28]) → 37 37 38 38
    # accumulate() folds max() over the inputs, emitting each partial result.
    largest_so_far = accumulate(iterable, max)
    return largest_so_far
def running_median(iterable):
"Median of values seen so far."
# running_median([37, 33, 38, 28]) → 37 35 37 35
read = iter(iterable).__next__
lo = [] # max-heap
hi = [] # min-heap the same size as or one smaller than lo
with suppress(StopIteration):
while True:
heappush_max(lo, heappushpop(hi, read()))
yield lo[0]
heappush(hi, heappushpop_max(lo, read()))
yield (lo[0] + hi[0]) / 2
def running_statistics(iterable):
    "Combined summary statistics for the values consumed so far."
    # Generate tuples: (size, minimum, median, maximum, mean)
    # Each running_* recipe consumes its own independent copy of the input.
    for_min, for_median, for_max, for_mean = tee(iterable, 4)
    sizes = count(1)
    minimums = running_min(for_min)
    medians = running_median(for_median)
    maximums = running_max(for_max)
    means = running_mean(for_mean)
    return zip(sizes, minimums, medians, maximums, means)
.. doctest::
:hide:
@@ -1226,10 +1265,6 @@ and :term:`generators <generator>` which incur interpreter overhead.
[(0, 'a'), (1, 'b'), (2, 'c')]
>>> list(running_mean([8.5, 9.5, 7.5, 6.5]))
[8.5, 9.0, 8.5, 8.0]
>>> for _ in loops(5):
... print('hi')
...
@@ -1789,6 +1824,28 @@ and :term:`generators <generator>` which incur interpreter overhead.
True
>>> list(running_mean([8.5, 9.5, 7.5, 6.5]))
[8.5, 9.0, 8.5, 8.0]
>>> list(running_mean([37, 33, 38, 28]))
[37.0, 35.0, 36.0, 34.0]
>>> list(running_min([37, 33, 38, 28]))
[37, 33, 33, 28]
>>> list(running_max([37, 33, 38, 28]))
[37, 37, 38, 38]
>>> list(running_median([37, 33, 38, 28]))
[37, 35.0, 37, 35.0]
>>> list(running_statistics([37, 33, 38, 28]))
[(1, 37, 37, 37, 37.0), (2, 33, 35.0, 37, 35.0), (3, 33, 37, 38, 36.0), (4, 28, 35.0, 38, 34.0)]
.. testcode::
:hide: