diff --git a/Doc/library/statistics.rst b/Doc/library/statistics.rst index cbb131855dc664..bdf74fc01000aa 100644 --- a/Doc/library/statistics.rst +++ b/Doc/library/statistics.rst @@ -716,8 +716,8 @@ However, for reading convenience, most of the examples show sorted sequences. Return the sample covariance of two inputs *x* and *y*. Covariance is a measure of the joint variability of two inputs. - Both inputs must be of the same length (no less than two), otherwise - :exc:`StatisticsError` is raised. + Both inputs must be sequences or iterables of the same length (no less + than two), otherwise :exc:`StatisticsError` is raised. Examples: @@ -753,8 +753,9 @@ However, for reading convenience, most of the examples show sorted sequences. continuous data that doesn't meet the linear proportion requirement for Pearson's correlation coefficient. - Both inputs must be of the same length (no less than two), and need - not to be constant, otherwise :exc:`StatisticsError` is raised. + Both inputs must be sequences or iterables of the same length (no less + than two), and must not be constant; otherwise :exc:`StatisticsError` + is raised. Example with `Kepler's laws of planetary motion <https://en.wikipedia.org/wiki/Kepler's_laws_of_planetary_motion>`_: @@ -802,8 +803,8 @@ However, for reading convenience, most of the examples show sorted sequences. (it is equal to the difference between predicted and actual values of the dependent variable). - Both inputs must be of the same length (no less than two), and - the independent variable *x* cannot be constant; + Both inputs must be sequences or iterables of the same length (no less + than two), and the independent variable *x* cannot be constant; otherwise a :exc:`StatisticsError` is raised. 
For example, we can use the `release dates of the Monty diff --git a/Lib/statistics.py b/Lib/statistics.py index 32fcf2313a815a..efce4a3867b215 100644 --- a/Lib/statistics.py +++ b/Lib/statistics.py @@ -673,6 +673,8 @@ def covariance(x, y, /): """ # https://en.wikipedia.org/wiki/Covariance + x = list(x) + y = list(y) n = len(x) if len(y) != n: raise StatisticsError('covariance requires that both inputs have same number of data points') @@ -710,6 +712,8 @@ def correlation(x, y, /, *, method='linear'): """ # https://en.wikipedia.org/wiki/Pearson_correlation_coefficient # https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient + x = list(x) + y = list(y) n = len(x) if len(y) != n: raise StatisticsError('correlation requires that both inputs have same number of data points') @@ -781,6 +785,8 @@ def linear_regression(x, y, /, *, proportional=False): """ # https://en.wikipedia.org/wiki/Simple_linear_regression + x = list(x) + y = list(y) n = len(x) if len(y) != n: raise StatisticsError('linear regression requires that both inputs have same number of data points') diff --git a/Lib/test/test_statistics.py b/Lib/test/test_statistics.py index 677a87b51b9192..5236ac07c688fc 100644 --- a/Lib/test/test_statistics.py +++ b/Lib/test/test_statistics.py @@ -2842,6 +2842,32 @@ def test_correlation_spearman(self): with self.assertRaises(ValueError): statistics.correlation(reading, mathematics, method='bad_method') + def test_iterator_inputs(self): + x = [1, 2, 3, 4, 5, 6, 7, 8, 9] + y = [1, 2, 3, 1, 2, 3, 1, 2, 3] + expected_cov = statistics.covariance(x, y) + expected_cor = statistics.correlation(x, y) + # iter() inputs should give same results as list inputs + self.assertAlmostEqual(statistics.covariance(iter(x), iter(y)), expected_cov) + self.assertAlmostEqual(statistics.correlation(iter(x), iter(y)), expected_cor) + # generator expressions should also work + self.assertAlmostEqual( + statistics.covariance((v for v in x), (v for v in y)), expected_cov + ) + 
self.assertAlmostEqual( + statistics.correlation((v for v in x), (v for v in y)), expected_cor + ) + # ranked method should also accept iterators + expected_ranked = statistics.correlation(x, y, method='ranked') + self.assertAlmostEqual( + statistics.correlation(iter(x), iter(y), method='ranked'), expected_ranked + ) + # mismatched lengths should still raise StatisticsError + with self.assertRaises(statistics.StatisticsError): + statistics.covariance(iter([1, 2, 3]), iter([1, 2])) + with self.assertRaises(statistics.StatisticsError): + statistics.correlation(iter([1, 2, 3]), iter([1, 2])) + class TestLinearRegression(unittest.TestCase): def test_constant_input_error(self): @@ -2881,6 +2907,26 @@ def test_float_output(self): self.assertTrue(isinstance(slope, float)) self.assertTrue(isinstance(intercept, float)) + def test_iterator_inputs(self): + x = [1, 2, 3, 4, 5] + y = [2, 4, 6, 8, 10] + expected = statistics.linear_regression(x, y) + # iter() inputs should give same results as list inputs + result = statistics.linear_regression(iter(x), iter(y)) + self.assertAlmostEqual(result.slope, expected.slope) + self.assertAlmostEqual(result.intercept, expected.intercept) + # generator expressions should also work + result = statistics.linear_regression((v for v in x), (v for v in y)) + self.assertAlmostEqual(result.slope, expected.slope) + self.assertAlmostEqual(result.intercept, expected.intercept) + # proportional=True should also accept iterators + expected_prop = statistics.linear_regression(x, y, proportional=True) + result_prop = statistics.linear_regression( + iter(x), iter(y), proportional=True + ) + self.assertAlmostEqual(result_prop.slope, expected_prop.slope) + self.assertEqual(result_prop.intercept, 0.0) + class TestNormalDist: # General note on precision: The pdf(), cdf(), and overlap() methods diff --git a/Misc/NEWS.d/next/Library/2026-05-01-17-20-47.gh-issue-149244.Rk3mXp.rst b/Misc/NEWS.d/next/Library/2026-05-01-17-20-47.gh-issue-149244.Rk3mXp.rst new file 
mode 100644 index 00000000000000..5131eddde3bffc --- /dev/null +++ b/Misc/NEWS.d/next/Library/2026-05-01-17-20-47.gh-issue-149244.Rk3mXp.rst @@ -0,0 +1,3 @@ +:func:`statistics.covariance`, :func:`statistics.correlation`, and +:func:`statistics.linear_regression` now accept any iterable input, +consistent with other functions in the :mod:`statistics` module.