Skip to content
This repository has been archived by the owner on Feb 17, 2023. It is now read-only.

Commit

Permalink
Merge pull request #86 from esc24/mdtol_for_mean
Browse files Browse the repository at this point in the history
Mdtol for mean
  • Loading branch information
rhattersley committed Jun 13, 2014
2 parents 50d579d + b166690 commit aab46da
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 3 deletions.
31 changes: 29 additions & 2 deletions biggus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1335,6 +1335,11 @@ def process_data(self, data):


class _MeanStreamsHandler(_AggregationStreamsHandler):
def __init__(self, array, axis, mdtol):
# The mdtol argument is not applicable to non-masked arrays
# so it is ignored.
super(_MeanStreamsHandler, self).__init__(array, axis)

def bootstrap(self, shape):
self.running_total = np.zeros(shape, dtype=self.array.dtype)

Expand All @@ -1351,8 +1356,13 @@ def process_data(self, data):


class _MeanMaskedStreamsHandler(_AggregationStreamsHandler):
def __init__(self, array, axis, mdtol):
self._mdtol = mdtol
super(_MeanMaskedStreamsHandler, self).__init__(array, axis)

def bootstrap(self, shape):
self.running_count = np.zeros(shape, dtype=self.array.dtype)
self.running_masked_count = np.zeros(shape, dtype=self.array.dtype)
self.running_total = np.zeros(shape, dtype=self.array.dtype)

def finalise(self):
Expand All @@ -1363,11 +1373,19 @@ def finalise(self):
# Promote array-scalar to 0-dimensional array.
if array.ndim == 0:
array = np.ma.array(array)
# Apply masked/missing data threshold (mdtol).
if self._mdtol < 1:
mask_update = np.true_divide(self.running_masked_count,
self.running_masked_count +
self.running_count) > self._mdtol
array.mask |= mask_update

chunk = Chunk(self.current_keys, array)
return chunk

def process_data(self, data):
self.running_count += np.ma.count(data, axis=self.axis)
self.running_masked_count += np.ma.count_masked(data, axis=self.axis)
self.running_total += np.sum(data, axis=self.axis)


Expand Down Expand Up @@ -1569,7 +1587,7 @@ def _normalise_axis(axis, array):
return axes


def mean(a, axis=None):
def mean(a, axis=None, mdtol=1):
"""
Request the mean of an Array over any number of axes.
Expand All @@ -1583,16 +1601,25 @@ def mean(a, axis=None):
If axis is a tuple of ints, the operation is performed
over multiple axes.
:type axis: None, or int, or iterable of ints.
:param float mdtol: Tolerance of missing data. The value in each
element of the resulting array will be masked if the
fraction of masked data contributing to that element
exceeds mdtol. mdtol=0 means no missing data is
tolerated while mdtol=1 will mean the resulting
element will be masked if and only if all the
contributing elements of the source array are masked.
Defaults to 1.
:return: The Array representing the requested mean.
:rtype: Array
"""
axes = _normalise_axis(axis, a)
assert axes is not None and len(axes) == 1
dtype = (np.array([0], dtype=a.dtype) / 1.).dtype
kwargs = dict(mdtol=mdtol)
return _Aggregation(a, axes[0],
_MeanStreamsHandler, _MeanMaskedStreamsHandler,
dtype, {})
dtype, kwargs)


def std(a, axis=None, ddof=0):
Expand Down
79 changes: 78 additions & 1 deletion biggus/tests/test_aggregation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# (C) British Crown Copyright 2013, Met Office
# (C) British Crown Copyright 2013 - 2014, Met Office
#
# This file is part of Biggus.
#
Expand All @@ -17,6 +17,8 @@
import unittest

import numpy as np
import numpy.ma
import numpy.testing

import biggus
from biggus.tests import AccessCounter
Expand Down Expand Up @@ -95,6 +97,81 @@ def test_add_non_supported_type(self):
biggus.mean(range(10), axis=0)


class TestMdtol(unittest.TestCase):
def setUp(self):
self.data = np.ma.arange(12).reshape(3, 4)
self.data[2, 1:] = np.ma.masked

def _test_mean_with_mdtol(self, data, axis, numpy_result, mdtol=None):
data = AccessCounter(data)
array = biggus.NumpyArrayAdapter(data)

# Perform aggregation.
if mdtol is None:
# Allow testing of default when mdtol is None.
biggus_aggregation = biggus.mean(array, axis=axis)
else:
biggus_aggregation = biggus.mean(array, axis=axis, mdtol=mdtol)

# Check the aggregation operation doesn't actually read any data.
self.assertTrue((data.counts == 0).all())

# Check results.
biggus_result = biggus_aggregation.masked_array()
# Check resolving `op_array` to a NumPy array only reads
# each relevant source value once.
self.assertTrue((data.counts <= 1).all())
numpy_mask = np.ma.getmaskarray(numpy_result)
biggus_mask = np.ma.getmaskarray(biggus_result)
np.testing.assert_array_equal(biggus_mask, numpy_mask)
np.testing.assert_array_equal(biggus_result[~biggus_mask].data,
numpy_result[~numpy_mask].data)

def test_mean_mdtol_default(self):
axis = 0
expected = np.ma.mean(self.data, axis)
self._test_mean_with_mdtol(self.data, axis, expected)

def test_mean_mdtol_one(self):
axis = 0
mdtol = 1
expected = np.ma.mean(self.data, axis)
self._test_mean_with_mdtol(self.data, axis, expected, mdtol)

def test_mean_mdtol_zero(self):
axis = 0
mdtol = 0
expected = np.ma.mean(self.data, axis)
expected.mask = [False, True, True, True]
self._test_mean_with_mdtol(self.data, axis, expected, mdtol)

def test_mean_mdtol_fraction_below_axis_zero(self):
axis = 0
mdtol = 0.32
expected = np.ma.mean(self.data, axis)
expected.mask = [False, True, True, True]
self._test_mean_with_mdtol(self.data, axis, expected, mdtol)

def test_mean_mdtol_fraction_above_axis_zero(self):
axis = 0
mdtol = 0.34
expected = np.ma.mean(self.data, axis)
self._test_mean_with_mdtol(self.data, axis, expected, mdtol)

def test_mean_mdtol_fraction_below_axis_one(self):
axis = 1
mdtol = 0.74
expected = np.ma.mean(self.data, axis)
expected.mask = [False, False, True]
self._test_mean_with_mdtol(self.data, axis, expected, mdtol)

def test_mean_mdtol_fraction_above_axis_one(self):
axis = 1
mdtol = 0.76
expected = np.ma.mean(self.data, axis)
self._test_mean_with_mdtol(self.data, axis, expected, mdtol)


class TestFlow(unittest.TestCase):
def _test_flow(self, axis):
data = np.arange(3 * 4 * 5, dtype='f4').reshape(3, 4, 5)
Expand Down

0 comments on commit aab46da

Please sign in to comment.