# Source code for test_running_stats
"""Tests for computing mean and std in running batches
"""
import unittest
import torch
import numpy as np
from n2j.trainval_data.utils.running_stats import RunningStats
class TestRunningStats(unittest.TestCase):
    """A suite of tests verifying mean and std in running batches.

    A fixed-seed lognormal sample is cut into equal-sized batches. Each test
    accumulates per-batch statistics one batch at a time and compares the
    result with the statistics computed over the full sample in one shot.
    """

    @classmethod
    def setUpClass(cls):
        """Set global defaults for tests.

        Creates the shared sample ``some_array`` of shape ``(n_data, 2)``, its
        full-sample mean ``mu_emp`` and std ``sig_emp`` (both of shape
        ``(1, 2)``), and the ``batch_size`` used to slice the sample.
        """
        np.random.seed(123)  # fixed seed so the reference statistics are reproducible
        cls.n_data = 100000
        # Simulate random data: scale and shift Gaussian draws per column
        cls.some_array = np.random.randn(cls.n_data, 2)*np.array([[0.5, 0.1]])
        cls.some_array += np.array([[0.1, 0.3]])
        cls.some_array = np.exp(cls.some_array)  # make lognormal
        # Reference values computed over the whole sample at once
        cls.mu_emp = cls.some_array.mean(axis=0, keepdims=True)
        cls.sig_emp = cls.some_array.std(axis=0, keepdims=True)
        cls.batch_size = 100

    def _iter_batches(self, data):
        """Yield ``(b, batch)`` pairs of consecutive ``batch_size``-row slices.

        Parameters
        ----------
        data : array-like supporting 2D slicing
            Sample with ``n_data`` rows, e.g. a numpy array or torch tensor.

        Yields
        ------
        tuple
            Zero-based batch index and the corresponding slice of `data`.
        """
        for b in range(self.n_data//self.batch_size):
            yield b, data[b*self.batch_size:(b + 1)*self.batch_size, :]

    def test_running_mean_np(self):
        """Test running mean computation on np array."""
        running_mean = np.zeros([1, 2])
        for b, new in self._iter_batches(self.some_array):
            # Incremental average over the b + 1 equal-sized batches seen so far
            running_mean += (new.mean(axis=0, keepdims=True) - running_mean)/(b + 1)
        np.testing.assert_array_almost_equal(running_mean, self.mu_emp, decimal=5)

    def test_running_std_np(self):
        """Test running std computation on np array.

        The variance of the pooled data is the average of the batch variances
        plus a correction for the offset between the mean so far and the mean
        of the incoming batch.
        """
        running_mean = np.zeros([1, 2])
        running_var = np.zeros([1, 2])
        for b, new in self._iter_batches(self.some_array):
            new_mean = new.mean(axis=0, keepdims=True)
            new_var = new.var(axis=0, keepdims=True)
            # The variance update reads the mean of the previous b batches,
            # so it has to run before running_mean is advanced.
            running_var += (
                (new_var - running_var)/(b + 1)
                + (b/(b + 1)**2.0)*(running_mean - new_mean)**2.0
            )
            running_mean += (new_mean - running_mean)/(b + 1)
        np.testing.assert_array_almost_equal(running_var**0.5, self.sig_emp, decimal=5)
        np.testing.assert_array_almost_equal(running_mean, self.mu_emp, decimal=5)

    def test_running_stats(self):
        """Test running mean, std computation on torch using RunningStats."""
        some_array_torch = torch.tensor(self.some_array)
        # Identity loader registered under the key 'data'; presumably this key
        # is what prefixes the 'data_mean' and 'data_var' entries read below
        # (confirm against RunningStats).
        loader_dict = dict(data=lambda x: x)
        rs = RunningStats(loader_dict)
        for b, new in self._iter_batches(some_array_torch):
            rs.update(new, b)
        np.testing.assert_array_almost_equal(rs.stats['data_var']**0.5, self.sig_emp, decimal=5)
        np.testing.assert_array_almost_equal(rs.stats['data_mean'], self.mu_emp, decimal=5)
if __name__ == '__main__':
    # Run the test suite when this file is executed directly as a script
    unittest.main()