
Multiple correction method module #201


Merged 22 commits on Mar 7, 2018
33 changes: 33 additions & 0 deletions expan/core/correction.py
@@ -0,0 +1,33 @@
import numpy as np


def benjamini_hochberg(false_discovery_rate, original_p_values):
""" Benjamini-Hochberg procedure.

:param false_discovery_rate: proportion of significant results that are actually false positives
:type false_discovery_rate: float
:param original_p_values: p values from all the tests
:type original_p_values: list[float]

:return: new critical value (i.e. the corrected alpha)
:rtype: float
"""
p_values_sorted = np.sort(np.asarray(original_p_values))
number_tests = len(original_p_values)
significant_ranks = [i for i, val in enumerate(p_values_sorted, 1) if val <= i * false_discovery_rate / number_tests]
rank = np.max(significant_ranks) if significant_ranks else 1
return rank * false_discovery_rate / number_tests


def bonferroni(false_positive_rate, original_p_values):
""" Bonferrnoi correction.

:param false_positive_rate: alpha value before correction
:type false_positive_rate: float
:param original_p_values: p values from all the tests
:type original_p_values: list[float]

:return: new critical value (i.e. the corrected alpha)
:rtype: float
"""
return false_positive_rate / len(original_p_values)
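A hedged usage sketch of the two helpers above; the p-values are invented for illustration, and the import path is the one this PR adds:

from expan.core.correction import benjamini_hochberg, bonferroni

p_values = [0.001, 0.008, 0.039, 0.041, 0.042, 0.060, 0.074, 0.205]

# Bonferroni divides alpha by the number of tests: 0.05 / 8 = 0.00625
corrected_fpr = bonferroni(0.05, p_values)

# Benjamini-Hochberg keeps the largest rank i with p_(i) <= i * fdr / n;
# here only ranks 1 and 2 qualify, so it returns 2 * 0.05 / 8 = 0.0125
corrected_fdr = benjamini_hochberg(0.05, p_values)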
4 changes: 1 addition & 3 deletions expan/core/early_stopping.py
@@ -37,9 +37,7 @@ def obrien_fleming(information_fraction, alpha=0.05):

def make_group_sequential(spending_function='obrien_fleming', estimated_sample_size=None, alpha=0.05, cap=8):
""" A closure to the group_sequential function. """
def f(x, y):
return group_sequential(x, y, spending_function, estimated_sample_size, alpha, cap)
return f
return lambda x, y: group_sequential(x, y, spending_function, estimated_sample_size, alpha, cap)


def group_sequential(x, y, spending_function='obrien_fleming', estimated_sample_size=None, alpha=0.05, cap=8):
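The lambda keeps the worker interface uniform: every factory returns a plain f(x, y). A hedged usage sketch, with the two samples invented for illustration:

import numpy as np
from expan.core.early_stopping import make_group_sequential

worker = make_group_sequential(alpha=0.05, cap=8)    # settings bound once
x = np.random.normal(0.1, 1.0, size=1000)            # treatment sample (invented)
y = np.random.normal(0.0, 1.0, size=1000)            # control sample (invented)
result = worker(x, y)                                # same as group_sequential(x, y, ...)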
73 changes: 54 additions & 19 deletions expan/core/experiment.py
@@ -1,13 +1,17 @@
import logging
import warnings

import numpy as np
import pandas as pd
import copy

import expan.core.early_stopping as es
import expan.core.statistics as statx
import expan.core.correction as correction
from expan.core.statistical_test import *
from expan.core.results import StatisticalTestResult, MultipleTestSuiteResult
from expan.core.results import StatisticalTestResult, MultipleTestSuiteResult, CorrectedTestStatistics

warnings.simplefilter('always', UserWarning)
logger = logging.getLogger(__name__)


@@ -22,16 +26,15 @@ def __init__(self, data, metadata):
:param metadata: additional information about the experiment. (e.g. primary KPI, source, etc)
:type metadata: dict
"""
self.data = data.copy()
self.metadata = metadata.copy()
self.data = data.convert_objects(convert_numeric=True)
self.metadata = metadata
self.worker_table = {
'fixed_horizon': statx.make_delta,
'group_sequential': es.make_group_sequential,
'bayes_factor': es.make_bayes_factor,
'bayes_precision': es.make_bayes_precision
}


def __str__(self):
return 'Experiment "{:s}" with {:d} entities.'.format(self.metadata['experiment'], len(self.data))
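The worker_table maps each analysis method name to a factory that returns an f(x, y) worker. A hedged sketch of the dispatch, using the random-data generator from expan.core.util:

from expan.core.experiment import Experiment
from expan.core.util import generate_random_data

data, metadata = generate_random_data()
exp = Experiment(data, metadata)

# look up the factory for a method name, bind its settings, get an f(x, y) worker
worker = exp.worker_table['group_sequential'](alpha=0.05)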

@@ -44,7 +47,8 @@ def analyze_statistical_test(self, test, testmethod, **worker_args):
:param testmethod: analysis method
:type testmethod: str
:param **worker_args: additional arguments for the analysis method
:return: statistical analysis result of the test

:return: statistical result of the test
:rtype: StatisticalTestResult
"""
if not isinstance(test, StatisticalTest):
@@ -112,27 +116,53 @@ def analyze_statistical_test(self, test, testmethod, **worker_args):


def analyze_statistical_test_suite(self, test_suite, testmethod='fixed_horizon', **worker_args):
"""
Runs delta analysis on a set of tests and returns statsitical results for each statistical test in the suite.
""" Runs delta analysis on a set of tests and returns statistical results for each statistical test in the suite.

:param test_suite: a suite of statistical tests to run
:type test_suite: StatisticalTestSuite
:param testmethod: analysis method
:param testmethod: analysis method to perform.
It can be 'fixed_horizon', 'group_sequential', 'bayes_factor' or 'bayes_precision'.
:type testmethod: str
:param **worker_args: additional arguments for the analysis method
:param **worker_args: additional arguments for the analysis method (see signatures of corresponding methods)

:return: statistical result of the test suite
:rtype: MultipleTestSuiteResult
"""
if not isinstance(test_suite, StatisticalTestSuite):
raise TypeError("Test suite should be of type StatisticalTestSuite.")

statistical_test_results = MultipleTestSuiteResult([], test_suite.correction_method)
for test in test_suite:
one_analysis_result = self.analyze_statistical_test(test, testmethod, **worker_args)
statistical_test_results.statistical_test_results.append(one_analysis_result)
if testmethod not in ['fixed_horizon', 'group_sequential']:
test_suite.correction_method = CorrectionMethod.NONE
requires_correction = test_suite.correction_method is not CorrectionMethod.NONE

# look up table for correction method
correction_table = {
CorrectionMethod.BONFERRONI: correction.bonferroni,
CorrectionMethod.BH: correction.benjamini_hochberg
}

# TODO: Implement correction method, create CorrectedTestStatistics, and update the statistical_test_results
return statistical_test_results
# test_suite_result holds the statistical results from all statistical tests
test_suite_result = MultipleTestSuiteResult([], test_suite.correction_method)
for test in test_suite.tests:
original_analysis = self.analyze_statistical_test(test, testmethod, **worker_args)
test_suite_result.results.append(original_analysis)

# if correction is needed, collect the p-values, correct alpha, and re-run the same analysis with the new alpha
if requires_correction:
original_alpha = worker_args.get('alpha', 0.05)
original_p_values = [item.result.p for item in test_suite_result.results if item.result is not None]
corrected_alpha = correction_table[test_suite.correction_method](original_alpha, original_p_values)
new_worker_args = copy.deepcopy(worker_args)
new_worker_args['alpha'] = corrected_alpha

for test_index, test_item in enumerate(test_suite_result.results):
if test_item.result:
original_analysis = test_suite_result.results[test_index]
corrected_analysis = self.analyze_statistical_test(test_item.test, testmethod, **new_worker_args)
combined_result = CorrectedTestStatistics(original_analysis.result, corrected_analysis.result)
original_analysis.result = combined_result

return test_suite_result
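A hedged end-to-end sketch of the corrected suite analysis; test_a and test_b are placeholder StatisticalTest objects (their construction is elided), and exp is the Experiment from the earlier sketch:

suite = StatisticalTestSuite(tests=[test_a, test_b], correction_method=CorrectionMethod.BH)
suite_result = exp.analyze_statistical_test_suite(suite, testmethod='fixed_horizon')

for item in suite_result.results:
    print(item.result)    # CorrectedTestStatistics pairing original and corrected analyses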


def outlier_filter(self, kpis, percentile=99.0, threshold_type='upper'):
@@ -145,6 +175,7 @@ def outlier_filter(self, kpis, percentile=99.0, threshold_type='upper'):
:type percentile: float
:param threshold_type: type of threshold used ('lower' or 'upper')
:type threshold_type: str

:return: No return value. Will filter out outliers in self.data in place.
"""
# check if provided KPIs are present in the data
@@ -166,6 +197,7 @@ def outlier_filter(self, kpis, percentile=99.0, threshold_type='upper'):
self.metadata['filtered_threshold_kind'] = threshold_type
# throw warning if too many entities have been filtered out
if (len(flags[flags == True]) / float(len(self.data))) > 0.02:
warnings.warn('More than 2% of entities have been filtered out, consider adjusting the percentile value.')
logger.warning('More than 2% of entities have been filtered out, consider adjusting the percentile value.')
self.data = self.data[flags == False]
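A hedged call sketch for the in-place filter; 'normal_same' is one of the KPI columns produced by the random-data generator in expan.core.util:

exp.outlier_filter(kpis=['normal_same'], percentile=99.0, threshold_type='upper')
# rows above the 99th percentile of 'normal_same' are dropped from exp.data in place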

@@ -189,21 +221,23 @@ def _is_valid_for_analysis(self, data, test):


def _get_weights(self, data, test, variant_name):
""" Perform the re-weighting trick.
""" Perform the re-weighting trick on the selected derived kpi
See http://expan.readthedocs.io/en/latest/glossary.html#per-entity-ratio-vs-ratio-of-totals

:type data: pd.DataFrame
:type test: StatisticalTest
:type variant_name: str
:rtype: pd.DataFrame

:return: re-weighted KPI values
:rtype: pd.Series
"""
if type(test.kpi) is not DerivedKPI:
return 1.0

x = test.variants.get_variant(data, variant_name)
x = test.variants.get_variant(data, variant_name)[test.kpi.denominator]
number_of_zeros_and_nans = sum(x == 0) + np.isnan(x).sum()
number_of_non_zeros_and_nans = len(x) - number_of_zeros_and_nans
return number_of_non_zeros_and_nans/np.nansum(x) * x
return number_of_non_zeros_and_nans / np.nansum(x) * x
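A small worked example of the re-weighting, with invented denominator values:

import numpy as np

x = np.array([2.0, 0.0, 4.0, np.nan])                 # denominator KPI per entity
zeros_and_nans = (x == 0).sum() + np.isnan(x).sum()   # 2
usable = len(x) - zeros_and_nans                      # 2
weights = usable / np.nansum(x) * x                   # [0.667, 0.0, 1.333, nan]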


def _quantile_filtering(self, kpis, percentile, threshold_type):
@@ -216,6 +250,7 @@ def _quantile_filtering(self, kpis, percentile, threshold_type):
:type percentile: float
:param threshold_type: type of threshold used ('lower' or 'upper')
:type threshold_type: str

:return: boolean values indicating whether the row should be filtered
:rtype: pd.Series
"""
8 changes: 4 additions & 4 deletions expan/core/results.py
@@ -102,11 +102,11 @@ def __init__(self, test, result):
class MultipleTestSuiteResult(JsonSerializable):
""" This class holds the results of a MultipleTestSuite.

:param statistical_test_results: test results for all statistical testing unit
:type statistical_test_results: list[StatisticalTestResult]
:param results: test results for all statistical testing units
:type results: list[StatisticalTestResult]
:param correction_method: method used for multiple testing correction
:type correction_method: CorrectionMethod
"""
def __init__(self, statistical_test_results, correction_method=CorrectionMethod.NONE):
self.statistical_test_results = statistical_test_results
def __init__(self, results, correction_method=CorrectionMethod.NONE):
self.results = results
self.correction_method = correction_method
6 changes: 4 additions & 2 deletions expan/core/statistical_test.py
@@ -52,12 +52,12 @@ def __init__(self, name, numerator, denominator):
def make_derived_kpi(self, data):
""" Create the derived kpi column if it is not yet created. """
if self.name not in data.columns:
data.loc[:, self.name] = data[self.numerator]/data[self.denominator].astype(float)
data.loc[:, self.name] = (data[self.numerator]/data[self.denominator]).astype("float64")


class CorrectionMethod(Enum):
NONE = 1 # no correction
BONFERRONI = 2 # Bonferrnoi correction. Used to correct false positive rate.
BONFERRONI = 2 # Bonferroni correction. Used to correct false positive rate.
BH = 3 # Benjamini-Hochberg procedure. Used to correct false discovery rate.


Expand All @@ -70,6 +70,8 @@ class StatisticalTestSuite(JsonSerializable):
:type correction_method: CorrectionMethod
"""
def __init__(self, tests, correction_method=CorrectionMethod.NONE):
if len(tests) == 1:
correction_method = CorrectionMethod.NONE
self.tests = tests
self.correction_method = correction_method
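Note the guard above: with a single test there is nothing to correct, so the suite falls back to no correction. A hedged check, where only_test is a placeholder StatisticalTest:

suite = StatisticalTestSuite([only_test], CorrectionMethod.BONFERRONI)
assert suite.correction_method is CorrectionMethod.NONE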

16 changes: 6 additions & 10 deletions expan/core/statistics.py
@@ -14,17 +14,13 @@ def _delta_mean(x, y):
Implemented as function to allow being called from bootstrap. """
return np.nanmean(x) - np.nanmean(y)

def make_delta(assume_normal=True, alpha=0.05, percentiles=[2.5, 97.5],
min_observations=20, nruns=10000, relative=False):

def make_delta(assume_normal=True, alpha=0.05, min_observations=20, nruns=10000, relative=False):
""" A closure to the delta function. """
def f(x, y):
return delta(x, y, assume_normal, alpha, percentiles, min_observations,
nruns, relative)
return f
return lambda x, y: delta(x, y, assume_normal, alpha, min_observations, nruns, relative)


def delta(x, y, assume_normal=True, alpha=0.05, percentiles=[2.5, 97.5],
min_observations=20, nruns=10000, relative=False):
def delta(x, y, assume_normal=True, alpha=0.05, min_observations=20, nruns=10000, relative=False):
""" Calculates the difference of means between the samples in a statistical sense.
Computation is done in form of treatment minus control, i.e. x-y.
Note that NaNs are treated as if they do not exist in the data.
@@ -37,8 +33,6 @@ def delta(x, y, assume_normal=True, alpha=0.05, percentiles=[2.5, 97.5],
:type assume_normal: boolean
:param alpha: significance level (alpha)
:type alpha: float
:param percentiles: list of percentile values for confidence bounds
:type percentiles: list
:param min_observations: minimum number of observations needed
:type min_observations: int
:param nruns: only used if assume normal is false
@@ -61,6 +55,8 @@
if type(x) != type(y):
raise TypeError('Please provide samples of the same type.')

percentiles = [alpha * 100 / 2, 100 - alpha * 100 / 2]
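# e.g. alpha = 0.05 gives percentiles [2.5, 97.5], matching the former default bounds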

# Coercing missing values to right format
_x = np.array(x, dtype=float)
_y = np.array(y, dtype=float)
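The same factory pattern as in early_stopping: make_delta binds the fixed-horizon settings once and returns a plain f(x, y) worker. A hedged sketch with invented samples:

import numpy as np
from expan.core.statistics import make_delta

worker = make_delta(assume_normal=True, alpha=0.05)
result = worker(np.random.normal(0.1, 1.0, 500), np.random.normal(0.0, 1.0, 500))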
19 changes: 0 additions & 19 deletions expan/core/util.py
@@ -96,22 +96,3 @@ def generate_random_data():
'experiment': 'random_data_generation'
}
return data, metadata


def generate_random_data_n_variants(n_variants=3):
""" Generate random data for multiple variants. """
np.random.seed(42)
size = 10000
data = pd.DataFrame()
data['entity'] = list(range(size))
data['variant'] = np.random.choice(list(map(chr, list(range(65, 65 + n_variants)))), size=size)
data['normal_same'] = np.random.normal(size=size)
data['poisson_same'] = np.random.poisson(size=size)
data['feature'] = np.random.choice(['has', 'non'], size=size)
data['treatment_start_time'] = np.random.choice(list(range(10)), size=size)
metadata = {
'primary_KPI': 'normal_same',
'source': 'simulated',
'experiment': 'random_data_generation'
}
return data, metadata
40 changes: 15 additions & 25 deletions expan/data/csv_fetcher.py
@@ -1,6 +1,3 @@
"""CSV fetcher module.
"""

import logging
from os import listdir
from os.path import isfile, join
@@ -12,40 +9,33 @@

logger = logging.getLogger(__name__)

def get_data(controlVariantName, folder_path):
"""
Expects as input a folder containing the following files:
- one .csv or .csv.gz with 'metrics' in the filename
- one .txt containing 'metadata' in the filename

def get_data(folder_path):
""" Expects as input a folder containing the following files:
- one .csv or .csv.gz with 'data' in the filename
- one .json containing 'metadata' in the filename
Opens the files and uses them to create an Experiment object which it then returns.

Args:
folder_path:

Returns:
Experiment: Experiment object with loaded csv data
:param folder_path: path to the Experiment data
:type folder_path: str
:return: Experiment object with data
:rtype: Experiment

"""
files = [f for f in listdir(folder_path) if isfile(join(folder_path, f))]

try:
assert ('metrics' in '-'.join(files))
assert ('data' in '-'.join(files))
assert ('metadata' in '-'.join(files))

metrics = metadata = None

data = metadata = None
for f in files:

if 'metrics' in f:
metrics = pd.read_csv(join(folder_path, f))

elif 'metadata' in f:
if 'metadata' in f:
with open(join(folder_path, f), 'r') as input_json:
metadata = json.load(input_json)

return Experiment(controlVariantName, metrics, metadata)
elif 'data' in f:
data = pd.read_csv(join(folder_path, f))
return Experiment(data, metadata)

except AssertionError as e:
logger.error("An error occured when fetching data from csv file.")
logger.error("An error occurred when fetching data from csv file.")
raise e
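A hedged sketch of the folder layout get_data now expects, with invented file names that follow the 'data'/'metadata' naming rule from the docstring above:

# my_experiment/
#     experiment_data.csv.gz        <- 'data' in the filename
#     experiment_metadata.json      <- 'metadata' in the filename
from expan.data.csv_fetcher import get_data

exp = get_data('my_experiment/')   # returns an Experiment built from both files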
2 changes: 2 additions & 0 deletions tests/tests_core/test_binning.py
@@ -1,3 +1,5 @@
# TODO: This module is deprecated

import sys
import unittest
