
Multiple correction method module #201


Merged 22 commits on Mar 7, 2018
33 changes: 33 additions & 0 deletions expan/core/correction.py
@@ -0,0 +1,33 @@
import numpy as np


def benjamini_hochberg(false_discovery_rate, original_p_values):
""" Benjamini-Hochberg procedure.

:param false_discovery_rate: proportion of significant results that are actually false positives
:type false_discovery_rate: float
:param original_p_values: p values from all the tests
:type original_p_values: list[float]

:return: new critical value (i.e. the corrected alpha)
:rtype: float
"""
p_values_sorted = np.sort(np.asarray(original_p_values))
number_tests = len(original_p_values)
significant_ranks = [i for i, val in enumerate(p_values_sorted, 1) if val <= i * false_discovery_rate / number_tests]
rank = np.max(significant_ranks) if significant_ranks else 1
return rank * false_discovery_rate / number_tests


def bonferroni(false_positive_rate, original_p_values):
""" Bonferrnoi correction.

:param false_positive_rate: alpha value before correction
:type false_positive_rate: float
:param original_p_values: p values from all the tests
:type original_p_values: list[float]

:return: new critical value (i.e. the corrected alpha)
:rtype: float
"""
return false_positive_rate / len(original_p_values)
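A hedged usage sketch of the two helpers above; the p-values are invented for illustration, and the import path is the one this PR adds:

from expan.core.correction import benjamini_hochberg, bonferroni

p_values = [0.001, 0.008, 0.039, 0.041, 0.042, 0.060, 0.074, 0.205]

# Bonferroni divides alpha by the number of tests: 0.05 / 8 = 0.00625
corrected_fpr = bonferroni(0.05, p_values)

# Benjamini-Hochberg keeps the largest rank i with p_(i) <= i * fdr / n;
# here only ranks 1 and 2 qualify, so it returns 2 * 0.05 / 8 = 0.0125
corrected_fdr = benjamini_hochberg(0.05, p_values)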
4 changes: 1 addition & 3 deletions expan/core/early_stopping.py
@@ -37,9 +37,7 @@ def obrien_fleming(information_fraction, alpha=0.05):

def make_group_sequential(spending_function='obrien_fleming', estimated_sample_size=None, alpha=0.05, cap=8):
""" A closure to the group_sequential function. """
def f(x, y):
return group_sequential(x, y, spending_function, estimated_sample_size, alpha, cap)
return f
return lambda x, y: group_sequential(x, y, spending_function, estimated_sample_size, alpha, cap)


def group_sequential(x, y, spending_function='obrien_fleming', estimated_sample_size=None, alpha=0.05, cap=8):
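The lambda keeps the worker interface uniform: every factory returns a plain f(x, y). A hedged usage sketch, with the two samples invented for illustration:

import numpy as np
from expan.core.early_stopping import make_group_sequential

worker = make_group_sequential(alpha=0.05, cap=8)    # settings bound once
x = np.random.normal(0.1, 1.0, size=1000)            # treatment sample (invented)
y = np.random.normal(0.0, 1.0, size=1000)            # control sample (invented)
result = worker(x, y)                                # same as group_sequential(x, y, ...)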
73 changes: 54 additions & 19 deletions expan/core/experiment.py
@@ -1,13 +1,17 @@
import logging
import warnings

import numpy as np
import pandas as pd
import copy

import expan.core.early_stopping as es
import expan.core.statistics as statx
import expan.core.correction as correction
from expan.core.statistical_test import *
from expan.core.results import StatisticalTestResult, MultipleTestSuiteResult
from expan.core.results import StatisticalTestResult, MultipleTestSuiteResult, CorrectedTestStatistics

warnings.simplefilter('always', UserWarning)
logger = logging.getLogger(__name__)


@@ -22,16 +26,15 @@ def __init__(self, data, metadata):
:param metadata: additional information about the experiment. (e.g. primary KPI, source, etc)
:type metadata: dict
"""
self.data = data.copy()
self.metadata = metadata.copy()
self.data = data.convert_objects(convert_numeric=True)
self.metadata = metadata
self.worker_table = {
'fixed_horizon': statx.make_delta,
'group_sequential': es.make_group_sequential,
'bayes_factor': es.make_bayes_factor,
'bayes_precision': es.make_bayes_precision
}


def __str__(self):
return 'Experiment "{:s}" with {:d} entities.'.format(self.metadata['experiment'], len(self.data))
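The worker_table maps each analysis method name to a factory that returns an f(x, y) worker. A hedged sketch of the dispatch, using the random-data generator from expan.core.util:

from expan.core.experiment import Experiment
from expan.core.util import generate_random_data

data, metadata = generate_random_data()
exp = Experiment(data, metadata)

# look up the factory for a method name, bind its settings, get an f(x, y) worker
worker = exp.worker_table['group_sequential'](alpha=0.05)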

@@ -44,7 +47,8 @@ def analyze_statistical_test(self, test, testmethod, **worker_args):
:param testmethod: analysis method
:type testmethod: str
:param **worker_args: additional arguments for the analysis method
:return: statistical analysis result of the test

:return: statistical result of the test
:rtype: StatisticalTestResult
"""
if not isinstance(test, StatisticalTest):
@@ -112,27 +116,53 @@ def analyze_statistical_test(self, test, testmethod, **worker_args):


def analyze_statistical_test_suite(self, test_suite, testmethod='fixed_horizon', **worker_args):
"""
Runs delta analysis on a set of tests and returns statsitical results for each statistical test in the suite.
""" Runs delta analysis on a set of tests and returns statistical results for each statistical test in the suite.

:param test_suite: a suite of statistical tests to run
:type test_suite: StatisticalTestSuite
:param testmethod: analysis method
:param testmethod: analysis method to perform.
It can be 'fixed_horizon', 'group_sequential', 'bayes_factor' or 'bayes_precision'.
:type testmethod: str
:param **worker_args: additional arguments for the analysis method
:param **worker_args: additional arguments for the analysis method (see signatures of corresponding methods)

:return: statistical result of the test suite
:rtype: MultipleTestSuiteResult
"""
if not isinstance(test_suite, StatisticalTestSuite):
raise TypeError("Test suite should be of type StatisticalTestSuite.")

statistical_test_results = MultipleTestSuiteResult([], test_suite.correction_method)
for test in test_suite:
one_analysis_result = self.analyze_statistical_test(test, testmethod, **worker_args)
statistical_test_results.statistical_test_results.append(one_analysis_result)
if testmethod not in ['fixed_horizon', 'group_sequential']:
test_suite.correction_method = CorrectionMethod.NONE
requires_correction = test_suite.correction_method is not CorrectionMethod.NONE

# look up table for correction method
correction_table = {
CorrectionMethod.BONFERRONI: correction.bonferroni,
CorrectionMethod.BH: correction.benjamini_hochberg
}

# TODO: Implement correction method, create CorrectedTestStatistics, and update the statistical_test_results
return statistical_test_results
# test_suite_result holds the statistical results from all statistical tests
test_suite_result = MultipleTestSuiteResult([], test_suite.correction_method)
for test in test_suite.tests:
original_analysis = self.analyze_statistical_test(test, testmethod, **worker_args)
test_suite_result.results.append(original_analysis)

# if correction is needed, collect the p-values, correct alpha, and re-run the same analysis with the new alpha
if requires_correction:
original_alpha = worker_args.get('alpha', 0.05)
original_p_values = [item.result.p for item in test_suite_result.results if item.result is not None]
corrected_alpha = correction_table[test_suite.correction_method](original_alpha, original_p_values)
new_worker_args = copy.deepcopy(worker_args)
new_worker_args['alpha'] = corrected_alpha

for test_index, test_item in enumerate(test_suite_result.results):
if test_item.result:
original_analysis = test_suite_result.results[test_index]
corrected_analysis = self.analyze_statistical_test(test_item.test, testmethod, **new_worker_args)
combined_result = CorrectedTestStatistics(original_analysis.result, corrected_analysis.result)
original_analysis.result = combined_result

return test_suite_result
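A hedged end-to-end sketch of the corrected suite analysis; test_a and test_b are placeholder StatisticalTest objects (their construction is elided), and exp is the Experiment from the earlier sketch:

suite = StatisticalTestSuite(tests=[test_a, test_b], correction_method=CorrectionMethod.BH)
suite_result = exp.analyze_statistical_test_suite(suite, testmethod='fixed_horizon')

for item in suite_result.results:
    print(item.result)    # CorrectedTestStatistics pairing original and corrected analyses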


def outlier_filter(self, kpis, percentile=99.0, threshold_type='upper'):
@@ -145,6 +175,7 @@ def outlier_filter(self, kpis, percentile=99.0, threshold_type='upper'):
:type percentile: float
:param threshold_type: type of threshold used ('lower' or 'upper')
:type threshold_type: str

:return: No return value. Will filter out outliers in self.data in place.
"""
# check if provided KPIs are present in the data
@@ -166,6 +197,7 @@ def outlier_filter(self, kpis, percentile=99.0, threshold_type='upper'):
self.metadata['filtered_threshold_kind'] = threshold_type
# throw warning if too many entities have been filtered out
if (len(flags[flags == True]) / float(len(self.data))) > 0.02:
warnings.warn('More than 2% of entities have been filtered out, consider adjusting the percentile value.')
logger.warning('More than 2% of entities have been filtered out, consider adjusting the percentile value.')
self.data = self.data[flags == False]
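A hedged call sketch for the in-place filter; 'normal_same' is one of the KPI columns produced by the random-data generator in expan.core.util:

exp.outlier_filter(kpis=['normal_same'], percentile=99.0, threshold_type='upper')
# rows above the 99th percentile of 'normal_same' are dropped from exp.data in place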

@@ -189,21 +221,23 @@ def _is_valid_for_analysis(self, data, test):


def _get_weights(self, data, test, variant_name):
""" Perform the re-weighting trick.
""" Perform the re-weighting trick on the selected derived kpi
See http://expan.readthedocs.io/en/latest/glossary.html#per-entity-ratio-vs-ratio-of-totals

:type data: pd.DataFrame
:type test: StatisticalTest
:type variant_name: str
:rtype: pd.DataFrame

:return: re-weighted KPI values
:rtype: pd.Series
"""
if type(test.kpi) is not DerivedKPI:
return 1.0

x = test.variants.get_variant(data, variant_name)
x = test.variants.get_variant(data, variant_name)[test.kpi.denominator]
number_of_zeros_and_nans = sum(x == 0) + np.isnan(x).sum()
number_of_non_zeros_and_nans = len(x) - number_of_zeros_and_nans
return number_of_non_zeros_and_nans/np.nansum(x) * x
return number_of_non_zeros_and_nans / np.nansum(x) * x
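A small worked example of the re-weighting, with invented denominator values:

import numpy as np

x = np.array([2.0, 0.0, 4.0, np.nan])                 # denominator KPI per entity
zeros_and_nans = (x == 0).sum() + np.isnan(x).sum()   # 2
usable = len(x) - zeros_and_nans                      # 2
weights = usable / np.nansum(x) * x                   # [0.667, 0.0, 1.333, nan]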


def _quantile_filtering(self, kpis, percentile, threshold_type):
@@ -216,6 +250,7 @@ def _quantile_filtering(self, kpis, percentile, threshold_type):
:type percentile: float
:param threshold_type: type of threshold used ('lower' or 'upper')
:type threshold_type: str

:return: boolean values indicating whether the row should be filtered
:rtype: pd.Series
"""
8 changes: 4 additions & 4 deletions expan/core/results.py
@@ -102,11 +102,11 @@ def __init__(self, test, result):
class MultipleTestSuiteResult(JsonSerializable):
""" This class holds the results of a MultipleTestSuite.

:param statistical_test_results: test results for all statistical testing unit
:type statistical_test_results: list[StatisticalTestResult]
:param results: test results for all statistical testing units
:type results: list[StatisticalTestResult]
:param correction_method: method used for multiple testing correction
:type correction_method: CorrectionMethod
"""
def __init__(self, statistical_test_results, correction_method=CorrectionMethod.NONE):
self.statistical_test_results = statistical_test_results
def __init__(self, results, correction_method=CorrectionMethod.NONE):
self.results = results
self.correction_method = correction_method
6 changes: 4 additions & 2 deletions expan/core/statistical_test.py
@@ -52,12 +52,12 @@ def __init__(self, name, numerator, denominator):
def make_derived_kpi(self, data):
""" Create the derived kpi column if it is not yet created. """
if self.name not in data.columns:
data.loc[:, self.name] = data[self.numerator]/data[self.denominator].astype(float)
data.loc[:, self.name] = (data[self.numerator]/data[self.denominator]).astype("float64")


class CorrectionMethod(Enum):
NONE = 1 # no correction
BONFERRONI = 2 # Bonferrnoi correction. Used to correct false positive rate.
BONFERRONI = 2 # Bonferroni correction. Used to correct false positive rate.
BH = 3 # Benjamini-Hochberg procedure. Used to correct false discovery rate.


Expand All @@ -70,6 +70,8 @@ class StatisticalTestSuite(JsonSerializable):
:type correction_method: CorrectionMethod
"""
def __init__(self, tests, correction_method=CorrectionMethod.NONE):
if len(tests) == 1:
correction_method = CorrectionMethod.NONE
self.tests = tests
self.correction_method = correction_method
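Note the guard above: with a single test there is nothing to correct, so the suite falls back to no correction. A hedged check, where only_test is a placeholder StatisticalTest:

suite = StatisticalTestSuite([only_test], CorrectionMethod.BONFERRONI)
assert suite.correction_method is CorrectionMethod.NONE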

16 changes: 6 additions & 10 deletions expan/core/statistics.py
@@ -14,17 +14,13 @@ def _delta_mean(x, y):
Implemented as function to allow being called from bootstrap. """
return np.nanmean(x) - np.nanmean(y)

def make_delta(assume_normal=True, alpha=0.05, percentiles=[2.5, 97.5],
min_observations=20, nruns=10000, relative=False):

def make_delta(assume_normal=True, alpha=0.05, min_observations=20, nruns=10000, relative=False):
""" A closure to the delta function. """
def f(x, y):
return delta(x, y, assume_normal, alpha, percentiles, min_observations,
nruns, relative)
return f
return lambda x, y: delta(x, y, assume_normal, alpha, min_observations, nruns, relative)


def delta(x, y, assume_normal=True, alpha=0.05, percentiles=[2.5, 97.5],
min_observations=20, nruns=10000, relative=False):
def delta(x, y, assume_normal=True, alpha=0.05, min_observations=20, nruns=10000, relative=False):
""" Calculates the difference of means between the samples in a statistical sense.
Computation is done in form of treatment minus control, i.e. x-y.
Note that NaNs are treated as if they do not exist in the data.
@@ -37,8 +33,6 @@ def delta(x, y, assume_normal=True, alpha=0.05, percentiles=[2.5, 97.5],
:type assume_normal: boolean
:param alpha: significance level (alpha)
:type alpha: float
:param percentiles: list of percentile values for confidence bounds
:type percentiles: list
:param min_observations: minimum number of observations needed
:type min_observations: int
:param nruns: only used if assume normal is false
@@ -61,6 +55,8 @@
if type(x) != type(y):
raise TypeError('Please provide samples of the same type.')

percentiles = [alpha * 100 / 2, 100 - alpha * 100 / 2]
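# e.g. alpha = 0.05 gives percentiles [2.5, 97.5], matching the former default bounds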

# Coercing missing values to right format
_x = np.array(x, dtype=float)
_y = np.array(y, dtype=float)
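The same factory pattern as in early_stopping: make_delta binds the fixed-horizon settings once and returns a plain f(x, y) worker. A hedged sketch with invented samples:

import numpy as np
from expan.core.statistics import make_delta

worker = make_delta(assume_normal=True, alpha=0.05)
result = worker(np.random.normal(0.1, 1.0, 500), np.random.normal(0.0, 1.0, 500))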
19 changes: 0 additions & 19 deletions expan/core/util.py
@@ -96,22 +96,3 @@ def generate_random_data():
'experiment': 'random_data_generation'
}
return data, metadata


def generate_random_data_n_variants(n_variants=3):
""" Generate random data for multiple variants. """
np.random.seed(42)
size = 10000
data = pd.DataFrame()
data['entity'] = list(range(size))
data['variant'] = np.random.choice(list(map(chr, list(range(65, 65 + n_variants)))), size=size)
data['normal_same'] = np.random.normal(size=size)
data['poisson_same'] = np.random.poisson(size=size)
data['feature'] = np.random.choice(['has', 'non'], size=size)
data['treatment_start_time'] = np.random.choice(list(range(10)), size=size)
metadata = {
'primary_KPI': 'normal_same',
'source': 'simulated',
'experiment': 'random_data_generation'
}
return data, metadata
40 changes: 15 additions & 25 deletions expan/data/csv_fetcher.py
@@ -1,6 +1,3 @@
"""CSV fetcher module.
"""

import logging
from os import listdir
from os.path import isfile, join
@@ -12,40 +9,33 @@

logger = logging.getLogger(__name__)

def get_data(controlVariantName, folder_path):
"""
Expects as input a folder containing the following files:
- one .csv or .csv.gz with 'metrics' in the filename
- one .txt containing 'metadata' in the filename

def get_data(folder_path):
""" Expects as input a folder containing the following files:
- one .csv or .csv.gz with 'data' in the filename
- one .json containing 'metadata' in the filename
Opens the files and uses them to create an Experiment object which it then returns.

Args:
folder_path:

Returns:
Experiment: Experiment object with loaded csv data
:param folder_path: path to the Experiment data
:type folder_path: str
:return: Experiment object with data
:rtype: Experiment

"""
files = [f for f in listdir(folder_path) if isfile(join(folder_path, f))]

try:
assert ('metrics' in '-'.join(files))
assert ('data' in '-'.join(files))
assert ('metadata' in '-'.join(files))

metrics = metadata = None

data = metadata = None
for f in files:

if 'metrics' in f:
metrics = pd.read_csv(join(folder_path, f))

elif 'metadata' in f:
if 'metadata' in f:
with open(join(folder_path, f), 'r') as input_json:
metadata = json.load(input_json)

return Experiment(controlVariantName, metrics, metadata)
elif 'data' in f:
data = pd.read_csv(join(folder_path, f))
return Experiment(data, metadata)

except AssertionError as e:
logger.error("An error occured when fetching data from csv file.")
logger.error("An error occurred when fetching data from csv file.")
raise e
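A hedged sketch of the folder layout get_data now expects, with invented file names that follow the 'data'/'metadata' naming rule from the docstring above:

# my_experiment/
#     experiment_data.csv.gz        <- 'data' in the filename
#     experiment_metadata.json      <- 'metadata' in the filename
from expan.data.csv_fetcher import get_data

exp = get_data('my_experiment/')   # returns an Experiment built from both files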
2 changes: 2 additions & 0 deletions tests/tests_core/test_binning.py
@@ -1,3 +1,5 @@
# TODO: This module is deprecated

import sys
import unittest
