-
Notifications
You must be signed in to change notification settings - Fork 49
Project Headache #194
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Project Headache #194
Changes from 47 commits
3728e6b
45a1dbc
d68a897
67eaa05
9e986ee
99ebea3
b77b6ce
179723b
5470c64
90d58ab
625a21b
53d3a52
cc04de8
e91b2c8
4f1254b
3f4e0ab
cb86b8c
a4e514e
c8d0cd7
9953c9a
79cdafe
39c4c68
febcbaf
63e2d6d
f83a604
2a3b7ee
fed46b3
bf212fb
86e8ea5
0df29f5
adfd52f
42ae926
fcee28e
3b9a49d
3daa439
0eadc8b
65806dc
ae72e66
12b330a
e2b8f74
f9e71ed
7a38c96
32884c2
d8699ab
d065756
8d0db33
005cffa
ee89919
6ab897a
717c0a6
a09bed6
1e1b269
7998bef
6076e00
c63767f
f6bdd7e
c50ef62
1cb449d
4647e74
ed68813
b51b28c
9cf5007
8a29d03
0a4c983
f0c06c0
bc8ebd5
fe1590a
e2abd26
a3dc800
85392ec
f78116b
d0a94fa
72dfe3f
2d5becf
d7a01bf
5d80948
296ed7a
24ef447
85defea
3accd73
cefa11c
06a2dc5
5fa1a7b
406c39b
4021929
3a78229
38382e7
c649c8d
9c15a89
823506e
fd60447
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
from expan.core.util import JsonSerializable | ||
|
||
|
||
# --------- Below are the data structure of statistics --------- # | ||
class BaseTestStatistics(JsonSerializable): | ||
""" Holds only statistics for the control and treatment group. | ||
|
||
:param control_statistics: statistics within the control group | ||
:type control_statistics: SampleStatistics | ||
:param treatment_statistics: statistics within the treatment group | ||
:type treatment_statistics: SampleStatistics | ||
""" | ||
def __init__(self, control_statistics, treatment_statistics): | ||
self.control_statistics = control_statistics | ||
self.treatment_statistics = treatment_statistics | ||
|
||
|
||
class SampleStatistics(JsonSerializable): | ||
""" This class holds sample size, mean and variance. | ||
|
||
:type sample_size: int | ||
:type mean: float | ||
:type variance: float | ||
""" | ||
def __init__(self, sample_size, mean, variance): | ||
self.sample_size = sample_size | ||
self.mean = mean | ||
self.variance = variance | ||
|
||
|
||
class SimpleTestStatistics(BaseTestStatistics): | ||
""" Additionally to BaseTestStatistics, holds delta, confidence interval, statistical power, and p value. | ||
|
||
:type control_statistics: SampleStatistics | ||
:type treatment_statistics: SampleStatistics | ||
:type delta: float | ||
:type p: float | ||
:type statistical_power: float | ||
:param ci: a dict where keys are percentiles and values are the corresponding value for the statistic. | ||
:type ci: dict | ||
""" | ||
def __init__(self, control_statistics, treatment_statistics, delta, ci, p, statistical_power): | ||
super(SimpleTestStatistics, self).__init__(control_statistics, treatment_statistics) | ||
self.delta = delta | ||
self.p = p | ||
self.statistical_power = statistical_power | ||
# TODO: think of structure {p: v} (the same as ci) | ||
self.confidence_interval = [{'percentile': p, 'value': v} for (p, v) in ci.items()] | ||
|
||
|
||
class EarlyStoppingTestStatistics(SimpleTestStatistics): | ||
""" Additionally to SimpleTestStatistics, holds boolean flag for early stopping. | ||
|
||
:type control_statistics: SampleStatistics | ||
:type treatment_statistics: SampleStatistics | ||
:type delta: float | ||
:type p: float | ||
:type statistical_power: float | ||
:param ci: a dict where keys are percentiles and values are the corresponding value for the statistic. | ||
:type ci: dict | ||
:type stop: bool | ||
""" | ||
def __init__(self, control_statistics, treatment_statistics, delta, ci, p, statistical_power, stop): | ||
super(EarlyStoppingTestStatistics, self).__init__(control_statistics, treatment_statistics, delta, ci, p, statistical_power) | ||
self.stop = stop | ||
|
||
|
||
class CorrectedTestStatistics(JsonSerializable): | ||
""" Holds original and corrected statistics. This class should be used to hold statistics for multiple testing. | ||
original_test_statistics and corrected_test_statistics should have the same type. | ||
|
||
:param original_test_statistics: test result before correction | ||
:type original_test_statistics: SimpleTestStatistics or EarlyStoppingTestStatistics | ||
:param corrected_test_statistics: test result after correction | ||
:type corrected_test_statistics: SimpleTestStatistics or EarlyStoppingTestStatistics | ||
""" | ||
def __init__(self, original_test_statistics, corrected_test_statistics): | ||
type1 = type(original_test_statistics) | ||
type2 = type(corrected_test_statistics) | ||
if type1 != type2: | ||
raise RuntimeError("Type mismatch for type " + str(type1) + " and " + str(type2)) | ||
if not isinstance(original_test_statistics, BaseTestStatistics): | ||
raise RuntimeError("Input should be instances of BaseTestStatistics or its subclass") | ||
self.original_test_statistics = original_test_statistics | ||
self.corrected_test_statistics = corrected_test_statistics | ||
|
||
|
||
# --------- Below are the data structure of test results --------- # | ||
class StatisticalTestResult(JsonSerializable): | ||
""" This class holds the results of a single statistical test. | ||
|
||
:param test: information about the statistical test | ||
:type test: StatisticalTest | ||
:param result: result of this statistical test | ||
:type result: BaseTestStatistics or its subclasses or CorrectedTestStatistics #TODO: better approach? | ||
""" | ||
def __init__(self, test, result): | ||
self.test = test | ||
self.result = result | ||
|
||
|
||
class MultipleTestSuiteResult(JsonSerializable): | ||
""" This class holds the results of a MultipleTestSuite. | ||
|
||
:param statistical_test_results: test results for all statistical testing unit | ||
:type statistical_test_results: list[StatisticalTestResult] | ||
:param correction_method: method used for multiple testing correction. Possible values are: | ||
"none": no correction | ||
"bh": benjamini hochberg correction | ||
"bf": bonferroni correction | ||
:type correction_method: str | ||
""" | ||
def __init__(self, statistical_test_results, correction_method="none"): | ||
self.statistical_test_results = statistical_test_results | ||
if correction_method not in ["none", "bh", "bf"]: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not stick to something like an There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So, for analysis service it's ok to have parameters for correction as 0, 1 or 2. We wanted to leave at least a short names for correction for somebody who can use expan from outside Zalando. I also don't think we need to have a class here. Or you have some advantages in mind for doing it as Enum? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Each time you add a new correction type or change an existing one, you'll have to find all occurences of, say, There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @gbordyugov yes I agree. One more question: do you also think we need to create a Enum for the delta methods? We currently maintain a lookup table for "group_sequential", "bayes_factor", "fixed_horizon", etc There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, that would be another application of this strategy, totally agree! |
||
raise ValueError('Correction method is not implemented. We support "none", "bh", and "bf".') | ||
self.correction_method = correction_method |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
import pandas as pd | ||
from expan.core.util import JsonSerializable | ||
|
||
|
||
class StatisticalTest(JsonSerializable): | ||
""" This class describes what has to be tested against what and represent a unit of statistical testing. | ||
|
||
:param kpi: the kpi to perform on | ||
:type kpi: KPI or its subclass | ||
:param features: list of features used for subgroups | ||
:type features: list[FeatureFilter] | ||
:param variants: variant column name and their values | ||
:type variants: Variants | ||
""" | ||
def __init__(self, kpi, features, variants): | ||
if not isinstance(features, list): | ||
raise TypeError("Features should be a list.") | ||
if not all(isinstance(n, FeatureFilter) for n in features): | ||
raise TypeError("Some features are not of the type FeatureFilter.") | ||
self.kpi = kpi | ||
self.features = features | ||
self.variants = variants | ||
|
||
|
||
class KPI(JsonSerializable): | ||
""" This class represents a basic kpi. | ||
:param name: name of the kpi | ||
:type name: str | ||
""" | ||
def __init__(self, name): | ||
self.name = name | ||
|
||
|
||
class DerivedKPI(KPI): | ||
""" This class represents a derived KPI which is a ratio of two columns. | ||
Names of the the two columns are passed as numerator and denominator. | ||
|
||
:param name: name of the kpi | ||
:type name: str | ||
:param numerator: the numerator for the derived KPI | ||
:type numerator: str | ||
:param denominator: the denominator for the derived KPI | ||
:type denominator: str | ||
""" | ||
def __init__(self, name, numerator, denominator): | ||
super(DerivedKPI, self).__init__(name) | ||
self.numerator = numerator | ||
self.denominator = denominator | ||
|
||
def make_derived_kpi(self, data): | ||
""" Create the derived kpi column if it is not yet created. """ | ||
if self.name not in data.columns: | ||
data.loc[:, self.name] = data[self.numerator]/data[self.denominator].astype(float) | ||
|
||
|
||
class StatisticalTestSuite(JsonSerializable): | ||
""" This class consists of a number of tests plus choice of the correction method. | ||
|
||
:param tests: list of statistical tests in the suite | ||
:type tests: list[StatisticalTest] | ||
:param correction_method: method used for multiple testing correction. Possible values are: | ||
"none": no correction | ||
"bh": benjamini hochberg correction | ||
"bf": bonferroni correction | ||
:type correction_method: str | ||
""" | ||
def __init__(self, tests, correction_method="none"): | ||
self.tests = tests | ||
if correction_method not in ["none", "bh", "bf"]: | ||
raise ValueError('Correction method is not implemented. We support "none", "bh", and "bf".') | ||
self.correction_method = correction_method | ||
|
||
@property | ||
def size(self): | ||
return len(self.tests) | ||
|
||
|
||
class FeatureFilter(JsonSerializable): | ||
""" This class represents a filter, restricting a DataFrame to rows with column_value in column_name. | ||
|
||
It can be used to specify subgroup conditions. | ||
:param column_name: name of the column to perform filter on | ||
:type column_name: str | ||
:param column_value: value of the column to perform filter on | ||
:type column_value: str | ||
""" | ||
def __init__(self, column_name, column_value): | ||
self.column_name = column_name | ||
self.column_value = column_value | ||
|
||
def apply_to_data(self, data): | ||
return data[data[self.column_name] == self.column_value] | ||
|
||
|
||
class Variants(JsonSerializable): | ||
""" This class represents information of variants. | ||
|
||
:param variant_column_name: name of the column that represents variant | ||
:type variant_column_name: str | ||
:param control_name: value of the variant that represents control group | ||
:type control_name: str | ||
:param treatment_name: value of the variant that represents control group | ||
:type treatment_name: str | ||
""" | ||
def __init__(self, variant_column_name, control_name, treatment_name): | ||
self.variant_column_name = variant_column_name | ||
self.control_name = control_name | ||
self.treatment_name = treatment_name | ||
|
||
def get_variant(self, data, variant_name): | ||
result = data[data[self.variant_column_name] == variant_name] | ||
if not isinstance(result, pd.DataFrame): | ||
result = pd.DataFrame([result]) | ||
return result |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Didn't we have a class for
ConfidenceInterval
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We had a class previously, but then we found that it leads to two times of the fields:
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The
TODO
here is to think of whether we want:or
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I see, thank you for clarifying this one for me.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
the problem I see here with structure {'2.5': 0.5, '97.5': 0.4} is that everywhere where we need to access those fields we always need to know those numbers (2.5 and 97.5). If they are fixed, then it's ok-ish, but things can change if we decide to have another percentiles (for example) or add some intermediate (we allowed that before as far as I remember).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you are right. Then I think we can stay with
{"percentile": 2.5, "value": -1}
.