run_quickstart_from_scratch.py
import pandas as pd

from autogluon.tabular import TabularPredictor
from autogluon_benchmark import OpenMLTaskWrapper
from tabrepo import EvaluationRepository
def get_artifacts(task: OpenMLTaskWrapper, fold: int, hyperparameters: dict, dataset: str = None, time_limit=60):
    if dataset is None:
        dataset = str(task.task_id)
    print(f"Fitting configs on dataset: {dataset}\t| fold: {fold}")
    train_data, test_data = task.get_train_test_split_combined(fold=fold)
    predictor: TabularPredictor = TabularPredictor(label=task.label).fit(
        train_data=train_data,
        hyperparameters=hyperparameters,
        ag_args_fit={"ag.max_time_limit": time_limit},
        fit_weighted_ensemble=False,
        calibrate=False,
        verbosity=0,
    )
    leaderboard = predictor.leaderboard(test_data, score_format="error")
    leaderboard["dataset"] = dataset
    leaderboard["tid"] = task.task_id
    leaderboard["fold"] = fold
    leaderboard["problem_type"] = task.problem_type
    leaderboard.rename(columns={
        "eval_metric": "metric",
        "metric_error_test": "metric_error",
    }, inplace=True)
    simulation_artifact = predictor.simulation_artifact(test_data=test_data)
    simulation_artifacts = {dataset: {fold: simulation_artifact}}
    return simulation_artifacts, leaderboard
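
# Illustrative usage of get_artifacts (a sketch, not executed by this script; task id 3623
# corresponds to the "meta" dataset listed in dataset_to_tid_dict below):
#   task = OpenMLTaskWrapper.from_task_id(task_id=3623)
#   simulation_artifacts, leaderboard = get_artifacts(
#       task=task, fold=0, hyperparameters={"GBM": {}}, dataset="meta", time_limit=60,
#   )
#   # `simulation_artifacts` is nested as {dataset: {fold: artifact}}, and `leaderboard`
#   # holds one row per trained config with its metric error and fit/inference times.
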
def convert_leaderboard_to_configs(leaderboard: pd.DataFrame, minimal: bool = True) -> pd.DataFrame:
    df_configs = leaderboard.rename(columns=dict(
        model="framework",
        fit_time="time_train_s",
        pred_time_test="time_infer_s",
    ))
    if minimal:
        df_configs = df_configs[[
            "dataset",
            "fold",
            "framework",
            "metric_error",
            "metric_error_val",
            "metric",
            "problem_type",
            "time_train_s",
            "time_infer_s",
            "tid",
        ]]
    return df_configs
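
# Illustrative usage of convert_leaderboard_to_configs (a sketch, not executed by this script):
#   df_configs = convert_leaderboard_to_configs(leaderboard=leaderboard_full)
#   # yields one row per (dataset, fold, framework) with the columns:
#   #   dataset, fold, framework, metric_error, metric_error_val, metric,
#   #   problem_type, time_train_s, time_infer_s, tid
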
"""
This tutorial showcases how to generate a small context from scratch using AutoGluon.
For the code to generate the full context, refer to https://github.com/autogluon/tabrepo/tree/main/scripts/execute_benchmarks
Required dependencies:
```bash
# Requires autogluon-benchmark
git clone https://github.com/Innixma/autogluon-benchmark.git
pip install -e autogluon-benchmark
```
This example script runs 7 configs on 3 tiny datasets with 2 folds, for a total of 42 trained models.
The full TabRepo runs 1530 configs on 244 datasets with 3 folds, using 8-fold bagging, for a total of 1,119,960 trained bagged models consisting of 8,959,680 fold models.
"""
if __name__ == '__main__':
    # List of datasets to train on
    datasets = [
        "Australian",
        "blood-transfusion",
        "meta",
    ]

    # Map from dataset name to the OpenML task id used to fetch the corresponding task
    dataset_to_tid_dict = {
        "Australian": 146818,
        "blood-transfusion": 359955,
        "meta": 3623,
    }

    # Time limit in seconds each config gets to train per dataset; training is early stopped if exceeded
    time_limit_per_config = 60  # 3600 in paper

    # The configs to train on each dataset
    hyperparameters = {
        "GBM": {},
        "XGB": {},
        "CAT": {},
        "FASTAI": {},
        "NN_TORCH": {},
        "RF": {},
        "XT": {},
    }

    # The folds to train on for each dataset
    folds = [0, 1]

    # Fit models on the datasets and collect their artifacts
    artifacts = []
    for dataset in datasets:
        task_id = dataset_to_tid_dict[dataset]
        for fold in folds:
            task = OpenMLTaskWrapper.from_task_id(task_id=task_id)
            artifacts.append(
                get_artifacts(
                    task=task,
                    fold=fold,
                    dataset=dataset,
                    hyperparameters=hyperparameters,
                    time_limit=time_limit_per_config,
                )
            )

    results_lst_simulation_artifacts = [simulation_artifacts for simulation_artifacts, leaderboard in artifacts]
    leaderboards = [leaderboard for simulation_artifacts, leaderboard in artifacts]

    leaderboard_full = pd.concat(leaderboards)
    df_configs = convert_leaderboard_to_configs(leaderboard=leaderboard_full)
    print(df_configs)

    repo = EvaluationRepository.from_raw(df_configs=df_configs, results_lst_simulation_artifacts=results_lst_simulation_artifacts)

    # Note: Can also skip all the above code if you want to use a readily available context rather than generating from scratch:
    # repo = EvaluationRepository.from_context(version="D244_F3_C1530_30", cache=True)

    repo.print_info()

    repo = repo.to_zeroshot()

    results_cv = repo.simulate_zeroshot(num_zeroshot=3, n_splits=2, backend="seq")
    df_results = repo.generate_output_from_portfolio_cv(portfolio_cv=results_cv, name="quickstart")

    # TODO: Fix time_infer_s to only include the models actually used in the ensemble
    # TODO: Add a way to fetch model hyperparameters and generate the input hyperparameters dict based on the `portfolio` column
    # TODO: Run `portfolio` on the datasets to verify that the true performance matches the simulated performance

    with pd.option_context("display.max_rows", None, "display.max_columns", None, "display.width", 1000):
        print(df_results)