-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_part2.py
83 lines (69 loc) · 3.23 KB
/
run_part2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""
File name: run_part2.py
Author: Patrick Cummings
Date created: 11/17/2019
Date last modified: 11/24/2019
Python Version: 3.7
"""
import json
# import random
from pathlib import Path
import pandas as pd
from models.random_forest import RandomForestClassifier
# Load the train / validation / test splits (paths are relative to the
# current working directory).
_raw_frames = [
    pd.read_csv(csv_path)
    for csv_path in ('data/pa3_train.csv', 'data/pa3_val.csv', 'data/pa3_test.csv')
]

# Remove 'veil-type_p' from every split: per the original author's note the
# feature has value 1 for all instances.
train_set, validation_set, test_set = (
    frame.drop(columns=['veil-type_p']) for frame in _raw_frames
)
# Vary tree size.
# The output directory does not depend on the loop variable, so resolve and
# create it once up front. parents=True is required because the intermediate
# 'model_output' directory may not exist yet (a bare mkdir() would raise
# FileNotFoundError); exist_ok=True makes re-runs a no-op. Creating it before
# training also means an unwritable location fails before any work is done.
output_folder = Path('model_output/part2')
output_path = Path(__file__).parent.resolve().joinpath(output_folder)
output_path.mkdir(parents=True, exist_ok=True)

for n in [1, 2, 5, 10, 25]:
    # Create random forest with n trees, depth = 2, and n_features = 5.
    rf = RandomForestClassifier(train_set, validation_set, test_set, label='class', n_trees=n, n_features=5,
                                seed=5, max_depth=2)
    results = rf.train()

    # Save output for learned model to .json file. The feature count in the
    # file name is read back from the model (rf.m) rather than from the
    # literal passed above -- presumably the same value; confirm in
    # models/random_forest.py.
    training_file = output_path.joinpath(f'rf_ntrees_{n}_nfeat_{rf.m}.json')
    with open(training_file, 'w') as f:
        json.dump(results, f, indent=4)
# Vary number of bagged features.
# The output directory does not depend on the loop variable, so resolve and
# create it once up front. parents=True is required because the intermediate
# 'model_output' directory may not exist yet (a bare mkdir() would raise
# FileNotFoundError); exist_ok=True makes re-runs a no-op. Creating it before
# training also means an unwritable location fails before any work is done.
output_folder = Path('model_output/part2')
output_path = Path(__file__).parent.resolve().joinpath(output_folder)
output_path.mkdir(parents=True, exist_ok=True)

for m in [1, 2, 5, 10, 25, 50]:
    # Create random forest with 15 trees, depth = 2, and n_features = m.
    rf = RandomForestClassifier(train_set, validation_set, test_set, label='class', n_trees=15, n_features=m,
                                seed=5, max_depth=2)
    results = rf.train()

    # Save output for learned model to .json file. The feature count in the
    # file name is read back from the model (rf.m) rather than from the loop
    # variable -- presumably the same value; confirm in
    # models/random_forest.py.
    training_file = output_path.joinpath(f'rf_ntrees_15_nfeat_{rf.m}.json')
    with open(training_file, 'w') as f:
        json.dump(results, f, indent=4)
# Vary random seed with best parameters from models created above.
n = 15
m = 25
seeds = [2201, 9325, 1033, 4179, 1931, 8117, 7364, 7737, 6219, 3439]

# The output directory does not depend on the loop variable, so resolve and
# create it once up front. parents=True is required because the intermediate
# 'model_output' directory may not exist yet (a bare mkdir() would raise
# FileNotFoundError); exist_ok=True makes re-runs a no-op. Creating it before
# training also means an unwritable location fails before any work is done.
output_folder = Path('model_output/part2')
output_path = Path(__file__).parent.resolve().joinpath(output_folder)
output_path.mkdir(parents=True, exist_ok=True)

# i is the 1-based position of the seed in `seeds`; the saved file names carry
# this index, not the seed value itself.
for i, s in enumerate(seeds, start=1):
    # Create random forest with n trees, depth = 2, n_features = m and seed s.
    rf = RandomForestClassifier(train_set, validation_set, test_set, label='class', n_trees=n, n_features=m,
                                seed=s, max_depth=2)
    results = rf.train()

    # Save output for learned model to .json file. The feature count in the
    # file name is read back from the model (rf.m) -- presumably equal to m;
    # confirm in models/random_forest.py.
    training_file = output_path.joinpath(f'rf_ntrees_{n}_nfeat_{rf.m}_seed_{i}.json')
    with open(training_file, 'w') as f:
        json.dump(results, f, indent=4)