-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1-online-passive-aggressive.py
126 lines (97 loc) · 4.51 KB
/
1-online-passive-aggressive.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
def load_data(data_file_path):
    """Load, clean, scale and split the Wisconsin breast-cancer dataset.

    The CSV is read without a header row; column 0 is skipped and the
    following ten columns are kept (nine features plus the class label).
    Class labels are remapped from 2 -> -1 and 4 -> 1.

    The features are standardized with ``StandardScaler`` and a constant
    column ``'b'`` (all ones) is appended so that the intercept is learned
    as one more weight.

    Args:
        data_file_path: Path to the comma-separated data file.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` as produced by
        ``train_test_split`` with one third of the rows held out. The split
        is not seeded, so it differs between runs.
    """
    column_names = [
        "clump_thickness", "uniformity_of_cell_size", "uniformity_of_cell_shape",
        "marginal_adhesion", "single_epi_cell_size", "bare_nuclei", "bland_chro",
        "normal_nucleoli", "mitoses", "class",
    ]
    # The published Wisconsin file marks missing values with "?". Without
    # na_values the affected column is read as strings and scaling fails.
    data = pd.read_csv(
        data_file_path,
        usecols=[1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
        names=column_names,
        na_values="?",
    )
    # Drop incomplete rows; a no-op when the file has no missing values.
    data = data.dropna().reset_index(drop=True)
    data["class"] = data["class"].replace({2: -1, 4: 1})

    # Feature matrix (first nine columns) and label vector.
    x = data.iloc[:, 0:9]
    y = data.loc[:, "class"]

    # Standardize features.
    # NOTE(review): the scaler is fitted on all rows before the split, so
    # test-set statistics influence the scaling; kept as in the original.
    x_normalized = StandardScaler().fit_transform(x.values)
    x = pd.DataFrame(x_normalized)

    # Append a constant 1 to every row to act as the intercept input.
    x.insert(loc=len(x.columns), column='b', value=1)

    # Split into training and testing sets.
    x_train, x_test, y_train, y_test = tts(x, y, test_size=1 / 3)
    return x_train, x_test, y_train, y_test
def calculate_loss(w, x_instance, y_instance):
    """Return the hinge loss of weight vector ``w`` on a single example.

    The loss is ``1 - y_instance * (w . x_instance)`` when that quantity is
    positive and ``0`` otherwise.
    """
    signed_margin = y_instance * np.dot(w, x_instance)
    shortfall = 1 - signed_margin
    return shortfall if shortfall > 0 else 0
def calculate_update(loss, x_instance, c, tau):
    """Return the passive-aggressive step size for one example.

    Args:
        loss: Hinge loss suffered on the example.
        x_instance: Feature vector of the example.
        c: Aggressiveness parameter; must be positive. Ignored when
            ``tau == 1``.
        tau: Selects the update rule:
            ``1`` -> ``loss / ||x||^2``,
            ``2`` -> ``min(c, loss / ||x||^2)``,
            anything else -> ``loss / (||x||^2 + 1 / (2 * c))``.

    Returns:
        The scalar step size. For rules 1 and 2 a zero-norm ``x_instance``
        yields ``0.0``: the weight change is ``step * y * x`` and would be
        zero anyway, so this only avoids a division by zero.
    """
    squared_norm = np.dot(x_instance, x_instance)

    if tau == 1 or tau == 2:
        if squared_norm == 0:
            return 0.0
        step = loss / squared_norm
        return step if tau == 1 else min(c, step)

    # Parenthesized on purpose: ``1 / 2 * c`` evaluates to ``c / 2``, which
    # only coincides with the intended 1 / (2c) when c == 1.
    aggressiveness_parameter_term = 1 / (2 * c)
    return loss / (squared_norm + aggressiveness_parameter_term)
def train_model(x, y, num_of_iterations, tau, c=1):
    """Train a passive-aggressive linear classifier with repeated passes.

    Each pass visits every row of ``x`` in order and applies one online
    update ``w <- w + step * y_i * x_i``, where ``step`` comes from
    ``calculate_update`` and the loss from ``calculate_loss``.

    Args:
        x: 2-D array of examples, one row per example.
        y: Sequence of labels aligned positionally with the rows of ``x``.
        num_of_iterations: Number of full passes over the data.
        tau: Update-rule selector forwarded to ``calculate_update``.
        c: Aggressiveness parameter forwarded to ``calculate_update``
            (defaults to 1, the value previously hard-coded).

    Returns:
        ``(w_1, w_2, w_10)``: the weight vectors after 1, 2 and 10 completed
        passes. A snapshot whose pass count exceeds ``num_of_iterations``
        is left as a zero vector.
    """
    n_features = x.shape[1]
    w = np.zeros(n_features)
    w_1 = np.zeros(n_features)
    w_2 = np.zeros(n_features)
    w_10 = np.zeros(n_features)

    for iteration in range(num_of_iterations):
        # Feed one example at a time to the online learner.
        for example_value, label in zip(x, y):
            loss = calculate_loss(w, example_value, label)
            update = calculate_update(loss, example_value, c, tau)
            w = w + (update * label * example_value)

        # ``iteration`` is zero-based, so N completed passes means index
        # N - 1. Previously the 1- and 2-pass snapshots were taken one pass
        # late (indices 1 and 2) while the 10-pass one used index 9.
        passes_done = iteration + 1
        if passes_done == 1:
            w_1 = w.copy()
        elif passes_done == 2:
            w_2 = w.copy()
        elif passes_done == 10:
            w_10 = w.copy()

    return w_1, w_2, w_10
def predict_category(w_1, w_2, w_10, x):
    """Predict labels for every row of ``x`` under three weight vectors.

    Args:
        w_1, w_2, w_10: Weight vectors whose length equals the number of
            columns of ``x``.
        x: 2-D collection of examples (a pandas ``DataFrame`` or anything
            ``numpy.asarray`` can convert), one row per example.

    Returns:
        Three lists ``(y_predicted_1, y_predicted_2, y_predicted_10)``, one
        entry per row, each entry being ``sign(w . x_row)``: ``1.0``,
        ``-1.0``, or ``0.0`` when the score is exactly zero.
    """
    # Convert once. The original called ``x.to_numpy()`` three times for
    # every row, converting the whole frame on each call.
    features = np.asarray(x)

    def _signs(weights):
        # One matrix-vector product scores all rows at once.
        return list(np.sign(features @ weights))

    return _signs(w_1), _signs(w_2), _signs(w_10)
def get_prediction_counts(y):
    """Count how many predictions fall in each class.

    Values equal to ``-1`` are counted as benign and values equal to ``1``
    as malignant; any other value is ignored.

    Returns:
        ``(benign_count, malignant_count)``.
    """
    n_benign, n_malignant = 0, 0
    for label in y:
        if label == -1:
            n_benign += 1
            continue
        if label == 1:
            n_malignant += 1
    return n_benign, n_malignant
# --- Script: train on the Wisconsin data and report results -----------------
x_train, x_test, y_train, y_test = load_data('./data/breast-cancer-wisconsin.data')

# Ten passes over the training set using update rule 3.
w_1, w_2, w_10 = train_model(x_train.to_numpy(), y_train.to_numpy(), 10, 3)

y_test_predicted_1, y_test_predicted_2, y_test_predicted_10 = predict_category(
    w_1, w_2, w_10, x_test)
y_train_predicted_1, y_train_predicted_2, y_train_predicted_10 = predict_category(
    w_1, w_2, w_10, x_train)

# Accuracy on the held-out and training sets for each weight snapshot.
# Each row: (test label, test predictions, train label, train predictions).
for test_caption, test_predictions, train_caption, train_predictions in (
    ("testing accuracy on 1 iteration: ", y_test_predicted_1,
     " training accuracy on 1 iteration: ", y_train_predicted_1),
    ("accuracy on 2 iterations: ", y_test_predicted_2,
     " training accuracy on 2 iteration: ", y_train_predicted_2),
    ("accuracy on 10 iterations: ", y_test_predicted_10,
     " training accuracy on 10 iteration: ", y_train_predicted_10),
):
    test_accuracy = accuracy_score(y_test.to_numpy(), test_predictions)
    train_accuracy = accuracy_score(y_train.to_numpy(), train_predictions)
    print(test_caption + str(test_accuracy) + train_caption + str(train_accuracy))

# Predicted class counts on the test set for each weight snapshot.
# Each row: (header text, predictions, text appended to the last line).
for header, predictions, trailer in (
    ("1 iteration\n##############", y_test_predicted_1, "\n"),
    ("2 iterations\n##############\n", y_test_predicted_2, "\n"),
    ("10 iterations\n##############\n", y_test_predicted_10, ""),
):
    be, mal = get_prediction_counts(predictions)
    print(header)
    print("Benign: " + str(be))
    print("Malignant: " + str(mal) + trailer)