Skip to content

Commit 193660a

Browse files
authored
Add files via upload
1 parent a576830 commit 193660a

13 files changed

+184
-17
lines changed

week7/machine-learning-ex6/ex6/dataset3Params.m

+21-4
Original file line numberDiff line numberDiff line change
@@ -23,11 +23,28 @@
2323
% mean(double(predictions ~= yval))
2424
%
2525

26+
test_range = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30];
27+
C_opt = C;
28+
sigma_opt = sigma;
29+
test_range_len = size(test_range,2);
30+
error_min = 10^6;
31+
for i = 1:test_range_len
32+
for j = 1:test_range_len
33+
C = test_range(i);
34+
sigma = test_range(j);
35+
model = svmTrain(X, y, C, @(x1, x2) gaussianKernel(x1, x2, sigma));
36+
predictions = svmPredict(model, Xval);
37+
error_fraction = mean(double(predictions ~= yval));
38+
if error_fraction < error_min
39+
C_opt = C;
40+
sigma_opt = sigma;
41+
error_min = error_fraction;
42+
end
43+
end
44+
end
2645

27-
28-
29-
30-
46+
C = C_opt;
47+
sigma = sigma_opt;
3148

3249
% =========================================================================
3350

week7/machine-learning-ex6/ex6/emailFeatures.m

+5
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,11 @@
4848
%
4949
%
5050

51+
for i = word_indices
52+
x(i) = 1;
53+
end
54+
55+
5156

5257

5358

week7/machine-learning-ex6/ex6/ex6.m

+1
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@
5151
% You should try to change the C value below and see how the decision
5252
% boundary varies (e.g., try C = 1000)
5353
C = 1;
54+
% C = 100;
5455
model = svmTrain(X, y, C, @linearKernel, 1e-3, 20);
5556
visualizeBoundaryLinear(X, y, model);
5657

week7/machine-learning-ex6/ex6/gaussianKernel.m

+19
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,30 @@
1515
% sigma
1616
%
1717
%
18+
summation = norm(x1-x2) .^2;
1819

20+
% Gaussian (RBF) kernel: sim = exp(-||x1 - x2||^2 / (2 * sigma^2))
21+
sim = exp(-(summation/(2*sigma .^2)));
1922

2023

24+
% ----
25+
% Tung Le's result
26+
% >> ex6
27+
% Loading and Visualizing Data ...
28+
% Program paused. Press enter to continue.
2129

30+
% Training Linear SVM ...
2231

32+
% Training ......................................................................
33+
% ........................................ Done!
34+
35+
% Program paused. Press enter to continue.
36+
37+
% Evaluating the Gaussian Kernel ...
38+
% Gaussian Kernel between x1 = [1; 2; 1], x2 = [0; 4; -1], sigma = 2.000000 :
39+
% 0.324652
40+
% (for sigma = 2, this value should be about 0.324652)
41+
% Program paused. Press enter to continue.
2342

2443
% =============================================================
2544

11 Bytes
Binary file not shown.

week7/machine-learning-ex6/ex6/processEmail.m

+11-13
Original file line numberDiff line numberDiff line change
@@ -33,24 +33,23 @@
3333
% Look for one or more characters between 0-9
3434
email_contents = regexprep(email_contents, '[0-9]+', 'number');
3535

36-
% Handle URLS
36+
% Handle URLs: replace each URL with 'httpaddr'
3737
% Look for strings starting with http:// or https://
3838
email_contents = regexprep(email_contents, ...
3939
'(http|https)://[^\s]*', 'httpaddr');
4040

41-
% Handle Email Addresses
41+
% Handle email addresses: replace each address with 'emailaddr'
4242
% Look for strings with @ in the middle
4343
email_contents = regexprep(email_contents, '[^\s]+@[^\s]+', 'emailaddr');
4444

45-
% Handle $ sign
45+
% Handle $ signs: replace each run of '$' characters with the word 'dollar'
4646
email_contents = regexprep(email_contents, '[$]+', 'dollar');
4747

48-
4948
% ========================== Tokenize Email ===========================
5049

5150
% Output the email to screen as well
5251
fprintf('\n==== Processed Email ====\n\n');
53-
52+
% fprintf('%s ', email_contents);
5453
% Process file
5554
l = 0;
5655

@@ -97,14 +96,13 @@
9796
% str2). It will return 1 only if the two strings are equivalent.
9897
%
9998

100-
101-
102-
103-
104-
105-
106-
107-
99+
% function:
100+
% If str matches a vocabulary entry, append that entry's index to word_indices.
101+
for i = 1:length(vocabList)
102+
if(strcmp(str,vocabList{i}))
103+
word_indices = [word_indices; i];  % trailing ';' suppresses echoing the growing vector on every match
104+
end
105+
end
108106

109107
% =============================================================
110108

+15
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Created by Octave 3.8.0, Tue Mar 28 00:00:33 2017 CDT <tungthanhle@Tungs-MacBook-Pro.local>
2+
# name: email
3+
# type: sq_string
4+
# elements: 1
5+
# length: 16
6+
ttungl@gmail.com
7+
8+
9+
# name: token
10+
# type: sq_string
11+
# elements: 1
12+
# length: 16
13+
NsyW3miuT2G4WGYI
14+
15+
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
2+
word_indices =
3+
471
4+
1893
5+
1809
6+
1699
7+
997
8+
479
9+
1182
10+
1064
11+
1231
12+
1827
13+
810
14+
1893
15+
103
16+
1070
17+
74
18+
1346
19+
837
20+
1852
21+
1242
22+
1699
23+
1893
24+
1631
25+
1665
26+
1852
27+
997
28+
1893
29+
479
30+
1120
31+
1231
32+
1827
33+
1182
34+
1064
35+
1676
36+
877
37+
1113
38+
234
39+
1191
40+
1120
41+
792
42+
1287
43+
1377
44+
1120
45+
1699
46+
708
47+
1666
48+
440
49+
1120
50+
1120
51+
1120
52+
1093
53+
1230
54+
1844
55+
1809
56+
1699
57+
997
58+
1490
59+
1060
60+
997
61+
1666
62+
234
63+
74
64+
708
65+
1666
66+
608
67+
869
68+
1120
69+
1048
70+
825
71+
1896
72+
1117
73+
1120
74+
1120
75+
1120
76+
976
77+
676
78+
1699
79+
1895
80+
234
81+
74
82+
1852
83+
867
84+
1893
85+
1699
86+
1230
87+
956
88+
1896
89+
1844
90+
103
91+
387
92+
997
93+
479
94+
1120
95+
1265
96+
1231
97+
1827
98+
1120
99+
1120
100+
1120
101+
102+
number numberljgvnumb numberleannumberlrmsnumb
103+
numberwxhonumberqiytnumb numberrjuvnumberhqcfnumb numbereidbnumberdmtvlnumb
104+
105+
=========================
106+
107+
Processed spamSample1.txt
108+
109+
Spam Classification: 1
110+
(1 indicates spam, 0 indicates not spam)
111+
112+
>>

0 commit comments

Comments
 (0)