Skip to content

Commit 28f56a6

Browse files
committed
tracker added
1 parent 558bd74 commit 28f56a6

File tree

7 files changed

+126
-8
lines changed

7 files changed

+126
-8
lines changed

dvc.lock

+60
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
schema: '2.0'
2+
stages:
3+
load_data:
4+
cmd: python src\load_data.py --config=params.yaml
5+
deps:
6+
- path: data_given/WineQT.csv
7+
md5: b03c22cbdd89823c0ec32044c4826343
8+
size: 78057
9+
- path: src/get_data.py
10+
md5: 02093608f13041fb074ffd78f04c92e4
11+
size: 719
12+
- path: src/load_data.py
13+
md5: 0d5a941034eed3cc6894ccdd4cbd0fcc
14+
size: 691
15+
outs:
16+
- path: data/raw/WineQT.csv
17+
md5: 928a3899cf30e454051988a983c38c6e
18+
size: 72970
19+
split_data:
20+
cmd: python src/split_data.py --config=params.yaml
21+
deps:
22+
- path: data/raw/WineQT.csv
23+
md5: 928a3899cf30e454051988a983c38c6e
24+
size: 72970
25+
- path: src/split_data.py
26+
md5: 85a3cc19596dbbf90162642115c60acc
27+
size: 1092
28+
outs:
29+
- path: data/processed/test_WineQT.csv
30+
md5: 0e9af019b6dd8b86b4fd1fea505a6888
31+
size: 14805
32+
- path: data/processed/train_WineQT.csv
33+
md5: 727b5be0ccb3090ab1d49b02aea7a6e3
34+
size: 58315
35+
train_and_evaluate:
36+
cmd: python src/train_and_evaluate.py --config=params.yaml
37+
deps:
38+
- path: data/processed/test_WineQT.csv
39+
md5: 0e9af019b6dd8b86b4fd1fea505a6888
40+
size: 14805
41+
- path: data/processed/train_WineQT.csv
42+
md5: 727b5be0ccb3090ab1d49b02aea7a6e3
43+
size: 58315
44+
- path: src/train_and_evaluate.py
45+
md5: bfcea7416270c8a6b45ea2e378624d71
46+
size: 2657
47+
params:
48+
params.yaml:
49+
estimators.ElasticNet.params.alpha: 0.9
50+
estimators.ElasticNet.params.l1_ratio: 0.4
51+
outs:
52+
- path: report/params.json
53+
md5: a4aab0167612f21a6085dd05cfafdbf6
54+
size: 44
55+
- path: report/scores.json
56+
md5: 05a6db037c56b81bf051acef60f76ffe
57+
size: 100
58+
- path: saved_models/model.joblib
59+
md5: 33d8e2b9814ee7d82f9f5621e2fdbfdc
60+
size: 1239

dvc.yaml

+17-1
Original file line numberDiff line numberDiff line change
@@ -16,5 +16,21 @@ stages:
1616
outs:
1717
- data/processed/train_WineQT.csv
1818
- data/processed/test_WineQT.csv
19-
19+
20+
train_and_evaluate:
21+
cmd: python src/train_and_evaluate.py --config=params.yaml
22+
deps:
23+
- data/processed/train_WineQT.csv
24+
- data/processed/test_WineQT.csv
25+
- src/train_and_evaluate.py
26+
params:
27+
- estimators.ElasticNet.params.alpha
28+
- estimators.ElasticNet.params.l1_ratio
29+
metrics:
30+
- report/scores.json:
31+
cache: false
32+
- report/params.json:
33+
cache: false
34+
outs:
35+
- saved_models/model.joblib
2036

params.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
base:
22
project: winequality-project
33
random_state: 42
4-
target_col: TARGET
4+
target_col: quality
55

66
data_source:
77
s3_source: data_given/WineQT.csv

report/params.json

+4
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"alpha": 0.9,
3+
"l1_ratio": 0.4
4+
}

report/scores.json

+5
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"rmse": 0.7191050648744693,
3+
"mae": 0.5907059594776091,
4+
"r2": 0.07073280314369634
5+
}

saved_models/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
/model.joblib

src/train_and _evaluate.py → src/train_and_evaluate.py

+38-6
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import os
66
import pandas as pd
7+
import numpy as np
78
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
89
from sklearn.model_selection import train_test_split
910
from sklearn.linear_model import ElasticNet
@@ -30,25 +31,56 @@ def train_and_evaluate(config_path):
3031
alpha=config["estimators"]["ElasticNet"]["params"]["alpha"]
3132
l1_ratio=config["estimators"]["ElasticNet"]["params"]["l1_ratio"]
3233
target=config["base"]["target_col"]
33-
34+
3435
train= pd.read_csv(train_data_path, sep=",")
3536
test= pd.read_csv(test_data_path, sep=",")
36-
37+
3738
train_y= train[target]
3839
test_y= test[target]
39-
40+
4041
train_x= train.drop(target, axis=1)
4142
test_x= test.drop(target, axis=1)
42-
43+
4344
lr= ElasticNet(
4445
alpha=alpha,
4546
l1_ratio=l1_ratio,
4647
random_state=random_state)
47-
48+
4849
lr.fit(train_x, train_y)
4950
predicted_qualities= lr.predict(test_x)
50-
51+
5152
(rmse, mae, r2) = eval_metrics(test_y, predicted_qualities)
53+
54+
print("Elasticnet model (alpha=%f, l1_ratio=%f):" % (alpha, l1_ratio))
55+
print(f" RMSE: {rmse}")
56+
print(f" MAE: {mae}")
57+
print(f" R2: {r2}")
58+
59+
#####################################################
60+
scores_file = config["reports"]["scores"]
61+
params_file = config["reports"]["params"]
62+
63+
with open(scores_file, "w") as f:
64+
scores = {
65+
"rmse": rmse,
66+
"mae": mae,
67+
"r2": r2
68+
}
69+
json.dump(scores, f, indent=4)
70+
71+
with open(params_file, "w") as f:
72+
params = {
73+
"alpha": alpha,
74+
"l1_ratio": l1_ratio,
75+
}
76+
json.dump(params, f, indent=4)
77+
#####################################################
78+
79+
80+
os.makedirs(model_dir, exist_ok=True)
81+
model_path = os.path.join(model_dir, "model.joblib")
82+
83+
joblib.dump(lr, model_path)
5284

5385

5486

0 commit comments

Comments
 (0)