Skip to content

Commit af34c01

Browse files
committed
updating data downloader script and dev dockerfile + fixing small issue in the experiments notebook
1 parent 1403b54 commit af34c01

File tree

3 files changed

+17
-9
lines changed

3 files changed

+17
-9
lines changed

data/download_data.sh

+4-2
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,11 @@ function parse_yaml {
2121
# setting important variables
2222
eval $(parse_yaml ../credentials.yaml "CONFIG_")
2323

24+
# defining important variables
2425
export KAGGLE_USERNAME="$CONFIG_KAGGLE_USERNAME"
2526
export KAGGLE_KEY="$CONFIG_KAGGLE_KEY"
26-
s3_bucket="$CONFIG_S3"
27+
export AWS_ACCESS_KEY_ID="$CONFIG_AWS_ACCESS_KEY"
28+
export AWS_SECRET_ACCESS_KEY="$CONFIG_AWS_SECRET_KEY"
2729

2830
# creating a folder within the temporary folder where the dataset will be temporarily saved
2931
mkdir /tmp/e2e-mlops-project/ && cd /tmp/e2e-mlops-project/
@@ -41,7 +43,7 @@ rm -f obesity-or-cvd-risk-classifyregressorcluster.zip
4143
mv ObesityDataSet.csv Original_ObesityDataSet.csv
4244

4345
# copying the csv file to the s3 bucket
44-
aws s3 cp Original_ObesityDataSet.csv s3://$s3_bucket
46+
aws s3 cp Original_ObesityDataSet.csv s3://$"$CONFIG_S3"
4547

4648
# deleting the created folder
4749
cd ~ && rm -rf /tmp/e2e-mlops-project

notebooks/dev_Dockerfile

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1-
# using the latest version of miniconda as a base for the Docker image
2-
FROM python:3.10.14-slim
1+
# using the latest version of Ubuntu 22.04 as a base for the Docker image
2+
FROM ubuntu:22.04
3+
4+
# installing Python and Unzip
5+
RUN apt-get update && apt-get install -y python3.10 python3.10-venv python3.10-dev python3-pip unzip
36

47
# creating the root folder
58
RUN mkdir -p /e2e-project/notebooks

notebooks/experimentations.ipynb

+8-5
Original file line numberDiff line numberDiff line change
@@ -213,10 +213,10 @@
213213
"with open(\"VERSION\", \"r\") as f:\n",
214214
" CODE_VERSION = f.readline().strip()\n",
215215
"\n",
216-
"if content[\"EC2\"] != \"\":\n",
216+
"if content[\"EC2\"] != \"YOUR_EC2_URL\":\n",
217217
" mlflow.set_tracking_uri(f\"http://{content['EC2']}:5000\") \n",
218218
"else:\n",
219-
" mlflow.set_tracking_uri(f\"http://127.0.0.1:5000\") \n",
219+
" raise ValueError(\"You must set an EC2 url!\\n\")\n",
220220
"\n",
221221
"print(f\"Tracking Server URI: '{mlflow.get_tracking_uri()}'\")\n",
222222
"\n",
@@ -273,11 +273,14 @@
273273
}
274274
],
275275
"source": [
276+
"os.environ[\"AWS_ACCESS_KEY_ID\"] = content[\"AWS_ACCESS_KEY\"]\n",
277+
"os.environ[\"AWS_SECRET_ACCESS_KEY\"] =content[\"AWS_SECRET_KEY\"]\n",
278+
"\n",
276279
"# downloading artifacts from the aws s3 bucket\n",
277-
"!aws s3 cp --recursive s3://{content[\"s3\"]}/artifacts {ARTIFACTS_OUTPUT_PATH}\n",
280+
"!aws s3 cp --recursive s3://{content[\"S3\"]}/artifacts {ARTIFACTS_OUTPUT_PATH}\n",
278281
"\n",
279282
"# downloading models from the aws s3 bucket\n",
280-
"!aws s3 cp --recursive s3://{content[\"s3\"]}/features {FEATURES_OUTPUT_PATH}\n"
283+
"!aws s3 cp --recursive s3://{content[\"S3\"]}/features {FEATURES_OUTPUT_PATH}"
281284
]
282285
},
283286
{
@@ -543,7 +546,7 @@
543546
"source": [
544547
"models = [dt, rf, xg, lg]\n",
545548
"min_features = math.floor(X_train.shape[1] * 0.2)\n",
546-
"max_features = math.floor(X_train.shape[1] * 0.3)\n",
549+
"max_features = math.floor(X_train.shape[1] * 0.5)\n",
547550
"\n",
548551
"# creating a new mlflow's experiment\n",
549552
"experiment_id = mlflow.create_experiment(\n",

0 commit comments

Comments
 (0)