Skip to content

Commit 11a2aea

Browse files
Data engineering profils et groupes de navires (#4158)
## Linked issues - Resolve #358
2 parents bc61ac9 + 7f58851 commit 11a2aea

24 files changed

+1211
-89
lines changed

.gitignore

+4
Original file line number | Diff line number | Diff line change
@@ -180,6 +180,10 @@ frontend/cypress/videos/
180 180
/datascience/src/pipeline/data/non_commited_data/pno_segments_subscriptions.csv
181 181
/datascience/src/pipeline/data/non_commited_data/pno_vessels_subscriptions.csv
182 182

183+
# Ignore downloaded external repositories
184+
datascience/tests/test_data/external/*
185+
!datascience/tests/test_data/external/README.md
186+
183 187
# Kotlin sessions
184 188
.kotlin/
185 189

Makefile

+26-3
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,7 @@
1 1
INFRA_FOLDER="$(shell pwd)/infra/configurations/"
2 2
HOST_MIGRATIONS_FOLDER=$(shell pwd)/backend/src/main/resources/db/migration
3+
DATA_WAREHOUSE_INPUT_DATA_FOLDER=$(shell pwd)/datascience/tests/test_data/clickhouse_user_files
4+
EXTERNAL_DATA_FOLDER=$(shell pwd)/datascience/tests/test_data/external
3 5

4 6
SHELL := /bin/bash
5 7
.SHELLFLAGS = -ec
@@ -270,8 +272,8 @@ docker-compose-puppeteer-up: docker-env
270 272

271 273
docker-build-pipeline:
272 274
docker build -f ./infra/docker/datapipeline/Dockerfile . -t monitorfish-pipeline:$(VERSION)
273-
docker-test-pipeline:
274-
docker run --network host -v /var/run/docker.sock:/var/run/docker.sock -u monitorfish-pipeline:$(DOCKER_GROUP) --env-file datascience/.env.test --env HOST_MIGRATIONS_FOLDER=$(HOST_MIGRATIONS_FOLDER) monitorfish-pipeline:$(VERSION) coverage run -m pytest --pdb tests
275+
docker-test-pipeline: fetch-external-data run-data-warehouse
276+
docker run --network host -v $(EXTERNAL_DATA_FOLDER):/home/monitorfish-pipeline/datascience/tests/test_data/external -v /var/run/docker.sock:/var/run/docker.sock -u monitorfish-pipeline:$(DOCKER_GROUP) --env-file datascience/.env.test --env HOST_MIGRATIONS_FOLDER=$(HOST_MIGRATIONS_FOLDER) monitorfish-pipeline:$(VERSION) coverage run -m pytest --pdb --ignore=tests/test_data/external tests
275 277
docker-tag-pipeline:
276 278
docker tag monitorfish-pipeline:$(VERSION) docker.pkg.github.com/mtes-mct/monitorfish/monitorfish-pipeline:$(VERSION)
277 279
docker-push-pipeline:
@@ -284,8 +286,29 @@ docker-push-pipeline:
284 286

285 287
install-pipeline:
286 288
cd datascience && poetry install
289+
290+
stop-data-warehouse:
291+
export DATA_WAREHOUSE_PASSWORD=password && \
292+
export DATA_WAREHOUSE_USER=clickhouse_user && \
293+
export DATA_WAREHOUSE_INPUT_DATA_FOLDER=$(DATA_WAREHOUSE_INPUT_DATA_FOLDER) && \
294+
docker compose -f ./datascience/tests/docker-compose.yml down -v
295+
296+
fetch-external-data:
297+
git clone --depth=1 --branch=main https://github.com/MTES-MCT/fisheries-and-environment-data-warehouse.git ./datascience/tests/test_data/external/data_warehouse || echo "Data Warehouse repository already present - skipping git clone"
298+
299+
erase-external-data:
300+
rm -rf datascience/tests/test_data/external/data_warehouse
301+
302+
run-data-warehouse:
303+
export DATA_WAREHOUSE_PASSWORD=password && \
304+
export DATA_WAREHOUSE_USER=clickhouse_user && \
305+
export DATA_WAREHOUSE_INPUT_DATA_FOLDER=$(DATA_WAREHOUSE_INPUT_DATA_FOLDER) && \
306+
docker compose -f ./datascience/tests/docker-compose.yml up -d --remove-orphans
307+
287 308
test-pipeline:
288-
cd datascience && export TEST_LOCAL=True && poetry run coverage run -m pytest --pdb tests/ && poetry run coverage report && poetry run coverage html
309+
cd datascience && export TEST_LOCAL=True && poetry run coverage run -m pytest --pdb --ignore=tests/test_data/external tests/ && poetry run coverage report && poetry run coverage html
310+
311+
test-pipeline-with-data_warehouse: fetch-external-data run-data-warehouse test-pipeline stop-data-warehouse
289 312

290 313
# ----------------------------------------------------------
291 314
# Remote: Database commands
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
-- Vessel profiles including the share (by weight) of catches made per gear, species, fao area, segment, landing_port...
2+
CREATE TABLE IF NOT EXISTS public.vessel_profiles (
3+
cfr VARCHAR NOT NULL,
4+
gears JSONB,
5+
species JSONB,
6+
fao_areas JSONB,
7+
segments JSONB,
8+
landing_ports JSONB,
9+
recent_gears JSONB,
10+
recent_species JSONB,
11+
recent_fao_areas JSONB,
12+
recent_segments JSONB,
13+
recent_landing_ports JSONB,
14+
latest_landing_port VARCHAR,
15+
latest_landing_facade facade
16+
);

datascience/.env.template

+5
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,11 @@ MONITORFISH_LOCAL_NAME=
33 33
MONITORFISH_LOCAL_USER=
34 34
MONITORFISH_LOCAL_PWD=
35 35

36+
DATA_WAREHOUSE_HOST=
37+
DATA_WAREHOUSE_PORT=
38+
DATA_WAREHOUSE_USER=
39+
DATA_WAREHOUSE_PWD=
40+
36 41
# Proxies to use when accessing the Internet
37 42
HTTP_PROXY_=
38 43
HTTPS_PROXY_=

datascience/.env.test

+5
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,11 @@ MONITORFISH_LOCAL_NAME=db_name
26 26
MONITORFISH_LOCAL_USER=db_user
27 27
MONITORFISH_LOCAL_PWD=db_pwd
28 28

29+
DATA_WAREHOUSE_HOST=0.0.0.0
30+
DATA_WAREHOUSE_PORT=8123
31+
DATA_WAREHOUSE_USER=clickhouse_user
32+
DATA_WAREHOUSE_PWD=password
33+
29 34
# Proxy settings
30 35
HTTPS_PROXY_=http://some.ip.address:port
31 36
HTTP_PROXY_=http://some.ip.address:port

0 commit comments

Comments
 (0)